-- Copyright 1993-1998, by the Cecil Project -- Department of Computer Science and Engineering, University of Washington -- See the LICENSE file for license information. (--DOC Collections are groups of elements. We first introduce abstract interfaces, describing and refining various kinds of collections and their operations. We then shift to discussing concrete implementations which can be instantiated and manipulated at runtime. There are three major families of collections: unordered collections (like bags and sets), ordered collections (e.g., lists), and tables (or keyed collections). Indexed collections like arrays, vectors, and strings are both ordered collections and keyed tables, where the keys are integer indices. Abstract collection classes typically come in three versions: generic, immutable, and mutable, with the latter two indicated by `i_' and `m_' prefixes. For example: abstract object indexed[`T] isa table[int,T], indexed[`S >= T]; abstract object i_indexed[`T] isa indexed[T]; abstract object m_indexed[`T] isa indexed[T]; This troika of versions of each abstraction is a standard idiom among the collection classes. A generic base class defines the abstract read-only behavior of the object, but doesn't specify whether the collection is mutable. One subclass adds the immutability specification, and another subclass adds the mutator operations. An immutable collection may contain mutable objects that are side-effected while in the collection, but the collection itself cannot be changed. There are three varieties of mutation: replacement (removing one element but inserting another in its place), insertion (increasing the collection's size), and deletion (making the collection smaller). The latter two capabilities are captured by the abstract `extensible_collection[T]' and `removable_collection[T]' objects, which may be inherited by mutable objects. Separating the read-only from the read-write interfaces (e.g., `unordered_collection' vs. `m_unordered_collection') supports useful subtyping relationships among the read-only interfaces. To support the most reuse, the generic read-only interface should be used as a type declaration whenever possible. Only if mutation is required should the mutable subtype interface be used. The immutable interface must be distinguished from the generic read-only interface because the mutable interface cannot be a subtype of the immutable interface, considering the behavior specification implied by the immutable interface. For a given abstraction, e.g., `indexed', both the immutable and mutable versions containing elements of type `T' (or subtype of `T') subclass the generic version with elements of type `T' or any supertype of `T'. In addition, the immutable version, `i_indexed[T]', is a subtype of any immutable collection (of the same kind) of a supertype of `T': extend type i_indexed[`T] subtypes i_indexed[`S >= T]; In contrast, a mutable version of a type `T' has no subtyping relation to a mutable collection with a different element type, e.g., `m_indexed[int]' is unrelated to `m_indexed[num]' : `m_indexed[int]' cannot have floats stored in it (unlike `m_indexed[num]'), and so `m_indexed[int]' is not a subtype of `m_indexed[num]'. Also, `m_indexed[num]' can contain things other than `int's and reveal this through `do', `pick_any', etc. (unlike `m_indexed[int]'), so the reverse subtyping relation doesn't hold, either. Finally, the immutable and mutable versions subclass those of the abstraction higher in the class hierarchy: extend i_indexed[`T] isa i_table[int,T]; extend m_indexed[`T] isa m_table[int,T]; --) module Collection extends Comparable { --DOCTEX \subsection{Basic collections} --DOC `collection[T]' is a collection of items of some type `T' --DOC (or any subtype of `T'). A collection of some type `T' is a subtype of --DOC all collections of types that are supertypes of `T'. abstract object collection[T]; extend type collection[`T] subtypes collection[`S >= T]; --DOC Collections support a `length' operation (implemented by subclasses) --DOC which returns the number of elements in the collection, --DOC plus a number of length-related predicates. signature length(collection[`T]):int; --DOCSHORT length = 0 method is_empty(c@:collection[`T]):bool { c.length = 0 } --DOCSHORT length = 0 --DOCSHORT length > 0 method non_empty(c@:collection[`T]):bool { not(c.is_empty) } --DOCSHORT length = 1 method is_singleton(c@:collection[`T]):bool { c.length = 1 } --DOCSHORT length > 0 method is_multiple(c@:collection[`T]):bool { c.length > 1 } --DOCSKIP avoid putting predicates in the stdlib manual predicate empty_collection[T] isa collection[T] when collection.is_empty; predicate non_empty_collection[T] isa collection[T] when collection.non_empty; divide collection[T] into empty_collection[T], non_empty_collection[T]; predicate singleton_collection[T] isa non_empty_collection[T] when non_empty_collection.is_singleton; predicate multiple_collection[T] isa non_empty_collection[T] when non_empty_collection.is_multiple; divide non_empty_collection[T] into singleton_collection[T], multiple_collection[T]; --DOCENDSKIP ---------- -- Basic iteration ---------- --DOC Collections support a number of control structures. Primary among all --DOC control structures is the `do' method that iterates through the --DOC collection and invokes an argument closure on each element. Elements --DOC are processed in some unspecified order which may vary from --DOC invocation to invocation, even if the collection is not modified --DOC between finishing one `do' loop and starting the next. For example: --DOC --DOC myCollection.do(&(elem:elemType){ -- bind `elem' to each element --DOC ... -- of `myCollection' in turn --DOC }); --DOC signature do(collection[`T], closure:&(T):void):void; --DOC The collection cannot be modified while `do' is active without --DOC potentially bizarre results. A related control structure, --DOC `do_allowing_updates', allows the collection to be modified during the --DOC iteration. (The effect of modification during iteration depends on --DOC the kind of collection and the kind of update.) method do_allowing_updates(t@:collection[`T], closure:&(T):void):void { do(t, closure); } ---------- -- Searching ---------- --DOC The `includes' method computes whether the collection `c' contains an --DOC element which is equal, using `=', to `x'. The `includes_all' --DOC method returns `true' if `c' contains elements which are equal to --DOC all the elements of `c2'. The `includes_some' method returns --DOC `true' if `c' contains an element which satisfies the predicate `test'. method includes(c@:collection[`T <= comparable[T]], x:T):bool { do(c, &(e:T){ if(e = x, { ^ true }); }); false } method includes_all(c1@:collection[`T <= comparable[T]], c2:collection[T]):bool { do(c2, &(x:T){ if_false(includes(c1, x), { ^ false }); }); true } method includes_some(c@:collection[`T], test:&(T):bool):bool { do(c, &(e:T){ if(eval(test, e), { ^ true }); }); false } --DOC The `count' method returns the number of times an element --DOC appears in a collection. method count(c@:collection[`T <= comparable[T]], x:T):int { let var cnt:int := 0; do(c, &(e:T){ if(e = x, { cnt := cnt.succ; }); }); cnt } --DOC The `count_pred' method returns the number of elements of the --DOC collection `c' for which the predicate `test' evaluates to true. method count_pred(c@:collection[`T <= comparable[T]], test:&(T):bool):int { let var cnt:int := 0; do(c, &(e:T){ if(eval(test, e), { cnt := cnt.succ; }); }); cnt } --DOC The `find' method returns an element of collection `c' --DOC satisfying the predicate `test'. method find(c@:collection[`T], test:&(T):bool):T { find(c, test, { error("item not found") }) } method find(c@:collection[`T], test:&(T):bool, if_absent:&():`S):T|S (** inline **) { do(c, &(e:T){ if(eval(test, e), { ^e }); }); eval(if_absent) } --DOC Tests whether the predicate is true of `every' or `any' collection --DOC element. method every(c@:collection[`T], test:&(T):bool):bool { do(c, &(e:T){ if_false(eval(test, e), { ^ false }); }); true } method any(c@:collection[`T], test:&(T):bool):bool { do(c, &(e:T){ if(eval(test, e), { ^ true }); }); false } ---------- -- Reduction ---------- --DOC Implements the classic functional reduce operation over collections. method reduce(t@:collection[`T], bin_op:&(T,S):S, init:`S):S { let var result:S := init; t.do(&(x:T){ result := eval(bin_op, x, result); }); result } --DOC Implements a streamlined version that works only on nonempty collections, --DOC invoking a closure on an empty collection, and doesn't require an init --DOC value. method reduce(t@:collection[`T], bin_op:&(T,T):T):T { t.reduce_nonempty(bin_op) } method reduce_nonempty(t@:collection[`T], bin_op:&(T,T):T):T { t.reduce_nonempty(bin_op, { error("reducing an empty collection") }) } method reduce_nonempty(t@:collection[`T], bin_op:&(T,T):T, if_empty:&():T):T { let var result:T := t.pick_any({ ^ eval(if_empty) }); let var first:bool := true; t.do(&(x:T){ if(first, { result := x; first := false; }, { result := eval(bin_op, x, result); }); }); result } --DOC The `min', `max', and `average' methods are defined on collections as well --DOC as pairs of values. They are synonyms of the `(min|max|average)_over_all' --DOC methods, which optionally take a closure to handle empty conditions. method min(t@:collection[`T <= ordered[T]]):T { min_over_all(t) } method min_over_all(t@:collection[`T <= ordered[T]]):T { min_over_all(t, { error("computing minimum of empty collection") }) } method min_over_all(t@:collection[`T <= ordered[T]], if_empty:&():T):T { reduce_nonempty(t, &(a:T,b:T){ min(a,b) }, if_empty) } method max(t@:collection[`T <= ordered[T]]):T { max_over_all(t) } method max_over_all(t@:collection[`T <= ordered[T]]):T { max_over_all(t, { error("computing maximum of empty collection") }) } method max_over_all(t@:collection[`T <= ordered[T]], if_empty:&():T):T { reduce_nonempty(t, &(a:T,b:T){ max(a,b) }, if_empty) } method average(t@:collection[`T <= num]):T { average_over_all(t) } method average_over_all(t@:collection[`T <= num]):T { average_over_all(t, { error("computing average of empty collection") }) } method average_over_all(t@:collection[`T <= num], if_empty:&():T):T { cast[T](reduce_nonempty(t, &(a:T,b:T){ a+b }, if_empty) / t.length) } ---------- -- Selection ---------- --DOC The `pick_any' method returns some element of the collection, --DOC invoking `if_empty' or producing an error if the collection is --DOC empty. The `only' method returns the only element of the argument --DOC collection, producing an error or invoking `if_non_singleton' if the --DOC collection has zero or multiple elements. method pick_any(c@:collection[`T]):T { pick_any(c, { error("collection is empty") }) } method pick_any(c@:collection[`T1], if_empty:&():`T2):T1|T2 { c.do(&(x:T1){ ^ x }); eval(if_empty) } method only(c@:collection[`T]):T { c.only({ error("should be a singleton collection") }) } method only(c@:collection[`T1], if_non_singleton:&():`T2):T1|T2 { if(c.is_singleton, { c.pick_any }, if_non_singleton) } ---------- -- Copying ---------- --DOC Collections can be copied. This copy is a shallow copy; the --DOC elements of the collection are not copied. If the collection is --DOC immutable, the copy function usually returns the collection itself --DOC without doing a copy. -- need to figure out how to say "@:`S <= collection[`T]):S" signature copy(collection[`T]):collection[T]; ---------- -- Conversion ---------- --DOC Collection conversions may or may not involve a copy, e.g., --DOC `as_vector' on a `list' copies, while `as_vector' on a `vector' does not. -- overriding versions of these operations try to avoid copying if their -- argument is already of the desired kind; don't assume as_X does a copy! method as_ordered_collection(c@:collection[`T]):ordered_collection[T] { let v:m_vector[T] := new_m_vector[T](c.length); let var i:int := 0; c.do(&(elem:T){ v!i := elem; i := i.succ; }); v } method as_vector(c@:collection[`T]):vector[T] { c.as_i_vector } method as_i_vector(c@:collection[`T]):i_vector[T] { new_i_vector_init_from[T](c.as_ordered_collection, &(x:T){ x }) } method as_m_vector(c@:collection[`T]):m_vector[T] { new_m_vector_init_from[T](c.as_ordered_collection, &(x:T){ x }) } method as_m_indexed(c@:collection[`T]):m_indexed[T] { c.as_m_vector } method as_list_set(c@:collection[`T <= comparable[T]]):m_set[T] { let s := new_list_set[T](); s.add_all(c); s } ---------- -- Printing ---------- --DOC Various variations on printing a collection are available. The --DOC standard `print_string' behavior includes `open_brace', --DOC `elems_print_string', and then `close_brace'. By default --DOC `open_brace' contains the name of the collection and an open brace, --DOC `elems_print_string' consists of the elements of the collection, --DOC separated by the `elem_separator' (by default, a comma), and --DOC `close_brace' is a close brace. method print_string(c@:collection[`T]):string { c.open_brace || c.elems_print_string || c.close_brace } method print(c@:collection[`T]):void { c.open_brace.print; c.elems_print; c.close_brace.print; } signature collection_name(collection[`T]):string; method open_brace(c@:collection[`T]):string { c.collection_name || "{" } method close_brace(@:collection[`T]):string { "}" } method elems_print_string(c@:collection[`T]):string { let var str:string|collector[string] := ""; let var first:bool := true; do(c, &(e:T){ str := str && elem_print_string(c, e, first); first := false; }); str.flat_string } method elems_print(c@:collection[`T]):void { let var first:bool := true; do(c, &(e:T){ elem_print(c, e, first); first := false; }); } method elem_separator(@:collection[`T]):string { ", " } method elem_print_string(t@:collection[`T], elem:T, first:bool):string { if(first, { elem.print_string }, { t.elem_separator || elem.print_string }) } method elem_print(t@:collection[`T], elem:T, first:bool):void { elem_print_string(t, elem, first).print; } };