-- Copyright 1993-1998, by the Cecil Project -- Department of Computer Science and Engineering, University of Washington -- See the LICENSE file for license information. -- This file defines the general set data type, plus two implementations: -- a list-based implementation and a hash-table-based implementation. ---------- -- Abstract sets ---------- --DOC Sets are a specialization of unordered collections that --DOC explicitly disallow duplicates. abstract object set[T <= comparable[T]] isa unordered_collection[T]; extend type set[`T] subtypes set[`S >= T]; ---------- -- Basic set operations ---------- --DOC Sets support a collection of functional set operations which, given --DOC argument sets, return a new set. method union(m1@:set[`T], m2@:set[T]):m_set[T] { let r:m_set[T] := new_list_set[T](); do(m1, &(e:T){ add_nonmember(r,e); }); do(m2, &(e:T){ add(r,e); }); r } method intersection(m1@:set[`T], m2@:set[T]):m_set[T] { let r:m_set[T] := new_list_set[T](); do(m1, &(e:T){ if(includes(m2,e), { add_nonmember(r,e); }); }); r } method difference(m1@:set[`T], m2@:set[T]):m_set[T] { let r:m_set[T] := new_list_set[T](); do(m1, &(e:T){ if_false(includes(m2,e), { add_nonmember(r,e); }); }); r } method is_disjoint(m1@:set[`T], m2@:set[T]):bool { do(m1, &(e:T){ if(includes(m2,e), { ^ false }); }); true } method is_subset(m1@:set[`T], m2@:set[T]):bool { let i:set[T] := m1.intersection(m2); i.length = m1.length } method count(c@:set[`T], x:T):int { if(includes(c, x), { 1 }, { 0 }) } method collection_name(@:set[`T]):string { "set" } signature copy(set[`T]):set[T]; ---------- -- Immutable set behavior ---------- abstract object i_set[T <= comparable[T]] isa set[T], i_unordered_collection[T]; method collection_name(@:i_set[`T]):string { "i_set" } --DOC One standard implementation of an immutable set is the concrete --DOC object `empty_set'. -- Make this a named concrete object for convenience & efficiency concrete object empty_set[T <= comparable[T]] isa i_set[T]; method print_string(@:empty_set[`T]):string { "empty_set" } method copy(s@:empty_set[`T]):empty_set[T] { s } method copy_empty(s@:empty_set[`T]):empty_set[T] { s } method do(s@:empty_set[`T], c:&(T):void):void {} method do_allowing_updates(s@:empty_set[`T], c:&(T):void):void {} signature do_allowing_updates(empty_set[`T]|m_set[`T], c:&(T):void):void; method length(@:empty_set[`T]):int { 0 } method is_empty(@:empty_set[`T]):bool { true } ---------- -- Mutable set behavior ---------- --DOC Mutable sets can be added to. abstract object m_set[T <= comparable[T]] isa set[T], m_unordered_collection[T], removable_collection[T]; method collection_name(@:m_set[`T]):string { "m_set" } method add(m@:m_set[`T], x:T):void (** no_inline **) { -- only add to set if not already present if_false(includes(m, x), { add_nonmember(m, x); }); } -- Adds the element, and returns true if it wasn't there before it was added method check_if_missing_and_add(m@:m_set[`T], x:T):bool { if_false(includes(m, x), { add_nonmember(m, x); true }, { false }) } signature add_functional(m_set[`T], x:T):m_set[T]; -- refining result type signature copy_empty(m_set[`T]):m_set[T]; -- refining result type method copy(c@:m_set[`T]):m_set[T] { let copy:m_set[T] := c.copy_empty; copy.add_all_nonmember(c); copy } --DOCTEX \subsubsection{Set implementations} ---------- -- a list-based m_set implementation ---------- --DOC `list_set' is an implementation of `m_set' using a linked list as the core --DOC representation. template object list_set[T <= comparable[T]] isa m_set[T]; private extend list_set[`T] isa list_bag[T]; private field signature elems(list_set[`T]):m_list[T]; method collection_name(@:list_set[`T]):string { "list_set" } method add(m@:list_set[`T], x:T):void { resend(m@m_set[T], x); } method add_nonmember(m@:list_set[`T], x:T):void { -- assume the element isn't already present. -- this is list_bag::add's code; want to resend and change name of msg m.elems.add_last(x); } method new_list_set[T <= comparable[T]]():list_set[T] { concrete object isa list_set[T] } method copy_empty(c@:list_set[`T]):list_set[T] { new_list_set[T]() } ---------- -- a chaining-hash-table-based set implementation ---------- module ChainedHashSet { --DOC `chained_hash_set' is an `m_set' implementation using a closed --DOC hashing algorithm. Set elements must be hashable. template object chained_hash_set[T <= hashable[T]] isa m_set[T]; private var field buckets(t@:chained_hash_set[`T]):m_indexed[m_set[T]] := t.new_buckets(default_chained_hash_set_size); private put var field length(t@:chained_hash_set[`T]):int := 0; method collection_name(@:chained_hash_set[`T]):string { "chained_hash_set" } private method bucket(t@:chained_hash_set[`T], x:T):m_set[T] { t.buckets ! hash(x, t.buckets.length) } method do(t@:chained_hash_set[`T], c:&(T):void):void { do(t.buckets, &(b:m_set[T]){ do(b, c); }); } method do_allowing_updates(t@:chained_hash_set[`T], c:&(T):void):void { -- disable resizing during iteration allowing_updates.begin_allowing_updates(t); unwind_protect({ do(t, c); }, { allowing_updates.done_allowing_updates(t); if(t.should_grow, { t.resize(t.grown_size); }, { if(t.should_shrink, { t.resize(t.shrunk_size); }); }); }); } method includes(t@:chained_hash_set[`T], x:T):bool { t.bucket(x).includes(x) } method add(t@:chained_hash_set[`T], x:T):void { let b:m_set[T] := bucket(t, x); let old_length:int := b.length; b.add(x); t.length := t.length - old_length + b.length; if(t.should_grow, { t.resize(t.grown_size); }); } method add_nonmember(t@:chained_hash_set[`T], x:T):void { -- assume the element isn't already present let b:m_set[T] := bucket(t, x); b.add_nonmember(x); t.length := t.length.succ; if(t.should_grow, { t.resize(t.grown_size); }); } method remove(t@:chained_hash_set[`T], x:T, if_absent:&():void):void { let b:m_set[T] := bucket(t, x); let old_length:int := b.length; b.remove(x, { eval(if_absent); ^ }); t.length := t.length - old_length + b.length; if(t.should_shrink, { t.resize(t.shrunk_size); }); } method remove_if(t@:chained_hash_set[`T], pred:&(T):bool):int { let var count:int := 0; t.buckets.do(&(b:m_set[T]){ count := count + b.remove_if(pred); }); t.length = t.length - count; if(t.should_shrink, { t.resize(t.shrunk_size); }); count } method remove_any(t@:chained_hash_set[`T], if_empty:&():`S):T|S { t.buckets.do(&(ms:m_set[T]){ if(ms.non_empty, { t.length := t.length.pred; let result:T := remove_any(ms); if(t.should_shrink, { t.resize(t.shrunk_size); }); ^ result }); }); eval(if_empty) } method remove_all(t@:chained_hash_set[`T]):void { t.length := 0; if(t.should_shrink, { -- will create fresh, empty buckets t.resize(t.shrunk_size); }, { -- need to clear those buckets that remain t.buckets.do(&(ms:m_set[T]){ ms.remove_all; }); }); } method union(m1@:chained_hash_set[`T], m2@:chained_hash_set[T]):m_set[T] { -- If we had factory objects, then we could just use the union -- operation defined for sets. We only had to override to get the result -- to be a chained_hash_set. let r:chained_hash_set[T] := new_chained_hash_set[T](max(m1.buckets.length, m2.buckets.length)); do(m1, &(e:T){ add_nonmember(r,e); }); do(m2, &(e:T){ add(r,e); }); r } method intersection(m1@:chained_hash_set[`T], m2@:chained_hash_set[T]):m_set[T] { -- Again, only overridden to get the result to be a chained_hash_set. -- How many buckets should we put in the result? I chose the average -- of the number of buckets in the two args, but this may not be the right -- tradeoff. let r:chained_hash_set[T] := new_chained_hash_set[T](average(m1.buckets.length, m2.buckets.length)); if(m1.length > m2.length, { do(m1, &(e:T){ if(includes(m2,e), { add_nonmember(r,e); }); }); }, { do(m2, &(e:T){ if(includes(m1,e), { add_nonmember(r,e); }); }); }); r } protected method new_buckets(t@:chained_hash_set[`T], size:int):m_indexed[m_set[T]] { new_m_vector_init[m_set[T]](max(1,size), &(i:int){ t.new_bucket }) } protected method new_bucket(t@:chained_hash_set[`T]):m_set[T] { new_list_set[T]() } let var default_chained_hash_set_size:int := 1; method new_chained_hash_set[T <= hashable[T]]():chained_hash_set[T] { new_chained_hash_set[T](default_chained_hash_set_size) } method new_chained_hash_set[T <= hashable[T]](size:int):chained_hash_set[T] { let t:chained_hash_set[T] := concrete object isa chained_hash_set[T]; t.buckets := t.new_buckets(size); t } method copy_empty(c@:chained_hash_set[`T]):chained_hash_set[T] { new_chained_hash_set[T]() } private method should_grow(t@:chained_hash_set[`T]):bool { -- more than one elem per bucket... allowing_updates.updates_allowed(t).not & { t.length > 5 & { t.length > t.buckets.length } } } private method grown_size(t@:chained_hash_set[`T]):int { average(t.buckets.length * 2, t.length) } private method should_shrink(t@:chained_hash_set[`T]):bool { -- less than 1/5 full... allowing_updates.updates_allowed(t).not & { t.buckets.length > 8 & { t.length * 5 < t.buckets.length } } } private method shrunk_size(t@:chained_hash_set[`T]):int { average(t.buckets.length / 2, t.length) } private method resize(t@:chained_hash_set[`T], new_size:int):void { let old_len:int := t.length; let old_buckets:m_indexed[m_set[T]] := t.buckets; t.buckets := t.new_buckets(new_size); t.length := 0; old_buckets.do(&(s:m_set[T]){ s.do(&(x:T){ t.add_nonmember(x); }); }); assert(old_len = t.length, "should have the same elements"); } } -- end module ChainedHashSet