-- Copyright 1993-1998, by the Cecil Project
-- Department of Computer Science and Engineering, University of Washington
-- See the LICENSE file for license information.

(--DOC
An `m_set' implementation using an open hashing algorithm.  Set elements
must be hashable.  The `new_[chained_]hash_set' methods optionally take a
`max_size' argument, which is the expected maximum size of the set; the
initial size of all newly-created sets is 0.  All hashing set
implementations automatically resize if the set grows too large or too
small.

Warning: the `do_allowing_updates' function on `hash_set' (and
`hash_table' and `hash_keyed_set', the other open-hash-table-based
implementations) may not support more than one `add' (or `store' or
`fetch_or_init' or any other operation that increases the size of the
collection) during an iteration: resizing is disabled while iterating,
but a second add may require a resize.
--)

module HashSet extends OpenHashTableImpl {

  template object hash_set[T <= hashable[T]] isa m_set[T], open_table[T];

  method collection_name(@:hash_set[`T]):string { "hash_set" }

  method do(t@:hash_set[`T], c:&(T):void):void {
    t.key_vector.do(&(x:T){
      if(x.is_valid_bucket_element, { eval(c, x); });
    });
  }

  method do_allowing_updates(t@:hash_set[`T], c:&(T):void):void {
    -- disable resizing during iteration
    allowing_updates.begin_allowing_updates(t);
    unwind_protect({
      do(t, c);
    }, {
      allowing_updates.done_allowing_updates(t);
      -- perform any resize that was deferred during the iteration
      if(t.should_grow, {
        t.resize(t.grown_size);
      }, {
        if(t.should_shrink, { t.resize(t.shrunk_size); });
      });
    });
  }

  method includes(t@:hash_set[`T], x:T):bool {
    -- probe buckets starting at x's hash position; an empty bucket means
    -- x cannot be present, while a removed (tombstone) bucket means
    -- probing must continue
    t.key_vector.buckets_in_probing_order_do(hash(x, t.key_vector.length),
      &(i:int, k:T){
        if(k !== empty_hash_bucket, {
          if(k !== removed_hash_bucket & { x = k }, { ^ true });
        }, {
          ^ false
        });
      });
    false
  }

  method add(t@:hash_set[`T], x:T):void {
    -- remember the first tombstone seen, so x can reuse it rather than
    -- lengthening the probe chain
    let var store_pos:int := -1;
    t.key_vector.buckets_in_probing_order_do(hash(x, t.key_vector.length),
      &(i:int, k:T){
        if(k !== empty_hash_bucket, {
          if(k !== removed_hash_bucket, {
            if(x = k, { ^ });          -- already a member; nothing to do
          }, {
            if(store_pos < 0, { store_pos := i; });
          });
        }, {
          -- hit an empty bucket: x is absent, so insert it, preferring an
          -- earlier tombstone if one was seen
          t.length := t.length.succ;
          t.key_vector.store(if(store_pos < 0, { i }, { store_pos }), x);
          if(t.should_grow, { t.resize(t.grown_size); });
          ^
        });
      });
    -- the whole table was probed without finding x or an empty bucket
    assert(store_pos >= 0, "should have found a free bucket somewhere");
    t.length := t.length.succ;
    t.key_vector.store(store_pos, x);
    if(t.should_grow, { t.resize(t.grown_size); });
  }

  method remove(t@:hash_set[`T], x:T, if_absent:&():void):void {
    t.key_vector.buckets_in_probing_order_do(hash(x, t.key_vector.length),
      &(i:int, k:T){
        if(k !== empty_hash_bucket, {
          if(k !== removed_hash_bucket & { x = k }, {
            t.remove_bucket(i);
            t.length := t.length.pred;
            if(t.should_shrink, { t.resize(t.shrunk_size); });
            ^
          });
        }, {
          ^ eval(if_absent)
        });
      });
    eval(if_absent)
  }

  method remove_if(t@:hash_set[`T], pred:&(T):bool):int {
    let var count:int := 0;
    t.key_vector.do_associations(&(i:int, k:T){
      if(k.is_valid_bucket_element & { eval(pred, k) }, {
        t.remove_bucket(i);
        t.length := t.length.pred;
        count := count + 1;
      });
    });
    if(t.should_shrink, { t.resize(t.shrunk_size); });
    count
  }

  method remove_any(t@:hash_set[`T], if_empty:&():`S):T|S {
    t.key_vector.do_associations(&(i:int, k:T){
      if(k.is_valid_bucket_element, {
        let result:T := k;
        t.remove_bucket(i);
        t.length := t.length.pred;
        if(t.should_shrink, { t.resize(t.shrunk_size); });
        ^ result
      });
    });
    eval(if_empty)
  }

  method remove_all(t@:hash_set[`T]):void { resend(t@open_table[T]) }

  method union(m1@:hash_set[`T], m2@:hash_set[T]):m_set[T] {
    -- If we had factory objects, then we could just use the union
    -- operation defined for sets.  We override here only so that the
    -- result is a hash_set.
    let r:hash_set[T] := new_hash_set[T]((m1.length + m2.length) * 2 + 1);
    -- m1's elements cannot already be in the fresh set; m2's elements may
    -- duplicate m1's, so they go through the general add
    do(m1, &(e:T){ add_nonmember(r,e); });
    do(m2, &(e:T){ add(r,e); });
    r
  }

  method intersection(m1@:hash_set[`T], m2@:hash_set[T]):m_set[T] {
    -- Again, only overridden to get the result to be a hash_set.
    -- How many buckets should we put in the result?  I chose the maximum
    -- of the number of buckets in the two args, but this may not be the
    -- right tradeoff.
    let r:hash_set[T] := new_hash_set[T](max(m1.key_vector.length,
                                             m2.key_vector.length));
    -- iterate over the smaller set, probing the larger one
    if(m1.length <= m2.length, {
      do(m1, &(e:T){ if(includes(m2,e), { add_nonmember(r,e); }); });
    }, {
      do(m2, &(e:T){ if(includes(m1,e), { add_nonmember(r,e); }); });
    });
    r
  }

  protected method new_buckets(t@:hash_set[`T], size:int):m_indexed[m_set[T]] {
    new_m_vector_init[m_set[T]](size, &(i:int){ t.new_bucket })
  }
  protected method new_bucket(t@:hash_set[`T]):m_set[T] { new_list_set[T]() }

  method new_hash_set[T <= hashable[T]]():hash_set[T] {
    new_hash_set[T](default_open_table_size)
  }
  method new_hash_set[T <= hashable[T]](size:int):hash_set[T] {
    let real_size:int := prime_table_size(size);
    concrete object isa hash_set[T] {
      key_vector := new_m_vector[T](real_size, cast[T](empty_hash_bucket)) }
  }

  method copy_empty(c@:hash_set[`T]):hash_set[T] { new_hash_set[T]() }

  method copy(t@:hash_set[`T]):hash_set[T] {
    concrete object isa hash_set[T] {
      key_vector := t.key_vector.copy,
      length := t.length }
  }

  private method resize(t@:hash_set[`T], new_size:int):void {
    let old_len:int := t.length;
    let old_key_vector:m_vector[T] := t.key_vector;
    t.key_vector := new_m_vector[T](new_size, cast[T](empty_hash_bucket));
    t.length := 0;
    -- rehash every live element into the new key vector
    old_key_vector.do(&(k:T){
      if(k.is_valid_bucket_element, { t.add_nonmember(k); });
    });
    assert(old_len = t.length, "should have the same elements");
  }

} -- end module HashSet
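
-- A minimal usage sketch (not part of the library), assuming `int' is an
-- instance of `hashable[int]' and that the standard library's `print_line'
-- is available; apart from the hash_set interface above, the names here
-- are illustrative:
--
--   let s:hash_set[int] := new_hash_set[int](10);
--   s.add(3);
--   s.add(4);
--   s.add(3);                                 -- duplicate add; length stays 2
--   if(s.includes(3), { "3 is a member".print_line; });
--   do(s, &(e:int){ e.print_line; });         -- visits 3 and 4 in some order
--   s.remove(7, { "7 was absent".print_line; });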