-- Copyright 1993-1998, by the Cecil Project
-- Department of Computer Science and Engineering, University of Washington
-- See the LICENSE file for license information.

(--DOC
An `m_set' implementation using an open hashing algorithm.  Set elements
must be hashable.  The `new_[chained_]hash_set' methods optionally take a
`max_size' argument, which is the expected maximum size of the set; the
initial size of all newly-created sets is 0.  All hashing set
implementations automatically resize if the set grows too large or too
small.

Warning: the `do_allowing_updates' function on `hash_set' (and
`hash_table' and `hash_keyed_set', the other open-hash-table-based
implementations) may not support more than one `add' (or `store' or
`fetch_or_init' or any other operation that increases the size of the
collection) during an iteration: resizing is disabled while iterating,
but a second add may require a resize.
--)

module HashSet extends OpenHashTableImpl {

  template object hash_set[T <= hashable[T]] isa m_set[T], open_table[T];

  method collection_name(@:hash_set[`T]):string { "hash_set" }

  method do(t@:hash_set[`T], c:&(T):void):void {
    t.key_vector.do(&(x:T){
      if(x.is_valid_bucket_element, { eval(c, x); });
    });
  }

  method do_allowing_updates(t@:hash_set[`T], c:&(T):void):void {
    -- disable resizing during iteration
    allowing_updates.begin_allowing_updates(t);
    unwind_protect({
      do(t, c);
    }, {
      allowing_updates.done_allowing_updates(t);
      -- perform any resize that was deferred during the iteration
      if(t.should_grow, {
        t.resize(t.grown_size);
      }, {
        if(t.should_shrink, { t.resize(t.shrunk_size); });
      });
    });
  }

  method includes(t@:hash_set[`T], x:T):bool {
    -- probe buckets starting at x's hash position; an empty bucket means
    -- x cannot be present, while a removed (tombstone) bucket means
    -- probing must continue
    t.key_vector.buckets_in_probing_order_do(hash(x, t.key_vector.length),
      &(i:int, k:T){
        if(k !== empty_hash_bucket, {
          if(k !== removed_hash_bucket & { x = k }, { ^ true });
        }, {
          ^ false
        });
      });
    false
  }

  method add(t@:hash_set[`T], x:T):void {
    -- remember the first tombstone seen, so x can reuse it rather than
    -- lengthening the probe chain
    let var store_pos:int := -1;
    t.key_vector.buckets_in_probing_order_do(hash(x, t.key_vector.length),
      &(i:int, k:T){
        if(k !== empty_hash_bucket, {
          if(k !== removed_hash_bucket, {
            if(x = k, { ^ });          -- already a member; nothing to do
          }, {
            if(store_pos < 0, { store_pos := i; });
          });
        }, {
          -- hit an empty bucket: x is absent, so insert it, preferring an
          -- earlier tombstone if one was seen
          t.length := t.length.succ;
          t.key_vector.store(if(store_pos < 0, { i }, { store_pos }), x);
          if(t.should_grow, { t.resize(t.grown_size); });
          ^
        });
      });
    -- the whole table was probed without finding x or an empty bucket
    assert(store_pos >= 0, "should have found a free bucket somewhere");
    t.length := t.length.succ;
    t.key_vector.store(store_pos, x);
    if(t.should_grow, { t.resize(t.grown_size); });
  }

  method remove(t@:hash_set[`T], x:T, if_absent:&():void):void {
    t.key_vector.buckets_in_probing_order_do(hash(x, t.key_vector.length),
      &(i:int, k:T){
        if(k !== empty_hash_bucket, {
          if(k !== removed_hash_bucket & { x = k }, {
            t.remove_bucket(i);
            t.length := t.length.pred;
            if(t.should_shrink, { t.resize(t.shrunk_size); });
            ^
          });
        }, {
          ^ eval(if_absent)
        });
      });
    eval(if_absent)
  }

  method remove_if(t@:hash_set[`T], pred:&(T):bool):int {
    let var count:int := 0;
    t.key_vector.do_associations(&(i:int, k:T){
      if(k.is_valid_bucket_element & { eval(pred, k) }, {
        t.remove_bucket(i);
        t.length := t.length.pred;
        count := count + 1;
      });
    });
    if(t.should_shrink, { t.resize(t.shrunk_size); });
    count
  }

  method remove_any(t@:hash_set[`T], if_empty:&():`S):T|S {
    t.key_vector.do_associations(&(i:int, k:T){
      if(k.is_valid_bucket_element, {
        let result:T := k;
        t.remove_bucket(i);
        t.length := t.length.pred;
        if(t.should_shrink, { t.resize(t.shrunk_size); });
        ^ result
      });
    });
    eval(if_empty)
  }

  method remove_all(t@:hash_set[`T]):void { resend(t@open_table[T]) }

  method union(m1@:hash_set[`T], m2@:hash_set[T]):m_set[T] {
    -- If we had factory objects, then we could just use the union
    -- operation defined for sets.  We override here only so that the
    -- result is a hash_set.
    let r:hash_set[T] := new_hash_set[T]((m1.length + m2.length) * 2 + 1);
    -- m1's elements cannot already be in the fresh set; m2's elements may
    -- duplicate m1's, so they go through the general add
    do(m1, &(e:T){ add_nonmember(r,e); });
    do(m2, &(e:T){ add(r,e); });
    r
  }

  method intersection(m1@:hash_set[`T], m2@:hash_set[T]):m_set[T] {
    -- Again, only overridden to get the result to be a hash_set.
    -- How many buckets should we put in the result?  I chose the maximum
    -- of the number of buckets in the two args, but this may not be the
    -- right tradeoff.
    let r:hash_set[T] := new_hash_set[T](max(m1.key_vector.length,
                                             m2.key_vector.length));
    -- iterate over the smaller set, probing the larger one
    if(m1.length <= m2.length, {
      do(m1, &(e:T){ if(includes(m2,e), { add_nonmember(r,e); }); });
    }, {
      do(m2, &(e:T){ if(includes(m1,e), { add_nonmember(r,e); }); });
    });
    r
  }

  protected method new_buckets(t@:hash_set[`T], size:int):m_indexed[m_set[T]] {
    new_m_vector_init[m_set[T]](size, &(i:int){ t.new_bucket })
  }
  protected method new_bucket(t@:hash_set[`T]):m_set[T] { new_list_set[T]() }

  method new_hash_set[T <= hashable[T]]():hash_set[T] {
    new_hash_set[T](default_open_table_size)
  }
  method new_hash_set[T <= hashable[T]](size:int):hash_set[T] {
    let real_size:int := prime_table_size(size);
    concrete object isa hash_set[T] {
      key_vector := new_m_vector[T](real_size, cast[T](empty_hash_bucket)) }
  }

  method copy_empty(c@:hash_set[`T]):hash_set[T] { new_hash_set[T]() }

  method copy(t@:hash_set[`T]):hash_set[T] {
    concrete object isa hash_set[T] {
      key_vector := t.key_vector.copy,
      length := t.length }
  }

  private method resize(t@:hash_set[`T], new_size:int):void {
    let old_len:int := t.length;
    let old_key_vector:m_vector[T] := t.key_vector;
    t.key_vector := new_m_vector[T](new_size, cast[T](empty_hash_bucket));
    t.length := 0;
    -- rehash every live element into the new key vector
    old_key_vector.do(&(k:T){
      if(k.is_valid_bucket_element, { t.add_nonmember(k); });
    });
    assert(old_len = t.length, "should have the same elements");
  }

} -- end module HashSet
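
-- A minimal usage sketch (not part of the library), assuming `int' is an
-- instance of `hashable[int]' and that the standard library's `print_line'
-- is available; apart from the hash_set interface above, the names here
-- are illustrative:
--
--   let s:hash_set[int] := new_hash_set[int](10);
--   s.add(3);
--   s.add(4);
--   s.add(3);                                 -- duplicate add; length stays 2
--   if(s.includes(3), { "3 is a member".print_line; });
--   do(s, &(e:int){ e.print_line; });         -- visits 3 and 4 in some order
--   s.remove(7, { "7 was absent".print_line; });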