-- Copyright 1993-1998, by the Cecil Project
-- Department of Computer Science and Engineering, University of Washington
-- See the LICENSE file for license information.

-- This file defines the general set data type, plus two implementations:
-- a list-based implementation and a hash-table-based implementation.


----------
-- Abstract sets
----------

--DOC Sets are a specialization of unordered collections that
--DOC explicitly disallow duplicates.
abstract object set[T <= comparable[T]] isa unordered_collection[T];
extend type set[`T] subtypes set[`S >= T];

----------
-- Basic set operations
----------

--DOC Sets support a collection of functional set operations which, given
--DOC argument sets, return a new set.

method union(m1@:set[`T], m2@:set[T]):m_set[T] {
    let r:m_set[T] := new_list_set[T]();
    do(m1, &(e:T){ add_nonmember(r,e); });
    do(m2, &(e:T){ add(r,e); });
    r }
method intersection(m1@:set[`T], m2@:set[T]):m_set[T] {
    let r:m_set[T] := new_list_set[T]();
    do(m1, &(e:T){ if(includes(m2,e), { add_nonmember(r,e); }); });
    r }
method difference(m1@:set[`T], m2@:set[T]):m_set[T] {
    let r:m_set[T] := new_list_set[T]();
    do(m1, &(e:T){ if_false(includes(m2,e), { add_nonmember(r,e); }); });
    r }
method is_disjoint(m1@:set[`T], m2@:set[T]):bool {
    do(m1, &(e:T){ if(includes(m2,e), { ^ false }); });
    true }
method is_subset(m1@:set[`T], m2@:set[T]):bool {
    let i:set[T] := m1.intersection(m2);
    i.length = m1.length }

method count(c@:set[`T], x:T):int { if(includes(c, x), { 1 }, { 0 }) }

method collection_name(@:set[`T]):string { "set" }

signature copy(set[`T]):set[T];


----------
-- Immutable set behavior
----------

abstract object i_set[T <= comparable[T]] isa set[T], i_unordered_collection[T];

method collection_name(@:i_set[`T]):string { "i_set" }

--DOC One standard implementation of an immutable set is the concrete
--DOC object `empty_set'.

-- Make this a named concrete object for convenience & efficiency
concrete object empty_set[T <= comparable[T]] isa i_set[T];
  method print_string(@:empty_set[`T]):string { "empty_set" }
  method copy(s@:empty_set[`T]):empty_set[T] { s }
  method copy_empty(s@:empty_set[`T]):empty_set[T] { s }
  method do(s@:empty_set[`T], c:&(T):void):void {}
  method do_allowing_updates(s@:empty_set[`T], c:&(T):void):void {}
  signature do_allowing_updates(empty_set[`T]|m_set[`T], c:&(T):void):void;
  method length(@:empty_set[`T]):int { 0 }
  method is_empty(@:empty_set[`T]):bool { true }


----------
-- Mutable set behavior
----------

--DOC Mutable sets can be added to.

abstract object m_set[T <= comparable[T]] isa set[T],
					      m_unordered_collection[T],
					      removable_collection[T];

method collection_name(@:m_set[`T]):string { "m_set" }
method add(m@:m_set[`T], x:T):void (** no_inline **) {
    -- only add to set if not already present
    if_false(includes(m, x), { add_nonmember(m, x); });
}
-- Adds the element, and returns true if it wasn't there before it was added
method check_if_missing_and_add(m@:m_set[`T], x:T):bool {
    if_false(includes(m, x),
	     { add_nonmember(m, x);  true },
	     { false }) }

signature add_functional(m_set[`T], x:T):m_set[T]; -- refining result type
signature copy_empty(m_set[`T]):m_set[T]; -- refining result type
method copy(c@:m_set[`T]):m_set[T] {
    let copy:m_set[T] := c.copy_empty;
    copy.add_all_nonmember(c);
    copy }

--DOCTEX \subsubsection{Set implementations}

----------
-- a list-based m_set implementation
----------

--DOC `list_set' is an implementation of `m_set' using a linked list as the core
--DOC representation.
template object list_set[T <= comparable[T]] isa m_set[T];
  private extend list_set[`T] isa list_bag[T];
private field signature elems(list_set[`T]):m_list[T];
method collection_name(@:list_set[`T]):string { "list_set" }
method add(m@:list_set[`T], x:T):void { resend(m@m_set[T], x); }
method add_nonmember(m@:list_set[`T], x:T):void {
    -- assume the element isn't already present.
    -- this is list_bag::add's code; want to resend and change name of msg
    m.elems.add_last(x);
}

method new_list_set[T <= comparable[T]]():list_set[T] {
    concrete object isa list_set[T] }

method copy_empty(c@:list_set[`T]):list_set[T] { new_list_set[T]() }


----------
-- a chaining-hash-table-based set implementation
----------

module ChainedHashSet {

--DOC `chained_hash_set' is an `m_set' implementation using a closed
--DOC hashing algorithm.  Set elements must be hashable.

template object chained_hash_set[T <= hashable[T]] isa m_set[T];
private var field buckets(t@:chained_hash_set[`T]):m_indexed[m_set[T]]
				:= t.new_buckets(default_chained_hash_set_size);
private put var field length(t@:chained_hash_set[`T]):int := 0;

method collection_name(@:chained_hash_set[`T]):string { "chained_hash_set" }

private method bucket(t@:chained_hash_set[`T], x:T):m_set[T] {
    t.buckets ! hash(x, t.buckets.length) }

method do(t@:chained_hash_set[`T], c:&(T):void):void {
    do(t.buckets, &(b:m_set[T]){ do(b, c); });
}

method do_allowing_updates(t@:chained_hash_set[`T], c:&(T):void):void {
    -- disable resizing during iteration
    allowing_updates.begin_allowing_updates(t);
    unwind_protect({
	do(t, c);
    }, {
	allowing_updates.done_allowing_updates(t);
	if(t.should_grow, {
	    t.resize(t.grown_size);
	}, {
	    if(t.should_shrink, {
		t.resize(t.shrunk_size);
	    });
	});
    });
}

method includes(t@:chained_hash_set[`T], x:T):bool {
    t.bucket(x).includes(x) }

method add(t@:chained_hash_set[`T], x:T):void {
    let b:m_set[T] := bucket(t, x);
    let old_length:int := b.length;
    b.add(x);
    t.length := t.length - old_length + b.length;
    if(t.should_grow, { t.resize(t.grown_size); });
}
method add_nonmember(t@:chained_hash_set[`T], x:T):void {
    -- assume the element isn't already present
    let b:m_set[T] := bucket(t, x);
    b.add_nonmember(x);
    t.length := t.length.succ;
    if(t.should_grow, { t.resize(t.grown_size); });
}

method remove(t@:chained_hash_set[`T], x:T, if_absent:&():void):void {
    let b:m_set[T] := bucket(t, x);
    let old_length:int := b.length;
    b.remove(x, { eval(if_absent); ^ });
    t.length := t.length - old_length + b.length;
    if(t.should_shrink, { t.resize(t.shrunk_size); });
}

method remove_if(t@:chained_hash_set[`T], pred:&(T):bool):int {
    let var count:int := 0;
    t.buckets.do(&(b:m_set[T]){
	count := count + b.remove_if(pred);
    });
    t.length = t.length - count;
    if(t.should_shrink, { t.resize(t.shrunk_size); });
    count
}

method remove_any(t@:chained_hash_set[`T], if_empty:&():`S):T|S {
    t.buckets.do(&(ms:m_set[T]){
	if(ms.non_empty, {
	    t.length := t.length.pred;
	    let result:T := remove_any(ms);
	    if(t.should_shrink, { t.resize(t.shrunk_size); });
	    ^ result
	});
    });
    eval(if_empty) }

method remove_all(t@:chained_hash_set[`T]):void {
    t.length := 0;
    if(t.should_shrink, {
	-- will create fresh, empty buckets
	t.resize(t.shrunk_size);
    }, {
	-- need to clear those buckets that remain
	t.buckets.do(&(ms:m_set[T]){ ms.remove_all; });
    });
}

method union(m1@:chained_hash_set[`T], m2@:chained_hash_set[T]):m_set[T] {
    -- If we had factory objects, then we could just use the union
    -- operation defined for sets.  We only had to override to get the result
    -- to be a chained_hash_set.
    let r:chained_hash_set[T] :=
      new_chained_hash_set[T](max(m1.buckets.length, m2.buckets.length));
    do(m1, &(e:T){ add_nonmember(r,e); });
    do(m2, &(e:T){ add(r,e); });
    r }
method intersection(m1@:chained_hash_set[`T],
		    m2@:chained_hash_set[T]):m_set[T] {
    -- Again, only overridden to get the result to be a chained_hash_set.
    -- How many buckets should we put in the result?  I chose the average
    -- of the number of buckets in the two args, but this may not be the right
    -- tradeoff.
    let r:chained_hash_set[T] :=
      new_chained_hash_set[T](average(m1.buckets.length, m2.buckets.length));
    if(m1.length > m2.length, {
	do(m1, &(e:T){ if(includes(m2,e), { add_nonmember(r,e); }); });
    }, {
	do(m2, &(e:T){ if(includes(m1,e), { add_nonmember(r,e); }); });
    });
    r }

protected method new_buckets(t@:chained_hash_set[`T],
			     size:int):m_indexed[m_set[T]] {
    new_m_vector_init[m_set[T]](max(1,size), &(i:int){ t.new_bucket }) }
protected method new_bucket(t@:chained_hash_set[`T]):m_set[T] {
    new_list_set[T]() }

let var default_chained_hash_set_size:int := 1;

method new_chained_hash_set[T <= hashable[T]]():chained_hash_set[T] {
    new_chained_hash_set[T](default_chained_hash_set_size) }
method new_chained_hash_set[T <= hashable[T]](size:int):chained_hash_set[T] {
    let t:chained_hash_set[T] := concrete object isa chained_hash_set[T];
    t.buckets := t.new_buckets(size);
    t }


method copy_empty(c@:chained_hash_set[`T]):chained_hash_set[T] {
    new_chained_hash_set[T]() }

private method should_grow(t@:chained_hash_set[`T]):bool {
    -- more than one elem per bucket...
    allowing_updates.updates_allowed(t).not &
      { t.length > 5 & { t.length > t.buckets.length } } }
private method grown_size(t@:chained_hash_set[`T]):int {
    average(t.buckets.length * 2, t.length) }

private method should_shrink(t@:chained_hash_set[`T]):bool {
    -- less than 1/5 full...
    allowing_updates.updates_allowed(t).not &
      { t.buckets.length > 8 & { t.length * 5 < t.buckets.length } } }
private method shrunk_size(t@:chained_hash_set[`T]):int {
    average(t.buckets.length / 2, t.length) }

private method resize(t@:chained_hash_set[`T], new_size:int):void {
    let old_len:int := t.length;
    let old_buckets:m_indexed[m_set[T]] := t.buckets;
    t.buckets := t.new_buckets(new_size);
    t.length := 0;
    old_buckets.do(&(s:m_set[T]){ s.do(&(x:T){ t.add_nonmember(x); }); });
    assert(old_len = t.length, "should have the same elements");
}

} -- end module ChainedHashSet