-- Copyright 1993-1998, by the Cecil Project
-- Department of Computer Science and Engineering, University of Washington
-- See the LICENSE file for license information.

-- These implement open hash tables.

module OpenHashTableImpl {

-- Sentinel bucket values: empty_hash_bucket marks a bucket that has never
-- held a key; removed_hash_bucket marks a bucket whose key was deleted
-- (probing must continue past it when searching).
concrete object empty_hash_bucket;
concrete object removed_hash_bucket;

let use_linear_probing:bool := true;	-- otherwise use quadratic probing

-- Invoke cl on (index, element) for each bucket of v, starting at start_idx
-- and following whichever probing sequence is configured above.
method buckets_in_probing_order_do(v@:vector[`T], start_idx@:int,
				   cl:&(int,T):void):void (** inline **) {
    if(use_linear_probing, {
	v.buckets_in_linear_probing_order_do(start_idx, cl);
    }, {
	v.buckets_in_quadratic_probing_order_do(start_idx, cl);
    });
}

-------------------------------------------------------------------------------
-- Linear Probing:

-- Implements simple linear probing.  We could experiment with more interesting
-- probing schemes.  This returns when the indices wrap around, at which
-- point the client has to take appropriate action based on the operation
-- they are implementing.

method buckets_in_linear_probing_order_do(v@:vector[`T], start_idx_oop@:int,
					  cl:&(int,T):void):void (** inline **) {
    (-- old version: no bounds checks, but calls closure in two places:
	v.range_do(start_idx, v.length, cl);
	v.range_do(0, start_idx, cl);
    --)
    (-- new version, in source code; calls (inlined) closure in only one spot
	let var idx:int := start_idx;
	let len := v.length;
	if(idx < 0 | { idx >= len }, { ^; });
	loop({
	    eval(cl, idx, v!idx);
	    idx := idx.succ;
	    if(idx >= len, {
		-- wrap around
		idx := 0;
	    });
	    if(idx = start_idx, {
		-- we've come back to the start
		-- (do this test in addition to wraparound, to handle the
		-- case of a 1-long vector that wraps around to the start)
		^;
	    });
	});
    --)
    (-- unfortunately, range analysis didn't seem to understand what the
	range of idx was above within the loop, so a bounds check (and error
	code) was introduced at the v!idx call.  so below this is rewritten
	as an RTL primitive for speed and code compactness.
    --)
    prim rtl {
	decl int start_idx := unbox_int start_idx_oop;
	decl int idx := start_idx;
	decl int len := num_elems_int v;
	if idx <_int_log 0 goto done;
	if idx >=_int_log len goto done;
	label loop;
	decl OOP elem := v[idx] OOP;
	decl OOP idx_oop := box_int idx;
	send eval(cl, idx_oop, elem);
	idx := idx +_int 1;
	if idx <_int_log len goto no_wrap_around;
	idx := 0;
	label no_wrap_around;
	if idx !=_int_log start_idx goto loop;
	label done;
	return void;
    }
}

-- These only work for linear probing:

-- Returns index of next bucket to probe, given the current index
method next_probe(v@:vector[`T], idx:int):int (** inline **) {
    assert(use_linear_probing, "shouldn't be used for non-linear probing");
    let next_idx:int := idx + 1;
    if(next_idx >= v.length, { 0 }, { next_idx })
}

-- Returns index of next bucket to probe when going backwards
method previous_probe(v@:vector[`T], idx:int):int (** inline **) {
    assert(use_linear_probing, "shouldn't be used for non-linear probing");
    let prev_idx:int := idx - 1;
    if(prev_idx < 0, { v.length - 1 }, { prev_idx })
}

-------------------------------------------------------------------------------
-- Quadratic Probing:

(-- an alternative probing function that uses quadratic probing.  i.e. search
    elements in this order: start_idx, start_idx+1^2, start_idx-1^2,
    start_idx+2^2, start_idx-2^2, start_idx+3^2, start_idx-3^2, etc.
    (all %v.length).
    note: next_probe and previous_probe can't be called with non-linear
    scanning, since we can't tell (and it's not unique!  that's the whole
    point) what the next/previous bucket will be.
--)
method buckets_in_quadratic_probing_order_do(v@:vector[`T], start_idx@:int,
					     cl:&(int,T):void):void (** inline **) {
    (-- nice, high-level code:
	eval(cl, start_idx, v!start_idx);
	let len := v.length;
	for(1, round_up(sqrt(len), 2)/2, &(i:int){
	    let i_squared := square(i);
	    let idx := (start_idx + i_squared) % len;
	    eval(cl, idx, v!idx);
	    let idx := (start_idx - i_squared) % len;
	    eval(cl, idx, v!idx);
	});
    --)
    -- low-level uglier code, written to avoid bounds checks and to
    -- only invoke the argument closure in one place (to avoid code expansion
    -- through repeated inlining of closures)
    prim rtl {
	decl int len := num_elems_int v;
	if len =_int_log 0 goto done;
	decl int i := 0;
	decl int signed_i_squared := 0;
	decl int untagged_start_idx := unbox_int start_idx;
	label loophead;
	decl int idx := untagged_start_idx +_int signed_i_squared;
	idx := idx %_int len;
	decl OOP val := v[idx] OOP;	-- no bounds check!
	decl OOP tagged_idx := box_int idx;
	send eval(cl, tagged_idx, val);	-- only one call to eval
	if signed_i_squared <=_int_log 0 goto non_positive;
	-- was positive: negate and retry
	signed_i_squared := 0 -_int signed_i_squared;
	goto loophead;
	label non_positive;
	-- was 0 or negative: increment and compute new positive squared value
	i := i +_int 1;
	signed_i_squared := i *_int i;
	if signed_i_squared <_int_log len goto loophead;
	label done;
	return void;
    }
}
--)

-------------------------------------------------------------------------------
-- Other operations:

-- A bunch of prime numbers.  Goes up quickly for small table sizes (factor
-- of 3), then slows down to a factor of 2 around 1000, then grows by 3/2
-- after 4000.
let good_table_sizes:vector[int] :=
    [11, 47, 197, 479, 1031, 2063, 3079, 4721, 6473, 9839, 15559, 24077,
     35729, 45007, 70001, 115001, 180001, 270001, 405001, 607517, 911269,
     1366877, 2050327, 3075481, 4613209, 6919807, 10379711, 15569581,
     23354369, 35031527, 52547321, 78820913, 118231363, 177347039,
     266020583, 399030859, 598546241, 897819337];

let var default_open_table_size:int := good_table_sizes.first;

-- Shared state and sizing policy for open-hashed tables; concrete
-- representations (e.g. hash_table below) add their own parallel storage.
abstract object open_table[Key];

-- key_vector holds the keys; buckets contain a key, empty_hash_bucket, or
-- removed_hash_bucket.  length counts the valid keys currently stored.
private var field key_vector(t@:open_table[`Key]):m_vector[Key] :=
    new_m_vector[Key](default_open_table_size, cast[Key](empty_hash_bucket));
private put var field length(t@:open_table[`Key]):int := 0;

private method should_grow(t@:open_table[`Key]):bool {
    -- more than half full...
    (t.length * 2) > t.key_vector.length &
	{ allowing_updates.updates_allowed(t).not }
}

-- Smallest good size strictly larger than the current key_vector length.
private method grown_size(t@:open_table[`Key]):int {
    let old_size:int := t.key_vector.length;
    good_table_sizes.find(&(new_size:int){ new_size > old_size })
}

-- Smallest good size at least as large as the requested size.
private method prime_table_size(size@:int):int {
    good_table_sizes.find(&(new_size:int){ new_size >= size })
}

private method should_shrink(t@:open_table[`Key]):bool {
    -- less than 1/6 full...
    t.key_vector.length >= 20 &
	{ t.length * 6 < t.key_vector.length &
	    { allowing_updates.updates_allowed(t).not } }
}

-- New size after shrinking: leaves the table roughly 1/3 full.
private method shrunk_size(t@:open_table[`Key]):int {
    let suggested_new_len:int := t.length * 3;
    good_table_sizes.find(&(new_size:int){ new_size > suggested_new_len })
}

-- When we remove an element, we want to try to avoid sticking in
-- removed_hash_bucket, because they slow down future table operations.
-- We are trying to keep the number of removed_hash_bucket values to a minimum.
-- Mark the bucket at idx as no longer holding a key.  Under linear probing,
-- if the following bucket is empty we can make this bucket (and any run of
-- removed markers immediately preceding it) empty too; otherwise a
-- removed_hash_bucket marker must be left so probe sequences stay intact.
method remove_bucket(t@:open_table[`Key], idx:int):void {
    if(use_linear_probing, {
	let next_idx:int := t.key_vector.next_probe(idx);
	if(t.key_vector.fetch(next_idx) == empty_hash_bucket, {
	    -- scrub backwards, converting this bucket and any preceding
	    -- removed markers into empty buckets
	    -- (fix: dropped a dead "next_idx := cur_idx" assignment that the
	    -- parallel hash_table version of remove_bucket never had)
	    let var cur_idx:int := idx;
	    until_false({
		t.key_vector.store(cur_idx, cast[Key](empty_hash_bucket));
		cur_idx := t.key_vector.previous_probe(cur_idx);
	    }, {
		t.key_vector.fetch(cur_idx) == removed_hash_bucket
	    });
	    ^
	});
    });
    t.key_vector.store(idx, cast[Key](removed_hash_bucket));
}

-- True iff t is a real element, i.e. neither of the two sentinel objects.
private method is_valid_bucket_element(t:any):bool {
    t !== empty_hash_bucket & { t !== removed_hash_bucket }
}

method remove_all(t@:open_table[`Key]):void {
    t.length := 0;
    if(t.should_shrink, {
	-- will create fresh, empty buckets
	t.key_vector := new_m_vector[Key](t.shrunk_size,
					  cast[Key](empty_hash_bucket));
    }, {
	-- need to clear those buckets that remain
	t.key_vector.keys_do(&(i:int){
	    t.key_vector.store(i, cast[Key](empty_hash_bucket));
	});
    });
}

-- return a histogram computing, for each key in the key vector, how many
-- probes are necessary to find it.  this gives a sense of how many conflicts
-- there are in the table
method probe_histogram(t@:open_table[`Key <= hashable[Key]]):histogram[int] {
    let h := new_histogram[int]();
    let range := t.key_vector.length;
    t.key_vector.do(&(key:Key){
	if(key.is_valid_bucket_element, {
	    exit(&(exit:&():none){
		let var num_probes:int := 1;
		t.key_vector.buckets_in_probing_order_do(hash(key, range),
							 &(i:int,k:Key){
		    assert(k !== empty_hash_bucket, "should have found key");
		    if(k !== removed_hash_bucket & { key = k }, {
			h.increment(num_probes);
			eval(exit);
		    });
		    num_probes := num_probes.succ;
		});
		error("should have found key");
	    });
	});
    });
    h
}

} -- end module OpenHashTableImpl


module HashTable extends OpenHashTableImpl {

--DOC `hash_table' is an implementation of tables that uses an open
--DOC hashing algorithm, for hashable keys.

-- hash tables using probing, implemented with two parallel vectors to hold
-- the key_vector and value_vector.  Two special sentinel objects,
-- empty_hash_bucket and removed_hash_bucket, are used to indicate special
-- bucket states.  These tables are very space efficient.
-- NOTE: the initial sizes and shrinking and growing sizes must be selected
-- so that there is always at least one empty bucket in the table.  The
-- fetch and store operations will go into an infinite loop if such a bucket
-- does not exist.
template object hash_table[Key <= hashable[Key], Value]
    isa m_removable_table[Key,Value], open_table[Key];

-- parallel vector of values; bucket i of value_vector corresponds to
-- bucket i of key_vector
private var field value_vector(t@:hash_table[`Key,`Value]):m_vector[Value] :=
    new_m_vector[Value](default_open_table_size,
			cast[Value](empty_hash_bucket));

method do_associations(t@:hash_table[`Key,`Value], c:&(Key,Value):void):void {
    do(t.key_vector, t.value_vector, &(k:Key,v:Value){
	if(k.is_valid_bucket_element, {
	    eval(c, k, v);
	});
    });
}

method do_associations_allowing_updates(t@:hash_table[`Key,`Value],
					c:&(Key,Value):void):void {
    -- disable resizing during iteration
    allowing_updates.begin_allowing_updates(t);
    unwind_protect({
	do_associations(t, c);
    }, {
	-- re-enable resizing and perform any resize that was deferred
	allowing_updates.done_allowing_updates(t);
	if(t.should_grow, {
	    t.resize(t.grown_size);
	}, {
	    if(t.should_shrink, {
		t.resize(t.shrunk_size);
	    });
	});
    });
}

method fetch(t@:hash_table[`Key,`Value], key:Key, if_absent:&():Value):Value {
    t.key_vector.buckets_in_probing_order_do(hash(key, t.key_vector.length),
					     &(i:int,k:Key){
	if(k !== empty_hash_bucket, {
	    if(k !== removed_hash_bucket & { key = k }, {
		^ t.value_vector.fetch(i)
	    });
	}, {
	    -- hit a never-used bucket: the key cannot be in the table
	    ^ eval(if_absent)
	});
    });
    -- probed every bucket without finding the key or an empty bucket
    eval(if_absent)
}

-- don't write an optimized fetch_or_init; the if_init closure could
-- recursively call add, resizing the table, and hence invalidating the
-- cached position of where to add the initialized element.
-- rely on the default table::fetch_or_init instead.

-- store looks for an entry with the same key, and replaces it if one is found.
-- if not, it attempts to reuse a slot containing a 'removed_hash_bucket'
-- sentinel, if it saw one while it was searching.  Otherwise, it uses a
-- bucket containing 'empty_hash_bucket'.
-- NOTE(review): the if_absent closure is accepted but never evaluated in
-- this body; presumably the intent is that it runs when the key was not
-- already present -- confirm against the table store protocol.
method store(t@:hash_table[`Key,`Value], key:Key, value:Value,
	     if_absent:&():void):void {
    let var store_pos:int := -1;	-- first reusable (removed) slot seen
    t.key_vector.buckets_in_probing_order_do(hash(key, t.key_vector.length),
					     &(i:int,k:Key){
	if(k !== empty_hash_bucket, {
	    if(k !== removed_hash_bucket, {
		if(key = k, {
		    -- key already present: just overwrite its value
		    t.value_vector.store(i, value);
		    ^
		});
	    }, {
		-- remember the first removed slot for possible reuse
		if(store_pos < 0, { store_pos := i; });
	    });
	}, {
	    -- reached an empty bucket, so the key is not in the table;
	    -- insert into the earliest reusable slot seen (or here)
	    if(store_pos < 0, { store_pos := i; });
	    t.length := t.length.succ;
	    t.key_vector.store(store_pos, key);
	    t.value_vector.store(store_pos, value);
	    -- t.check_correctness;
	    if(t.should_grow, {
		t.resize(t.grown_size);
	    });
	    ^
	});
    });
    -- probed all the way around without finding the key or an empty bucket;
    -- insert into a removed slot recorded along the way
    assert(store_pos >= 0, "should have found a free bucket somewhere");
    t.length := t.length.succ;
    t.key_vector.store(store_pos, key);
    t.value_vector.store(store_pos, value);
    -- t.check_correctness;
    if(t.should_grow, {
	t.resize(t.grown_size);
    });
}

-- When we remove an element, we want to try to avoid sticking in
-- removed_hash_bucket, because they slow down future table operations.
-- We are trying to keep the number of removed_hash_bucket values to a minimum.
-- Remove the entry in bucket idx, clearing both parallel vectors.  Under
-- linear probing, when the following bucket is empty, this bucket and any
-- run of removed markers just before it can be made empty instead of being
-- left as removed markers.
private method remove_bucket(t@:hash_table[`Key, `Value], idx:int):void {
    if(use_linear_probing, {
	let next_idx:int := t.key_vector.next_probe(idx);
	if(t.key_vector.fetch(next_idx) == empty_hash_bucket, {
	    let var cur_idx:int := idx;
	    until_false({
		t.key_vector.store(cur_idx, cast[Key](empty_hash_bucket));
		t.value_vector.store(cur_idx, cast[Value](empty_hash_bucket));
		cur_idx := t.key_vector.previous_probe(cur_idx);
	    }, {
		t.key_vector.fetch(cur_idx) == removed_hash_bucket
	    });
	    ^
	});
    });
    t.key_vector.store(idx, cast[Key](removed_hash_bucket));
    t.value_vector.store(idx, cast[Value](removed_hash_bucket));
}

method remove_key(t@:hash_table[`Key,`Value], key:Key,
		  if_absent:&():`Else):Value|Else {
    t.key_vector.buckets_in_probing_order_do(hash(key, t.key_vector.length),
					     &(i:int,k:Key){
	if(k !== empty_hash_bucket, {
	    if(k !== removed_hash_bucket & { key = k }, {
		let v:Value := t.value_vector.fetch(i);
		t.remove_bucket(i);
		t.length := t.length.pred;
		if(t.should_shrink, {
		    t.resize(t.shrunk_size);
		});
		^ v
	    });
	}, {
	    -- hit a never-used bucket: the key cannot be in the table
	    ^ eval(if_absent)
	});
    });
    eval(if_absent)
}

-- Remove every entry whose key satisfies pred; returns how many were removed.
method remove_keys_if(t@:hash_table[`Key,`Value], pred:&(Key):bool):int {
    let var count:int := 0;
    t.key_vector.do_associations(&(i:int, k:Key){
	if(k.is_valid_bucket_element & { eval(pred, k) }, {
	    t.length := t.length.pred;
	    t.remove_bucket(i);
	    count := count + 1;
	});
    });
    if(t.should_shrink, {
	t.resize(t.shrunk_size);
    });
    count
}

method remove_all(t@:hash_table[`Key,`Value]):void {
    t.length := 0;
    if(t.should_shrink, {
	-- will create fresh, empty buckets
	let new_sz:int := t.shrunk_size;
	t.key_vector := new_m_vector[Key](new_sz,
					  cast[Key](empty_hash_bucket));
	t.value_vector := new_m_vector[Value](new_sz,
					      cast[Value](empty_hash_bucket));
    }, {
	-- need to clear those buckets that remain
	t.key_vector.length.do(&(i:int){
	    t.key_vector.store(i, cast[Key](empty_hash_bucket));
	    t.value_vector.store(i, cast[Value](empty_hash_bucket));
	});
    });
}

-- Rebuild the table with new_size buckets, re-inserting every valid entry.
private method resize(t@:hash_table[`Key,`Value], new_size:int):void {
    -- t.check_correctness;
    let old_len:int := t.length;
    let old_key_vector:m_vector[Key] := t.key_vector;
    let old_value_vector:m_vector[Value] := t.value_vector;
    t.key_vector := new_m_vector[Key](new_size,
				      cast[Key](empty_hash_bucket));
    t.value_vector := new_m_vector[Value](new_size,
					  cast[Value](empty_hash_bucket));
    t.length := 0;
    do(old_key_vector, old_value_vector, &(k:Key, v:Value){
	if(k.is_valid_bucket_element, {
	    t.store(k, v);
	});
    });
    assert(old_len = t.length, "should have the same elements");
    -- t.check_correctness;
}

-- this method can be called to verify the reasonableness of a probing
-- hash table.  currently, it only checks that the key and value tables
-- have parallel empty_hash_bucket entries.
method check_correctness(t@:hash_table[`Key,`Value]):void {
    good_table_sizes.find(&(valid_size:int){
	t.key_vector.length = valid_size
    }, {
	"Table size is not one of the selected prime table sizes!".print_line;
	breakpoint();
    });
    do(t.key_vector, t.value_vector, &(k:Key, v:Value){
	if((k == empty_hash_bucket) != (v == empty_hash_bucket), {
	    "Broken hash table!".print_line;
	    breakpoint();
	});
    });
}

method new_hash_table[Key <= hashable[Key], Value]():hash_table[Key,Value] {
    new_hash_table[Key,Value](default_open_table_size)
}

method new_hash_table[Key <= hashable[Key], Value](size:int)
	:hash_table[Key,Value] {
    -- round the requested size up to one of the good prime sizes
    let real_size := prime_table_size(size);
    concrete object isa hash_table[Key,Value] {
	key_vector := new_m_vector[Key](real_size,
					cast[Key](empty_hash_bucket)),
	value_vector := new_m_vector[Value](real_size,
					    cast[Value](empty_hash_bucket)) }
}

method copy_empty(t@:hash_table[`Key,`Value]):hash_table[Key,Value] {
    new_hash_table[Key,Value](t.key_vector.length)
}

method copy(t@:hash_table[`Key,`Value]):hash_table[Key,Value] {
    concrete object isa hash_table[Key,Value] {
	key_vector := t.key_vector.copy,
	value_vector := t.value_vector.copy,
	length := t.length }
}

--
-- table data structure with different printing behavior
--
template object hash_CR_table[Key <= hashable[Key], Value]
    isa hash_table[Key,Value];

-- print one association per line
method elem_separator(t@:hash_CR_table[`Key,`Value]):string { "\n" }

method new_hash_CR_table[Key <= hashable[Key], Value]()
	:hash_CR_table[Key,Value] {
    new_hash_CR_table[Key,Value](default_open_table_size)
}

method new_hash_CR_table[Key <= hashable[Key], Value](size:int)
	:hash_CR_table[Key,Value] {
    let real_size := prime_table_size(size);
    let t:hash_CR_table[Key,Value] :=
	concrete object isa hash_CR_table[Key,Value] {
	    key_vector := new_m_vector[Key](real_size,
					    cast[Key](empty_hash_bucket)),
	    value_vector := new_m_vector[Value](real_size,
						cast[Value](empty_hash_bucket)) };
    t
}

method copy_empty(c@:hash_CR_table[`Key,`Value]):hash_CR_table[Key,Value] {
    new_hash_CR_table[Key,Value](c.key_vector.length)
}

method copy(t@:hash_CR_table[`Key,`Value]):hash_CR_table[Key,Value] {
    concrete object isa hash_CR_table[Key,Value] {
	key_vector := t.key_vector.copy,
	value_vector := t.value_vector.copy,
	length := t.length }
}

} -- end module HashTable