-- Copyright 1993-1998, by the Cecil Project
-- Department of Computer Science and Engineering, University of Washington
-- See the LICENSE file for license information.

(--DOC
  Indexed collections are indexed sequences. They support the behavior
  of keyed tables, where the integers from 0 to c.length-1 serve as
  the keys. Indexed collections inherit:
      `length', `is_empty', `do', `pick_any', `includes', `find',
	  `copy',  etc., from collections;
      `reverse_do', `flatten', `||', etc., from sequences;
      `fetch', `!', `do_associations', `keys_do', etc., from tables.

  Currently, none of the `_allowing_updates' iterators work for indexed
  collections.
--)

abstract object indexed[T] isa sequence[T], table[int,T];
extend type indexed[`T] subtypes indexed[`S >= T];

-- Indexed is a table and an ordered_collection, which identically define !.
-- method !(t@:indexed[`T], key:int): T { resend(t@table[int,T], key); }
method !(t@:indexed[`T], key:int): T { fetch(t, key); }
method fetch(t@:indexed[`T], key:int): T { resend(t@table[int,T], key); }

method do(a@:indexed[`T], c:&(T):void):void  (** inline **) {
    do_associations(a, &(i:int,v:T){ eval(c, v); });
}
method do(a1@:indexed[`T1], a2@:indexed[`T2],
	  c:&(T1,T2):void):void  (** inline **) {
    do(min(a1.length, a2.length), &(i:int){ eval(c, a1!i, a2!i); });
}
method do(a1@:indexed[`T1], a2@:indexed[`T2], a3@:indexed[`T3],
	  c:&(T1,T2,T3):void):void  (** inline **) {
    do(min(a1.length, min(a2.length, a3.length)), &(i:int){
	eval(c, a1!i, a2!i, a3!i);
    });
}
method do(a1@:indexed[`T1], a2@:indexed[`T2], a3@:indexed[`T3], 
	  a4@:indexed[`T4], c:&(T1,T2,T3,T4):void):void  (** inline **) {
    do(min(a1.length, min(a2.length, min(a3.length, a4.length))), &(i:int){
	eval(c, a1!i, a2!i, a3!i, a4!i);
    });
}

method reverse_do(a@:indexed[`T], c:&(T):void):void  (** inline **) {
    let len:int := a.length;
    do(len, &(i:int){ eval(c, a!(len.pred - i)); });
}
method reverse_do(a1@:indexed[`T1], a2@:indexed[`T2],
		  c:&(T1,T2):void):void  (** inline **) {
    let len:int := min(a1.length, a2.length);
    do(len, &(i:int){ eval(c, a1!(len.pred - i), a2!(len.pred - i)); });
}
method reverse_do(a1@:indexed[`T1], a2@:indexed[`T2], a3@:indexed[`T3],
	  c:&(T1,T2,T3):void):void  (** inline **) {
    let len:int := min(a1.length, min(a2.length, a3.length));
    do(len, &(i:int){
	eval(c, a1!(len.pred - i), a2!(len.pred - i), a3!(len.pred - i));
    });
}
method reverse_do(a1@:indexed[`T1], a2@:indexed[`T2], a3@:indexed[`T3],
		  a4@:indexed[`T4], c:&(T1,T2,T3,T4):void):void  (** inline **){
    let len:int := min(a1.length, min(a2.length, min(a3.length, a4.length)));
    do(len, &(i:int){
	eval(c, a1!(len.pred - i), a2!(len.pred - i),
	        a3!(len.pred - i), a4!(len.pred - i));
    });
}

method do_associations(a@:indexed[`T], c:&(int,T):void):void  (** inline **) {
    do(a.length, &(i:int){ eval(c, i, a!i); });
}
method do_associations_allowing_updates(a@:indexed[`T], c:&(int,T):void):void {
    error("cannot update an indexed collection during iteration");
}

method includes_key(a@:indexed[`T], key:int):bool {
    key >= 0 & { key < a.length } }

method keys(t@:indexed[`T]):interval {
    new_interval(0, t.length.pred) }

method ||(s1@:indexed[`T], s2@:indexed[`T]):indexed[T] {
    new_i_vector_init[T](s1.length + s2.length, &(i:int){
	if(i < s1.length, { s1!i }, { s2!(i-s1.length) }) }) }


--DOC The `includes_index' and `find_index' methods are simply renamings
--DOC of the standard `includes_key' and `find_key' methods.

method includes_index(a@:indexed[`T], i:int):bool { includes_key(a, i) }
method find_index(a@:indexed[`T <= comparable[T]], value:T):int {
    find_key(a, value) }
method find_index(a@:indexed[`T <= comparable[T]], value:T,
		  if_absent:&():int):int {
    find_key(a, value, if_absent) }


-- printing: suppress printing of "keys"
method elems_print_string(c@:indexed[`T]):string {
    resend(c@sequence[T]) }
method elems_print(c@:indexed[`T]):void {
    resend(c@sequence[T]);
}


-- use the sequence version, not the table version
method =(c1@:indexed[`T <= comparable[T]], c2@:indexed[T]):bool {
    resend(c1@sequence[T], c2@sequence[T]) }
method <(c1@:indexed[`T <= ordered[T]], c2@:indexed[T]):bool {
    resend(c1@sequence[T], c2@sequence[T]) }
method <=(c1@:indexed[`T <= ordered[T]], c2@:indexed[T]):bool {
    resend(c1@sequence[T], c2@sequence[T]) }


signature copy(indexed[`T]):indexed[T];


--DOC As usual, there are immutable and mutable varieties of indexed
--DOC collections. Mutable collections support changing bindings of
--DOC indices to values through the `store' or `set_!' methods inherited from
--DOC `m_table'.

abstract object i_indexed[T] isa indexed[T], i_table[int,T];
extend type i_indexed[`T] subtypes i_indexed[`S >= T];

abstract object m_indexed[T] isa indexed[T], m_table[int,T];
  method do_associations_allowing_updates(a@:m_indexed[`T],
					  c:&(int,T):void):void {
      -- use the indexed version, not the m_table version
      resend(a@indexed[T], c);
  }

method as_m_indexed(t@:m_indexed[`T]):m_indexed[T] { t }

signature copy(m_indexed[`T]):m_indexed[T];

--DOC There are convenient ways of assigning into the beginning and end
--DOC of a collection. They can be invoked using the assignment message
--DOC sugar, e.g., `first(c) := x'.

method set_first (a@:m_indexed[`T], x:T):void { a!0 := x; }
method set_second(a@:m_indexed[`T], x:T):void { a!1 := x; }
method set_third (a@:m_indexed[`T], x:T):void { a!2 := x; }
method set_fourth(a@:m_indexed[`T], x:T):void { a!3 := x; }
method set_last  (a@:m_indexed[`T], x:T):void { a!a.length.pred := x; }


--DOC The swap method exchanges the indices of two collection elements.
method swap(c@:m_indexed[`T], i:int, j:int):void {
    let t:T := c!i;
    c!i := c!j;
    c!j := t;
}


-- sorting operations

--DOC The `sort' method sorts a collection in place, using either the
--DOC element type's natural comparison operation (`<=') or a user-supplied
--DOC comparator function. Sorting uses the quicksort algorithm and
--DOC attempts to be a reasonably stable sort.
method sort(c@:m_indexed[`T <= ordered[T]]):void {
    sort_by(c, &(x1:T, x2:T){ x1 <= x2 });
}

method sort_by(c@:m_indexed[`T], pred:&(T,T):bool):void {
    sort_by(c, pred, 0, c.length.pred);
}

-- the quicksort algorithm
method sort_by(c@:m_indexed[`T], pred:&(T,T):bool,
	       first_index:int, last_index:int):void {
    -- check for base case
    if(first_index >= last_index, {^});

    -- select pivot; use median of first, middle, and last elements
    let middle_index:int := average(first_index, last_index);
    let first:T  := c!first_index;
    let middle:T := c!middle_index;
    let last:T   := c!last_index;
    let pivot_index :=
      if(eval(pred, first, middle), {
	  if(eval(pred, middle, last), {
	      middle_index
	  }, {
	      if(eval(pred, first, last), {
		  last_index
	      }, {
		  first_index
	      })
	  })
      }, {
	  if(eval(pred, last, middle), {
	      middle_index
	  }, {
	      if(eval(pred, first, last), {
		  first_index
	      }, {
		  last_index
	      })
	  })
      });
    let pivot:T := c!pivot_index;

    -- partition array in place
    let var f:int := first_index;
    let var l:int := last_index;
    loop_exit(&(break:&():none){
	-- slide upper pointer down past elements that are bigger than pivot
	-- (but don't go too far past lower pointer; no point to it)
	while({ l >= f & { eval(pred, pivot, c!l) }},
	      { l := l.pred; });

	-- slide lower pointer up past elements that are smaller than pivot
	-- (but don't go too far past upper pointer; no point to it)
	while({ f <= l & { eval(pred, c!f, pivot) }},
	      { f := f.succ; });

	-- check if done
	if(f >= l, break);

	-- swap elements in wrong partitions and continue
	c.swap(f, l);

	f := f.succ;
	l := l.pred;
    });

    -- special cases: check if left-hand subarray is empty
    if(f = first_index, {
	-- split off pivot as sole member of bottom array
	c.swap(f, pivot_index);
	f := f.succ;
	assert(l = first_index.pred);
	l := first_index;
    }, {
	if(l = last_index, {
	    -- split off pivot as sole member of upper array
	    c.swap(pivot_index, l);
	    l := l.pred;
	    assert(f = last_index.succ);
	    f := last_index;
	});
    });

(--
    print_line("partitioning into " || first_index.print_string ||
	       "-" || f.pred.print_string ||
	       " and " || l.succ.print_string ||
	       "-" || last_index.print_string);
--)

    -- recursively sort subarrays
    sort_by(c, pred, first_index, f.pred);
    sort_by(c, pred, l.succ, last_index);
}


-- subsequence detection

--DOC The `pos' method returns the index at which the first
--DOC collection occurs in the second collection (invoking the
--DOC `if_absent' closure if not found), using the Knuth-Morris-Pratt
--DOC string-matching algorithm.  The `has_subsequence' method returns whether
--DOC the second collection is found in the first collection, and the 
--DOC `count_subsequences' method returns the number of non-overlapping
--DOC occurrences of the second collection in the first.  [Note that the 
--DOC order of collection arguments in `pos' is opposite to that in the
--DOC `subsequence' methods!]

-- use the KMP string matching algorithm to find the first occurence of the
-- first indexed thing in the second indexed thing.
-- for example pos("cat","the cat is fat",if_fail) returns 4
--             pos("cat","xyyzzy",if_fail) evaluates the failure continuation.

-- first some helper functions for KMP match
-- intializes the next array used for the matching
private method KMP_initnext(sub@:indexed[`T<=comparable[T]]):vector[int] {
    let m:int := length(sub);
    let next:m_vector[int] := new_m_vector[int](m+1);

    next!1 := 0;

    let var k:int := 0;
    do(m.pred, &(temp:int){
	let q:int := temp+2;
    	while({ k > 0 & { sub!k != sub!q.pred } }, { k := next!k; });
    	if(sub!k = sub!q.pred, { k:=k.succ; });
	next!q := k;
    });

    next }

-- does the pattern matching using the next table
private method KMP_match(sub@:indexed[`T <= comparable[T]], target@:indexed[T],
			 if_fail:&():int):int {
    let n:int := length(target);
    let m:int := length(sub);
    let next:vector[int] := KMP_initnext(sub);

    let var q:int :=0;
    do(n, &(tempi:int){
	let i:int := tempi.succ;
        while({ q > 0 & { sub!q != target!i.pred } }, { q := next!q; });
        if(sub!q = target!i.pred, { q := q.succ; });
        if(q = m, { ^ i-m });    --- found a match
    });

    -- didn't find a match
    eval(if_fail) }


-- the top level method which should be invoked by a user
method pos(s1:indexed[`T <= comparable[T]], s2@:indexed[T]):int {
    pos(s1, s2, { error("pos: pattern not found in target string") }) }
method pos(s1@:indexed[`T <= comparable[T]], s2@:indexed[T],
	   if_fail:&():int):int{
    KMP_match(s1, s2, if_fail) }


method has_subsequence(s1@:indexed[`T <= comparable[T]], s2@:indexed[T]):bool {
    pos(s2, s1, { ^ false });
    true }


method count_subsequences(s1@:indexed[`T <= comparable[T]],
			  s2@:indexed[T]):int {
    let len := s2.length;
    let var count:int := 0;
    let var view:indexed[T] := s1;
    loop({
	let pos:int := pos(s2, view, { ^ count });
	count := count + 1;
	view := view.view_subrange(pos+len);
    }) }