-- Copyright 1993-1998, by the Cecil Project
-- Department of Computer Science and Engineering, University of Washington
-- See the LICENSE file for license information.

(--DOC
A `histogram' supports accumulating counts for particular values and
then printing out the results in a reasonable fashion.

Abstractly, a `histogram' is a mapping from some domain of values to
integers.  The `increment' operation bumps the count associated with a
particular value.  To support the histogram's hash-table-based
implementation and its sorted print_string output, the values being
counted by the `histogram' must be both hashable and ordered.
--)

-- Threshold used by add_value and print_to_array: add_value keeps adding
-- sample elements to a key's bag while the bag holds at most this many,
-- and print_to_array only shows bags holding at most this many.
let var max_histogram_values_to_keep:int := 4;

-- A histogram is a table from counted values (keys) to integer counts.
template object histogram[T <= ordered_hashable[T]]
    isa hash_CR_table[T,integer];

  -- Heading written before the histogram body; skipped when empty
  -- (see header_text).
  field title(@:histogram[`T]):string := "";

  -- Per-key bag of sample elements recorded through add_value.
  field individual_values(@:histogram[`T]):hash_table[T,m_bag[any]] :=
      new_hash_table[T, m_bag[any]]();


-- Create an untitled, empty histogram.
method new_histogram[T <= ordered_hashable[T]]():histogram[T] {
    concrete object isa histogram[T]
}

-- Create an empty histogram whose title is `t'.
method new_histogram[T <= ordered_hashable[T]](t:string):histogram[T] {
    concrete object isa histogram[T] { title := t }
}


-- Add one to the count stored for `x'.
method increment(t@:histogram[`T], x:T):void {
    increment_by_count(t, x, 1);
}

-- Add `cnt' to the count stored for `x'; the count starts from 0 when
-- `x' has no entry yet.
method increment_by_count(t@:histogram[`T], x:T, cnt:integer):void {
    t.store(x, t.fetch_or_init(x, { 0 }) + cnt);
}

-- Record `elem' as a sample element for key `k'.  This does not touch the
-- count for `k'.
method add_value(t@:histogram[`T], k:T, elem:any):void {
    let s:m_bag[any] :=
        t.individual_values.fetch_or_init(k, { new_list_bag[any]() });
    -- Allow adding one extra, as a symbol that this bucket "overflowed":
    -- print_to_array omits the samples of any bag longer than the limit.
    if(s.length <= max_histogram_values_to_keep, {
        s.add(elem);
    });
}

-- Add one to the count for `x' and record `elem' as a sample for it.
method increment(t@:histogram[`T], x:T, elem:any):void {
    t.increment(x);
    t.add_value(x, elem);
}


-- Produce the summary line for this histogram.  The second argument
-- (some key of the table, or "" -- presumably used when the table has no
-- keys; confirm against pick_any_key) only serves to select between the
-- generic and the numeric print_statistics below.
method print_statistics(t@:histogram[`T]):string {
    print_statistics(t, t.pick_any_key({ "" }))
}

-- Generic summary: the sum of all counts, as "count: N\n".
-- `some_key' is not read; it exists for dispatching only.
method print_statistics(t@:histogram[`T], some_key:T|string):string {
    let var cnt:integer := 0;
    t.do_associations(&(key:T, count:integer){
        cnt := cnt + count;
    });
    ["count: ", cnt.print_string, "\n"].flatten
}

-- Numeric summary: total count, sum of key * count over all entries, and
-- their quotient as the average (0.0 when the total count is zero).
-- `some_key' is not read; it exists for dispatching only.
method print_statistics(t@:histogram[`T <= num], some_key@num:T):string {
    let var cnt:integer := 0;
    let var sum:num := 0;
    t.do_associations(&(key:T, count:integer){
        cnt := cnt + count;
        sum := sum + key * count;
    });
    let avg:float := if(cnt = 0, { 0.0 }, { sum /_float cnt });
    ["count: ", cnt.print_string,
     ", sum: ", sum.print_string,
     ", avg: ", avg.print_string(3),
     "\n"].flatten
}


-- Collect every key of the histogram into a new sorted collection.
method sort_it(t@:histogram[`T]):m_sorted_collection[T] {
    let keys:m_sorted_collection[T] := new_sorted_collection[T]();
    t.do_associations(&(key:T,count:integer){
        keys.add(key);
    });
    keys
}

-- Start an output array with the opening "histo{" line, preceded by the
-- title and a newline when the title is non-empty.  The argument to
-- new_array is derived from the number of entries (presumably an initial
-- capacity -- confirm against new_array).
private method header_text(t@:histogram[`T]):array[string] {
    let out:array[string] := new_array[string](2*(t.length+5));
    out.add(if(t.title.is_empty,
               { "histo{\n" },
               { t.title || "\nhisto{\n" }));
    out
}

-- Build the printed form as an array of strings: the header, one
-- "\tkey: count" line per key in sort_it order, and a closing line that
-- carries print_statistics.  A key's sample elements are appended after a
-- tab when its bag is non-empty and holds at most
-- max_histogram_values_to_keep elements; sample text longer than 60
-- characters is cut with copy_from(0, 59) and followed by "...".
private method print_to_array(t@:histogram[`T]):array[string] {
    let keys:sorted_collection[T] := t.sort_it;
    let out:array[string] := t.header_text;
    keys.do(&(key:T){
        let val_set:m_bag[any] :=
            t.individual_values.fetch(key, { new_list_bag[any]() });
        out.add(["\t", key.print_string, ": ", (t!key).print_string,
                 if(val_set.non_empty &
                      { val_set.length <= max_histogram_values_to_keep },
                    { "\t" || (
                          let s:string := val_set.elems_print_string;
                          if(length(s) > 60,
                             { s.copy_from(0, 59) || "..." },
                             { s })
                      ) },
                    { "" }),
                 "\n"].flatten);
    });
    out.add("}, " || t.print_statistics);
    out
}

-- The histogram's printed form as a single string.
method print_string(t@:histogram[`T]):string {
    t.print_to_array.flatten
}

-- Print the histogram piece by piece instead of flattening it first.
method print(t@:histogram[`T]):void {
    t.print_to_array.do(&(s:string){ s.print });
}


-- Printed form grouped by count: keys are bucketed by their count, the
-- distinct counts are put in a sorted collection and visited with
-- reverse_do (presumably largest count first -- confirm), and one
-- "\tcount\tkey" line is written per key.  Ends with print_statistics.
method frequency_sorted_print_string(t@:histogram[`T]):string {
    let mapping:m_table[integer,m_set[T]] :=
        new_hash_table[integer,m_set[T]]();
    t.do_associations(&(k:T, v:integer){
        let s := mapping.fetch_or_init(v, { new_hash_set[T]() });
        s.add(k);
    });
    let keys:m_sorted_collection[integer] :=
        new_sorted_collection[integer]();
    mapping.keys_do(&(k:integer){
        keys.add(k);
    });
    let out:array[string] := t.header_text;
    keys.reverse_do(&(count:integer){
        (mapping!count).do(&(k:T){
            out.add(["\t", count.print_string,
                     "\t", k.print_string, "\n"].flatten);
        });
    });
    out.add("}, " || t.print_statistics);
    out.flatten
}

-- Printed form in which each key's count is shown as a percentage of the
-- sum of all counts, with two digits passed to print_string.
-- NOTE(review): the division is unguarded when the counts sum to zero.
method percent_print_string(t@:histogram[`T]):string {
    let keys:sorted_collection[T] := t.sort_it;
    let var total:integer := 0;
    t.do(&(i:integer){
        total := total + i;
    });
    let out:array[string] := t.header_text;
    keys.do(&(key:T){
        out.add(["\t", key.print_string, ": " ,
                 ((t!key) /_float total * 100).print_string(2),
                 "%\n"].flatten);
    });
    out.add("}");
    out.flatten
}

-- Like percent_print_string, but keys greater than `over' are not listed
-- individually: their counts are added up and reported in one final
-- ">over" row, which is written only when that combined count is positive.
-- NOTE(review): the division is unguarded when the counts sum to zero.
method truncated_percent_print_string(t@:histogram[`T], over:T):string {
    let keys:sorted_collection[T] := t.sort_it;
    let var total:integer := 0;
    let var over_count:integer := 0;
    t.do(&(i:integer){
        total := total + i;
    });
    let out:array[string] := t.header_text;
    keys.do(&(key:T){
        if(key <= over, {
            -- Each row carries a "%" suffix, matching the overflow row
            -- below and the rows written by percent_print_string.
            out.add(["\t", key.print_string, ": ",
                     ((t!key) /_float total * 100).print_string(2),
                     "%\n"].flatten);
        }, {
            over_count := over_count + t!key;
        });
    });
    if(over_count > 0, {
        out.add(["\t", ">", over.print_string, ":",
                 (over_count /_float total * 100).print_string(2),
                 "%\n"].flatten);
    });
    out.add("}");
    out.flatten
}


-- Build a histogram titled `nm' by running `cl'.  `cl' is handed two
-- closures: the first increments the count of a value, the second records
-- a sample element for a value (via add_value, so without counting it).
-- Returns the histogram once `cl' has been evaluated.
method distribution[`T <= ordered_hashable[T]](
            nm:string,
            cl:&(increment:&(value:T):void,
                 add_value:&(value:T, elem:any):void):void
            ):histogram[T] {
    let h:histogram[T] := new_histogram[T](nm);
    eval(cl,
         &(v:T){ h.increment(v); },
         &(v:T, elem:any){ h.add_value(v, elem); });
    h
}

-- Convenience form of distribution for int-valued histograms.
method distribution(nm:string,
                    cl:&(increment:&(value:int):void,
                         add_value:&(value:int, elem:any):void):void
                    ):histogram[int] {
    distribution[int](nm, cl)
}