## red-black-map-g.pkg
#
# This code is based on Chris Okasaki's implementation of
# red-black trees. The linear-time tree construction code is
# based on the paper "Constructing red-black trees" by Ralf Hinze,
# http://www.eecs.usma.edu/webs/people/okasaki/waaapl99.pdf#page=95
# and the delete function is based on the description in Cormen,
# Leiserson, and Rivest.
#
# A red-black tree should satisfy the following two invariants:
#
# Red Invariant: Each red node has a black parent.
#
# Black Condition: Each path from the root to an empty node
# has the same number of black nodes
# (the tree's black height).
#
# The Red condition implies that the root is always black and the Black
# condition implies that any node with only one child will be black and
# its child will be a red leaf.
# Compiled by:
#
src/lib/std/standard.lib### "A tree growing out of the ground
### is as wonderful today as it ever was.
###
### It does not need to adopt
### new and startling methods."
###
### -- Robert Henri
# This generic gets compile-time expanded in:
#
src/lib/src/string-map.pkg#
src/lib/src/quickstring-red-black-map.pkg#
src/lib/src/digraph-strongly-connected-components-g.pkg#
src/app/makelib/paths/anchor-dictionary.pkg# ...
# (It might be the most-used generic in the codebase.)
generic package red_black_map_g (k: Key) # Key is from
src/lib/src/key.api: Map where key == k # Map is from
src/lib/src/map.api{
package key = k;
Color = RED
| BLACK;
# Internal tree node:
#
Tree(X)
= EMPTY
| TREE_NODE ( Color,
Tree(X), # Left subtree.
key::Key, # Key.
X, # Value.
Tree(X) # Right subtree.
)
;
# Header node. Every complete
# map is represented by one:
#
Map(X) = MAP ( Int, # Count of nodes in the tree -- zero for an empty map.
Tree(X) # Tree containing one node per key-val pair in map.
);
#
fun is_empty (MAP(_, EMPTY)) => TRUE;
is_empty _ => FALSE;
end;
empty = MAP (0, EMPTY);
#
fun singleton (key, value)
=
MAP (1, TREE_NODE (RED, EMPTY, key, value, EMPTY));
# Check invariants:
#
fun all_invariants_hold (MAP (nodecount, EMPTY))
=>
nodecount == 0;
all_invariants_hold (MAP (nodecount, TREE_NODE (RED,_,_,_,_) ) )
=>
FALSE; # RED root is not ok.
all_invariants_hold (MAP (nodecount, tree))
=>
( black_invariant_ok tree
and
red_invariant_ok (TRUE, tree)
and
nodecount_ok (nodecount, tree)
)
where
# Every path from root to any leaf must
# contain the same number of BLACK nodes:
#
fun black_invariant_ok tree
=
{ # Compute the black depth along one
# arbitrary path for reference:
#
black_depth = leftmost_blackdepth (0, tree);
# Check that black depth along all other paths matches:
#
check_blackdepth_on_all_paths (0, tree)
where
fun check_blackdepth_on_all_paths (n, EMPTY)
=>
n == black_depth;
check_blackdepth_on_all_paths (n, TREE_NODE (BLACK, left_subtree,_,_, right_subtree))
=>
check_blackdepth_on_all_paths (n+1, left_subtree)
and
check_blackdepth_on_all_paths (n+1, right_subtree);
check_blackdepth_on_all_paths (n, TREE_NODE (RED, left_subtree,_,_, right_subtree))
=>
check_blackdepth_on_all_paths (n, left_subtree)
and
check_blackdepth_on_all_paths (n, right_subtree);
end;
end;
}
where
fun leftmost_blackdepth (n, EMPTY) => n;
leftmost_blackdepth (n, TREE_NODE (RED, left_subtree, _,_,_)) => leftmost_blackdepth (n, left_subtree);
leftmost_blackdepth (n, TREE_NODE (BLACK, left_subtree, _,_,_)) => leftmost_blackdepth (n+1, left_subtree);
end;
end;
# A RED node must always have a BLACK parent:
#
fun red_invariant_ok (parent_was_black, EMPTY)
=>
TRUE;
red_invariant_ok (parent_was_black, TREE_NODE (RED, left_subtree, _,_, right_subtree))
=>
parent_was_black
and
red_invariant_ok (FALSE, left_subtree)
and
red_invariant_ok (FALSE, right_subtree);
red_invariant_ok (parent_was_black, TREE_NODE (BLACK, left_subtree, _,_, right_subtree))
=>
red_invariant_ok (TRUE, left_subtree)
and
red_invariant_ok (TRUE, right_subtree);
end;
# The count field in the header must
# equal the number of nodes in the tree:
#
fun nodecount_ok (nodecount, tree)
=
nodecount == count_nodes tree
where
fun count_nodes EMPTY
=>
0;
count_nodes (TREE_NODE (_, left_subtree, _,_, right_subtree))
=>
count_nodes left_subtree
+
count_nodes right_subtree
+
1;
end;
end;
end;
end;
# A debugging 'print' to show
# structure of tree:
#
fun debug_print_tree (print_key, print_val, tree, indent0)
=
debug_print_tree' (tree, 4, 0)
where
fun debug_print_tree' (tree, indent, count)
=
case tree
#
EMPTY => count;
TREE_NODE (color, left, key, value, right)
=>
{ count = debug_print_tree' (left, indent+5, count);
print (do_indent (indent0, []));
printf "%4d: " count;
print_val value;
print " ";
print_key key;
print " key";
print " ";
pad1_string = do_indent (indent, []);
color_string = case color RED => "RED"; BLACK => "BLACK"; esac;
string = pad1_string + color_string;
size = string::length_in_bytes string;
pad2_string = do_indent (40-size, []);
print string;
print pad2_string;
print "\n";
debug_print_tree' (right, indent+5, count+1);
}
where
fun do_indent (n, l)
=
if (n > 0 ) { do_indent (n - 1, " " ! l); };
else cat l; fi;
end;
esac;
end;
fun debug_print ( MAP tree,
print_key,
print_val
)
=
{ print "\n";
debug_print_tree (print_key, print_val, #2 tree, 0);
};
#
fun set (MAP (n_items, m), key1, val1)
=
{ m = case (set'' m)
#
TREE_NODE (RED, left_subtree, key, value, right_subtree)
=>
# Enforce invariant that root is always BLACK.
# (It is always safe to change the root from
# RED to BLACK.)
#
# Since the well-tested SML/NJ code returns
# trees with RED roots, this may not be necessary.
#
TREE_NODE (BLACK, left_subtree, key, value, right_subtree);
other => other;
esac;
MAP (*n_items', m);
}
where
n_items' = REF n_items;
#
fun set'' EMPTY
=>
{ n_items' := n_items+1;
TREE_NODE (RED, EMPTY, key1, val1, EMPTY);
};
set'' (s as TREE_NODE (s_color, a, key2, val2, b))
=>
case (key::compare (key1, key2))
#
LESS => case a
#
TREE_NODE (RED, c, key3, val3, d)
=>
case (key::compare (key1, key3))
#
LESS
=>
case (set'' c)
#
TREE_NODE (RED, e, wk, w, f)
=>
TREE_NODE (RED, TREE_NODE (BLACK, e, wk, w, f), key3, val3, TREE_NODE (BLACK, d, key2, val2, b));
c => TREE_NODE (BLACK, TREE_NODE (RED, c, key3, val3, d), key2, val2, b);
esac;
EQUAL
=>
TREE_NODE (s_color, TREE_NODE (RED, c, key1, val1, d), key2, val2, b);
GREATER
=>
case (set'' d)
#
TREE_NODE (RED, e, wk, w, f)
=>
TREE_NODE (RED, TREE_NODE (BLACK, c, key3, val3, e), wk, w, TREE_NODE (BLACK, f, key2, val2, b));
d => TREE_NODE (BLACK, TREE_NODE (RED, c, key3, val3, d), key2, val2, b);
esac;
esac;
_ => TREE_NODE (BLACK, set'' a, key2, val2, b);
esac;
EQUAL => TREE_NODE (s_color, a, key1, val1, b);
GREATER
=>
case b
#
TREE_NODE (RED, c, key3, val3, d)
=>
case (key::compare (key1, key3))
#
LESS => case (set'' c)
#
TREE_NODE (RED, e, wk, w, f)
=>
TREE_NODE (RED, TREE_NODE (BLACK, a, key2, val2, e), wk, w, TREE_NODE (BLACK, f, key3, val3, d));
c => TREE_NODE (BLACK, a, key2, val2, TREE_NODE (RED, c, key3, val3, d) );
esac;
EQUAL => TREE_NODE (s_color, a, key2, val2, TREE_NODE (RED, c, key1, val1, d));
GREATER => case (set'' d)
#
TREE_NODE (RED, e, wk, w, f)
=>
TREE_NODE (RED, TREE_NODE (BLACK, a, key2, val2, c), key3, val3, TREE_NODE (BLACK, e, wk, w, f));
d => TREE_NODE (BLACK, a, key2, val2, TREE_NODE (RED, c, key3, val3, d));
esac;
esac;
_ => TREE_NODE (BLACK, a, key2, val2, set'' b);
esac;
esac;
end;
end;
# A synonym for 'set', so that we can write
# map $= (key, value);
# instead of the clumsier
# map = set( map, key, value );
#
fun m $ (key1, val1)
=
set (m, key1, val1);
#
fun set' ((key1, val1), m)
=
set (m, key1, val1);
# Is a key in the domain of the map?
#
fun contains_key (MAP(_, t), k)
=
get' t
where
fun get' EMPTY
=>
FALSE;
get' (TREE_NODE(_, a, key2, val2, b))
=>
case (key::compare (k, key2))
#
LESS => get' a;
EQUAL => TRUE;
GREATER => get' b;
esac;
end;
end;
# Is a key in the domain of the map?
#
fun contains_key (MAP(_, t), k)
=
get' t
where
fun get' EMPTY
=>
FALSE;
get' (TREE_NODE(_, a, key2, val2, b))
=>
case (key::compare (k, key2))
#
LESS => get' a;
EQUAL => TRUE;
GREATER => get' b;
esac;
end;
end;
fun preceding_key (MAP(_, t), k)
=
get' (t, NULL)
where
fun maxkey (EMPTY, result)
=>
result;
maxkey (TREE_NODE(_, a, key2, val2, b), result)
=>
maxkey (b, THE key2);
end;
fun get' (EMPTY, result)
=>
result;
get' (TREE_NODE(_, a, key2, val2, b), result)
=>
case (key::compare (k, key2))
#
LESS => get' (a, result);
EQUAL => maxkey(a, result);
GREATER => get' (b, THE key2);
esac;
end;
end;
fun following_key (MAP(_, t), k)
=
get' (t, NULL)
where
fun minkey (EMPTY, result)
=>
result;
minkey (TREE_NODE(_, a, key2, val2, b), result)
=>
minkey (a, THE key2);
end;
fun get' (EMPTY, result)
=>
result;
get' (TREE_NODE(_, a, key2, val2, b), result)
=>
case (key::compare (k, key2))
#
LESS => get' (a, THE key2);
EQUAL => minkey(b, result);
GREATER => get' (b, result);
esac;
end;
end;
# Return (THE value) corresponding to a key,
# or NULL if the key is not present:
#
fun get (MAP(_, t), k)
=
get' t
where
fun get' EMPTY
=>
NULL;
get' (TREE_NODE(_, a, key2, val2, b))
=>
case (key::compare (k, key2))
#
LESS => get' a;
EQUAL => THE val2;
GREATER => get' b;
esac;
end;
end;
# Return value corresponding to a key,
# raising lib_base::NOT_FOUND if the
# key is not present:
#
fun get_or_raise_exception_not_found (MAP(_, t), k)
=
get' t
where
fun get' EMPTY
=>
raise exception lib_base::NOT_FOUND;
get' (TREE_NODE(_, a, key2, val2, b))
=>
case (key::compare (k, key2))
#
LESS => get' a;
EQUAL => val2;
GREATER => get' b;
esac;
end;
end;
# Remove a keyval, returning new map and value removed.
# Raise lib_base::NOT_FOUND if not found.
#
stipulate
Descent_Path(X)
= TOP
| LEFT ((Color, key::Key, X, Tree(X), Descent_Path(X)) )
| RIGHT ((Color, Tree(X), key::Key, X, Descent_Path(X)) )
;
fun drop' (input as MAP (n_items, input_tree), key_to_drop)
=
{
# We produce our result tree by copying
# our descent path nodes one by one,
# starting at the leafward end and proceeding
# to the root.
#
# We have two copying cases to consider:
#
# 1) Initially, our deletion may have produced
# a violation of the RED/BLACK invariants
# -- specifically, a BLACK deficit -- forcing
# us to do on-the-fly rebalancing as we go.
#
# 2) Once the BLACK deficit is resolved (or immediately,
# if none was created), copying cannot produce any
# additional invariant violations, so path copying
# becomes an utterly trivial matter of node duplication.
#
# We have two separate routines to handle these two cases:
#
# copy_path Handles the trivial case.
# copy_path' Handles the rebalancing-needed case.
#
fun copy_path (TOP, t) => t;
copy_path (LEFT (color, key, value, b, rest_of_path), a) => copy_path (rest_of_path, TREE_NODE (color, a, key, value, b));
copy_path (RIGHT (color, a, key, value, rest_of_path), b) => copy_path (rest_of_path, TREE_NODE (color, a, key, value, b));
end;
# copy_path' propagates a black deficit
# up the descent path until either the top
# is reached, or the deficit can be
# covered.
#
# Arguments:
# o descent_path, the worklist of nodes which need to be copied.
# o result_tree, our results-so-far accumulator.
#
#
# Its return value is a pair containing:
# o black_deficit: A boolean flag which is TRUE iff there is still a deficit.
# o The new tree.
#
fun copy_path' (TOP, t)
=>
(TRUE, t);
# Nomenclature: In the below diagrams, I use '1B' == "BLACK node containing key1"
# '2R' == "RED node containing key2"
# etc.
# 'X' can match RED or BLACK (but not both) within any given rule.
# 'a', 'b' represent the current node/subtree.
# 'c', 'd', 'e' represent arbitrary other node/subtrees (possibly EMPTY).
#
# For the cited Wikipedia case discussions and diagrams, see
# http://en.wikipedia.org/wiki/Red_black_tree
#
# 1B 2B Wikipedia Case 2
# / \ -> / d
# a 2R 1R
# c d a c
#
#
copy_path' (LEFT (BLACK, key1, val1, TREE_NODE (RED, c, key2, val2, d), path), a) # Case 1L
=>
copy_path' (LEFT (RED, key1, val1, c, LEFT (BLACK, key2, val2, d, path)), a);
#
# We ('a') now have a RED parent and BLACK sibling, so case 4, 5 or 6 will apply.
# 1 1 Wikipedia Case 5
# / \ / \
# a 3B -> a 2B
# 2R e c 3R
# c d d e
#
copy_path' (LEFT (color, key1, val1, TREE_NODE (BLACK, TREE_NODE (RED, c, key2, val2, d), wk, w, e), path), a) # Case 3L
=>
copy_path' (LEFT (color, key1, val1, TREE_NODE (BLACK, c, key2, val2, TREE_NODE (RED, d, wk, w, e)), path), a);
# 1X 2X Wikipedia Case 6
# / \ / \
# a 2B -> 1B 3B
# c 3R a c d e
# d e
#
copy_path' (LEFT (color, key1, val1, TREE_NODE (BLACK, c, key2, val2, TREE_NODE (RED, d, key3, val3, e)), path), a) # Case 4L
=>
(FALSE, copy_path (path, TREE_NODE (color, TREE_NODE (BLACK, a, key1, val1, c), key2, val2, TREE_NODE (BLACK, d, key3, val3, e))));
# 1R 1B Wikipedia Case 4
# / \ / \
# a 2B -> a 2R
# c d c d
#
copy_path' (LEFT (RED, key1, val1, TREE_NODE (BLACK, c, key2, val2, d), path), a) # Case 2L
=>
(FALSE, copy_path (path, TREE_NODE (BLACK, a, key1, val1, TREE_NODE (RED, c, key2, val2, d))));
#
# BLACK sib has exchanged color with RED parent;
# this makes up the BLACK deficit on our side
# without affecting black path counts on sib's side,
# so we're done rebalancing and can revert to
# simple path copying for the rest of the way back
# to the root.
# 1B 1B Wikipedia Case 3
# / \ / \
# a 2B -> a 2R
# c d c d
#
copy_path' (LEFT (BLACK, key1, val1, TREE_NODE (BLACK, c, key2, val2, d), path), a) # Case 2L
=>
copy_path' (path, TREE_NODE (BLACK, a, key1, val1, TREE_NODE (RED, c, key2, val2, d)));
#
# Changing BLACK sib to RED locally rebalances in the
# sense that paths through us ('a') and our sib (2)
# both have the same number of BLACK nodes, but our
# subtree as a whole has a BLACK pathcount one lower
# than initially, so we continue the rebalancing
# act in our parent.
# 1B 2B Wikipidia Case 2 (Mirrored)
# / \ / \
# 2R b -> c 1R
# c d d b
# _____
copy_path' (RIGHT (BLACK, TREE_NODE (RED, c, key2, val2, d), key1, val1, path), b) # Case 1R
=>
copy_path' (RIGHT (RED, d, key1, val1, RIGHT (BLACK, c, key2, val2, path)), b);
#
# We ('b') now have a RED parent and BLACK sibling, so mirrored case 4, 5 or 6 will apply.
# 1X 2X Wikipedia Case 6 (Mirrored)
# / \ / \
# 2B b -> 3B 1B
# 3R e c d e b
# c d
#
copy_path' (RIGHT (color, TREE_NODE (BLACK, TREE_NODE (RED, c, key3, val3, d), key2, val2, e), key1, val1, path), b) # Case 3R
=>
(FALSE, copy_path (path, TREE_NODE (color, TREE_NODE (BLACK, c, key3, val3, d), key2, val2, TREE_NODE (BLACK, e, key1, val1, b))));
# OLD BROKEN CODE copy_path' (RIGHT (color, TREE_NODE (BLACK, c, key3, val3, TREE_NODE (RED, d, key2, val2, e)), key1, val1, path), b);
# 1 1 Wikipedia Case 5 (Mirrored)
# / \ / \
# 2B b -> 3B b
# c 3R 2R e
# d e c d
#
copy_path' (RIGHT (color, TREE_NODE (BLACK, c, key2, val2, TREE_NODE (RED, d, key3, val3, e)), key1, val1, path), b) # Case 4R
=>
copy_path' (RIGHT (color, TREE_NODE (BLACK, TREE_NODE (RED, c, key2, val2, d), key3, val3, e), key1, val1, path), b);
# OLD BROKEN CODE (FALSE, copy_path (path, TREE_NODE (color, c, key2, val2, TREE_NODE (BLACK, TREE_NODE (RED, d, key3, val3, e), key1, val1, b))));
# 1R 1B Wikipedia Case 4 (Mirrored)
# / \ / \
# 2B b -> 2R b
# c d c d
#
copy_path' (RIGHT (RED, TREE_NODE (BLACK, c, key2, val2, d), key1, val1, path), b) # Case 2R
=>
(FALSE, copy_path (path, TREE_NODE (BLACK, TREE_NODE (RED, c, key2, val2, d), key1, val1, b)));
#
# BLACK sib has exchanged color with RED parent;
# this makes up the BLACK deficit on our side
# without affecting black path counts on sib's side,
# so we're done rebalancing and can revert to
# simple path copying for the rest of the way back
# to the root.
# 1B 1B Wikipedia Case 3 (Mirrored)
# / \ / \
# 2B b -> 2R b
# c d c d
#
copy_path' (RIGHT (BLACK, TREE_NODE (BLACK, c, key2, val2, d), key1, val1, path), b) # Case 2R
=>
copy_path' (path, TREE_NODE (BLACK, TREE_NODE (RED, c, key2, val2, d), key1, val1, b));
copy_path' (path, t)
=>
(FALSE, copy_path (path, t));
end;
# Here's our routine for the descent phase.
#
# Arguments:
# key_to_delete: key identifying which node to delete
# current_subtree: Subtree to search, using "in-order": Left subtree first, then this node, then right subtree.
# descent_path: Stack of values recording our descent path to date.
#
fun descend (key_to_delete, EMPTY, descent_path)
=>
raise exception lib_base::NOT_FOUND;
descend (key_to_delete, TREE_NODE (color, left_subtree, key, value, right_subtree), descent_path)
=>
case (key::compare (key_to_delete, key))
#
LESS => descend (key_to_delete, left_subtree, LEFT (color, key, value, right_subtree, descent_path));
GREATER => descend (key_to_delete, right_subtree, RIGHT (color, left_subtree, key, value, descent_path));
EQUAL => join (color, left_subtree, right_subtree, descent_path);
esac;
end
# Once we've found and removed the requested node,
# we are left with the problem of combining its
# former left and right subtrees into a replacement
# for the node -- while preserving or restoring
# our RED/BLACK invariants. That's our job here.
#
# Arguments:
# color: Color of now-deleted node.
# left_subtree: Left subtree of now-deleted node.
# right_subtree: Right subtree of now-deleted node.
# descent_path: Path by which we reached now-deleted node.
# (To us at this point the descent_path reperesents
# the worklist of nodes to duplicate in order to
# produce the result tree.)
#
also
fun join (RED, EMPTY, EMPTY, descent_path) => copy_path (descent_path, EMPTY );
join (RED, left_subtree, EMPTY, descent_path) => copy_path (descent_path, left_subtree );
join (RED, EMPTY, right_subtree, descent_path) => copy_path (descent_path, right_subtree );
join (BLACK, left_subtree, EMPTY, descent_path) => #2 (copy_path' (descent_path, left_subtree));
join (BLACK, EMPTY, right_subtree, descent_path) => #2 (copy_path' (descent_path, right_subtree));
join (color, left_subtree, right_subtree, descent_path)
=>
{ # We have two non-empty children.
#
# We bubble up a key-val pair to fill this node,
# creating a delete-node problem below which is
# guaranteed to have at most one nonempty child:
#
# Replace deleted keyval with
# keyval from first node in our
# right subtree:
#
my (replacement_key, replacement_val) = min_keyval right_subtree;
# Now, act as though the delete never happened:
# just continue our descent, with replacement_key in
# right subtree as our new delete target:
#
descend( replacement_key, right_subtree, RIGHT (color, left_subtree, replacement_key, replacement_val, descent_path) );
}
where
#
fun min_keyval (TREE_NODE (_, EMPTY, key, value, _)) => (key, value);
min_keyval (TREE_NODE (_, left_subtree, _, _, _)) => min_keyval left_subtree;
min_keyval EMPTY => raise exception MATCH; # "Impossible"
end;
end;
end;
dropped_value
=
case (get (input, key_to_drop))
#
THE value => value;
NULL => raise exception lib_base::NOT_FOUND;
esac;
new_tree
=
case (descend (key_to_drop, input_tree, TOP))
#
# Enforce the invariant that
# the root node is always BLACK:
#
TREE_NODE (RED, left_subtree, key, value, right_subtree)
=>
TREE_NODE (BLACK, left_subtree, key, value, right_subtree);
ok => ok;
esac;
(MAP (n_items - 1, new_tree), dropped_value);
};
herein
fun drop (old_map, key_to_drop) # Return new_map, or old_map if key_to_drop was not found.
=
#1 (drop' (old_map, key_to_drop))
except
lib_base::NOT_FOUND = old_map;
fun get_and_drop (old_map, key_to_drop) # Return (new_map, THE value) or (old_map, NULL) if key_to_drop was not found.
=
{ (drop' (old_map, key_to_drop))
->
(new_map, val);
(new_map, THE val);
}
except
lib_base::NOT_FOUND = (old_map, NULL);
end; # stipulate
# Return the first item in the map (or NULL if it is empty):
#
fun first_val_else_null (MAP(_, t))
=
f t
where
fun f EMPTY => NULL;
f (TREE_NODE(_, EMPTY, _, val1, _)) => THE val1;
f (TREE_NODE(_, a, _, _, _)) => f a;
end;
end;
#
fun first_keyval_else_null (MAP(_, t))
=
f t
where
fun f EMPTY => NULL;
f (TREE_NODE(_, EMPTY, key1, val1, _)) => THE (key1, val1);
f (TREE_NODE(_, a, _, _, _)) => f a;
end;
end;
# Return the last item in the map (or NULL if it is empty):
#
fun last_val_else_null (MAP(_, t))
=
f t
where
fun f EMPTY => NULL;
f (TREE_NODE(_, _, _, val1, EMPTY)) => THE val1;
f (TREE_NODE(_, _, _, _, a )) => f a;
end;
end;
#
fun last_keyval_else_null (MAP(_, t))
=
f t
where
fun f EMPTY => NULL;
f (TREE_NODE(_, _, key1, val1, EMPTY)) => THE (key1, val1);
f (TREE_NODE(_, _, _, _, a )) => f a;
end;
end;
# Return the number of items in the map:
#
fun vals_count (MAP (n, _))
=
n;
#
fun fold_forward f
=
{ fun foldf (EMPTY, accum)
=>
accum;
foldf (TREE_NODE(_, a, _, val1, b), accum)
=>
foldf (b, f (val1, foldf (a, accum)));
end;
\\ init
=
\\ (MAP(_, m))
=
foldf (m, init);
};
#
fun keyed_fold_forward f
=
{ fun foldf (EMPTY, accum)
=>
accum;
foldf (TREE_NODE(_, a, key1, val1, b), accum)
=>
foldf (b, f (key1, val1, foldf (a, accum)));
end;
\\ init
=
\\ (MAP(_, m))
=
foldf (m, init);
};
#
fun fold_backward f
=
{ fun foldf (EMPTY, accum)
=>
accum;
foldf (TREE_NODE(_, a, _, val1, b), accum)
=>
foldf (a, f (val1, foldf (b, accum)));
end;
\\ init
=
\\ (MAP(_, m))
=
foldf (m, init);
};
#
fun keyed_fold_backward f
=
{ fun foldf (EMPTY, accum)
=>
accum;
foldf (TREE_NODE(_, a, key1, val1, b), accum)
=>
foldf (a, f (key1, val1, foldf (b, accum)));
end;
\\ init
=
\\ (MAP(_, m))
=
foldf (m, init);
};
#
fun vals_list m
=
fold_backward (!) [] m;
#
fun keyvals_list m
=
keyed_fold_backward (\\ (key1, val1, l) = (key1, val1) ! l) [] m;
# Return an ordered list of the keys in the map:
#
fun keys_list m
=
keyed_fold_backward (\\ (k, _, l) = k ! l) [] m;
# Functions for walking the tree
# while keeping a stack of parents
# to be visited:
#
fun next ((t as TREE_NODE(_, _, _, _, b)) ! rest) => (t, left (b, rest));
next _ => (EMPTY, []);
end
also
fun left (EMPTY, rest)
=>
rest;
left (t as TREE_NODE(_, a, _, _, _), rest)
=>
left (a, t ! rest);
end;
#
fun start m
=
left (m, []);
# Given an ordering on the map's vals,
# return an ordering on the maps:
#
fun compare_sequences compare_vals
=
{ fun compare (tree1, tree2)
=
case (next tree1, next tree2)
#
((EMPTY, _), (EMPTY, _)) => EQUAL;
((EMPTY, _), _ ) => LESS;
(_, (EMPTY, _)) => GREATER;
( (TREE_NODE(_, _, key1, val1, _), r1),
(TREE_NODE(_, _, key2, val2, _), r2)
)
=>
case (key::compare (key1, key2))
#
EQUAL => case (compare_vals (val1, val2))
#
EQUAL => compare (r1, r2);
order => order;
esac;
order => order;
esac;
esac;
\\ ( MAP(_, m1),
MAP(_, m2)
)
=
compare (start m1, start m2);
};
# Support for constructing red-black trees
# in linear time from increasing ordered
# sequences.
#
# Based on a description by Ralf Hinze
# http://www.eecs.usma.edu/webs/people/okasaki/waaapl99.pdf#page=95
# which represents tree structures
# via binary numbers using only the digits
# 1 and 2. (0 is used only for the empty tree.)
#
# Note that the elements in the digits
# are ordered with the largest on the left,
# whereas the elements of the trees
# are ordered with the largest on the right.
#
Digit(X)
= ZERO
| ONE ((key::Key, X, Tree(X), Digit(X)) )
| TWO ((key::Key, X, Tree(X), key::Key, X, Tree(X), Digit(X)) )
;
# Add a keyval that is guaranteed
# to be larger than any in l:
#
fun add_item (key, value, l)
=
incr (key, value, EMPTY, l)
where
fun incr (key, value, tree, ZERO)
=>
ONE (key, value, tree, ZERO);
incr ( key1, val1, tree1,
ONE ( key2, val2, tree2,
rest
)
)
=>
TWO ( key1, val1, tree1,
key2, val2, tree2,
rest
);
incr ( key1, val1, tree1,
TWO ( key2, val2, tree2,
key3, val3, tree3,
rest
)
)
=>
ONE ( key1, val1, tree1,
incr ( key2, val2, TREE_NODE (BLACK, tree3, key3, val3, tree2),
rest
)
);
end;
end;
# Link the digits into a tree:
#
fun link_all digits
=
link (EMPTY, digits)
where
# We consume digits from our second argument and
# accumulate our eventual result in our first argument:
#
fun link (result_tree, ZERO)
=>
result_tree;
link (result_tree, ONE (key, value, tree, rest))
=>
link (TREE_NODE (BLACK, tree, key, value, result_tree), rest);
link ( result_tree,
TWO ( key1, val1, tree1,
key2, val2, tree2,
rest
)
)
=>
link ( TREE_NODE(BLACK, TREE_NODE (RED, tree2, key2, val2, tree1), key1, val1, result_tree),
rest
);
end;
end;
fun difference_with (m1, m2)
=
{ keys_to_remove = keys_list m2;
#
remove (m1, keys_to_remove)
where
fun remove (m1, [])
=>
m1;
remove (m1, key ! rest)
=>
remove (drop (m1, key), rest);
end;
end;
};
fun from_list (pairs: List((key::Key, X)))
=
{ tree = empty;
#
add (tree, pairs)
where
fun add (tree, [])
=>
tree;
add (tree, (key,val) ! rest)
=>
add (set (tree, key, val), rest);
end;
end;
};
stipulate
#
fun wrap f (MAP(_, map1), MAP(_, map2))
=
{ my (n, result)
=
f (start map1, start map2, 0, ZERO);
MAP (n, link_all result);
};
#
fun set'' ((EMPTY, _), n, result)
=>
(n, result);
set'' ((TREE_NODE(_, _, key1, val1, _), r), n, result)
=>
set'' (next r, n+1, add_item (key1, val1, result));
end;
herein
# Return a map whose domain is the union
# of the domains of the two input maps,
# using 'merge_fn' to select the vals
# for keys that are in both domains.
#
fun union_with merge_fn
=
wrap union
where
fun union (tree1, tree2, n, result)
=
case ( next tree1,
next tree2
)
#
((EMPTY, _), (EMPTY, _)) => (n, result);
((EMPTY, _), tree2 ) => set'' (tree2, n, result);
(tree1, (EMPTY, _)) => set'' (tree1, n, result);
( (TREE_NODE(_, _, key1, val1, _), rest1),
(TREE_NODE(_, _, key2, val2, _), rest2)
)
=>
case (key::compare (key1, key2))
#
LESS => union (rest1, tree2, n+1, add_item (key1, val1, result));
EQUAL => union (rest1, rest2, n+1, add_item (key1, merge_fn (val1, val2), result));
GREATER => union (tree1, rest2, n+1, add_item (key2, val2, result));
esac;
esac;
end;
#
fun keyed_union_with merge_fn
=
wrap union
where
fun union (tree1, tree2, n, result)
=
case ( next tree1,
next tree2
)
#
((EMPTY, _), (EMPTY, _)) => (n, result);
((EMPTY, _), tree2 ) => set'' (tree2, n, result);
(tree1, (EMPTY, _)) => set'' (tree1, n, result);
( (TREE_NODE(_, _, key1, val1, _), rest1),
(TREE_NODE(_, _, key2, val2, _), rest2)
)
=>
case (key::compare (key1, key2))
#
LESS => union (rest1, tree2, n+1, add_item (key1, val1, result));
EQUAL => union (rest1, rest2, n+1, add_item (key1, merge_fn (key1, val1, val2), result));
GREATER => union (tree1, rest2, n+1, add_item (key2, val2, result));
esac;
esac;
end;
# Return a map whose domain is
# the intersection of the domains
# of the two input maps, using the
# supplied function to define the range.
#
fun intersect_with merge_fn
=
wrap intersect
where
fun intersect (tree1, tree2, n, result)
=
case ( next tree1,
next tree2
)
#
((TREE_NODE(_, _, key1, val1, _), r1), (TREE_NODE(_, _, key2, val2, _), r2))
=>
case (key::compare (key1, key2))
#
LESS => intersect (r1, tree2, n, result);
EQUAL => intersect (r1, r2, n+1, add_item (key1, merge_fn (val1, val2), result));
GREATER => intersect (tree1, r2, n, result);
esac;
_ => (n, result);
esac;
end;
#
fun keyed_intersect_with merge_fn
=
wrap intersect
where
fun intersect (tree1, tree2, n, result)
=
case ( next tree1,
next tree2
)
#
( (TREE_NODE(_, _, key1, val1, _), r1),
(TREE_NODE(_, _, key2, val2, _), r2)
)
=>
case (key::compare (key1, key2))
#
LESS => intersect (r1, tree2, n, result);
EQUAL => intersect (r1, r2, n+1, add_item (key1, merge_fn (key1, val1, val2), result));
GREATER => intersect (tree1, r2, n, result);
esac;
_ => (n, result);
esac;
end;
#
fun merge_with merge_fn
=
wrap merge
where
fun merge (tree1, tree2, n, result)
=
case ( next tree1,
next tree2
)
#
( (EMPTY, _),
(EMPTY, _)
)
=>
(n, result);
((EMPTY, _), (TREE_NODE(_, _, key2, val2, _), r2))
=>
mergef (key2, NULL, THE val2, tree1, r2, n, result);
((TREE_NODE(_, _, key1, val1, _), r1), (EMPTY, _))
=>
mergef (key1, THE val1, NULL, r1, tree2, n, result);
( (TREE_NODE(_, _, key1, val1, _), r1),
(TREE_NODE(_, _, key2, val2, _), r2)
)
=>
case (key::compare (key1, key2))
#
LESS => mergef (key1, THE val1, NULL, r1, tree2, n, result);
EQUAL => mergef (key1, THE val1, THE val2, r1, r2, n, result);
GREATER => mergef (key2, NULL, THE val2, tree1, r2, n, result);
esac;
esac
also
fun mergef (k, x1, x2, r1, r2, n, result)
=
case (merge_fn (x1, x2))
#
THE val2 => merge (r1, r2, n+1, add_item (k, val2, result));
NULL => merge (r1, r2, n, result );
esac;
end;
#
fun keyed_merge_with merge_fn
=
wrap merge
where
fun merge (tree1, tree2, n, result)
=
case ( next tree1,
next tree2
)
#
( (EMPTY, _),
(EMPTY, _)
)
=>
(n, result);
((EMPTY, _), (TREE_NODE(_, _, key2, val2, _), r2))
=>
mergef (key2, NULL, THE val2, tree1, r2, n, result);
((TREE_NODE(_, _, key1, val1, _), r1), (EMPTY, _))
=>
mergef (key1, THE val1, NULL, r1, tree2, n, result);
((TREE_NODE(_, _, key1, val1, _), r1), (TREE_NODE(_, _, key2, val2, _), r2))
=>
case (key::compare (key1, key2))
#
LESS => mergef (key1, THE val1, NULL, r1, tree2, n, result);
EQUAL => mergef (key1, THE val1, THE val2, r1, r2, n, result);
GREATER => mergef (key2, NULL, THE val2, tree1, r2, n, result);
esac;
esac
also
fun mergef (k, x1, x2, r1, r2, n, result)
=
case (merge_fn (k, x1, x2))
#
THE val2 => merge (r1, r2, n+1, add_item (k, val2, result));
NULL => merge (r1, r2, n, result);
esac;
end;
end; # stipulate
#
fun apply f
=
{ fun appf EMPTY
=>
();
appf (TREE_NODE(_, a, _, val1, b))
=>
{ appf a;
f val1;
appf b;
};
end;
\\ (MAP(_, m))
=
appf m;
};
#
fun keyed_apply f
=
{ fun appf EMPTY
=>
();
appf (TREE_NODE(_, a, key1, val1, b))
=>
{ appf a;
f (key1, val1);
appf b;
};
end;
\\ (MAP(_, m))
=
appf m;
};
#
fun map f
=
{ fun mapf EMPTY
=>
EMPTY;
mapf (TREE_NODE (color, a, key1, val1, b))
=>
TREE_NODE (color, mapf a, key1, f val1, mapf b);
end;
\\ (MAP (n, m))
=
MAP (n, mapf m);
};
#
fun keyed_map f
=
{ fun mapf EMPTY
=>
EMPTY;
mapf (TREE_NODE (color, a, key1, val1, b))
=>
TREE_NODE (color, mapf a, key1, f (key1, val1), mapf b);
end;
\\ (MAP (n, m))
=
MAP (n, mapf m);
};
# Filter out those elements of the map
# that do not satisfy given predicate.
#
# The filtering is done in increasing map order:
#
fun filter predicate (MAP(_, t))
=
MAP (n, link_all result)
where
fun walk (EMPTY, n, result)
=>
(n, result);
walk (TREE_NODE(_, a, key1, val1, b), n, result)
=>
{ my (n, result)
=
walk (a, n, result);
if (predicate val1) walk (b, n+1, add_item (key1, val1, result));
else walk (b, n, result); fi;
};
end;
my (n, result)
=
walk (t, 0, ZERO);
end;
#
fun keyed_filter predicate (MAP(_, t))
=
MAP (n, link_all result)
where
fun walk (EMPTY, n, result)
=>
(n, result);
walk (TREE_NODE(_, a, key1, val1, b), n, result)
=>
{ my (n, result)
=
walk (a, n, result);
if (predicate (key1, val1)) walk (b, n+1, add_item (key1, val1, result));
else walk (b, n, result); fi;
};
end;
my (n, result)
=
walk (t, 0, ZERO);
end;
# Map a partial function
# over the elements of a map
# in increasing map order:
#
fun map' f
=
keyed_fold_forward f' empty
where
fun f' (key1, val1, m)
=
case (f val1)
#
THE val2 => set (m, key1, val2);
NULL => m;
esac;
end;
#
fun keyed_map' f
=
keyed_fold_forward f' empty
where
fun f' (key1, val1, m)
=
case (f (key1, val1))
#
NULL => m;
THE val2 => set (m, key1, val2);
esac;
end;
};