## tuplebase.pkg
#
# First-cut simple fully-persistent tuplebase.
# This version supports only duples and triples. [1]
#
# We deliberately do not support searching the
# tuplebase by String or Float values. [2]
#
# We do support searching the tuplebase by any
# combination of slots. [3]
#
# Space usage will be dominated by tuples rather than Atoms:
#
# Each Duple will consume:
# 2 words directly
# 2 words each in 2 single-field indices.
# 1 word each in 1 Duple-set index.
# --------------
# 7 words total. Internal heap overhead will add another 3 words or so; call it 10 words/duple == 40 bytes/duple on a 32-bit machine.
#
# Each Triple will consume:
# 3 words directly
# 2 words each in 3 single-field indices.
# 4 words each in 3 double-field indices.
# 1 word each in 1 Triple-set index.
# --------------
# 22 words total. Internal heap overhead will add another 9 words or so; call it 30 words/triple == 120 bytes/triple on a 32-bit machine.
#
# So on a 32-bit machine a tuplebase containing million triples will
# consume about 128MB -- quite reasonable on today's desktop machines.
#
# On a 64-bit machine that would be 256MB -- except Mythryl doesn't
# support 64-bit architectures yet. :-) -- 2014-07-16 CrT
# Compiled by:
#
src/lib/std/standard.libstipulate
package im1 = int_red_black_map; # int_red_black_map is from
src/lib/src/int-red-black-map.pkg package is1 = int_red_black_set; # int_red_black_set is from
src/lib/src/int-red-black-set.pkgherein
package tuplebase
: Tuplebase # Tuplebase is from
src/lib/src/tuplebase.api {
Other = Exception;
#
Atom_Datum = NONE
| FLOAT Float
| STRING String
| OTHER Other
| TBASE Exception
# Making Atom_Datum and Tuplebase mutually recursive would be messy, so we use the exception hack instead.
;
Atom = { id: Int,
datum: Atom_Datum
};
Duple = (Atom, Atom);
Triple = (Atom, Atom, Atom);
fun compare_i2
( ( i1a: Int,
i1b: Int
),
( i2a: Int,
i2b: Int
)
)
=
case (int::compare (i1a, i2a))
#
GREATER => GREATER;
LESS => LESS;
EQUAL => int::compare (i1b, i2b);
esac;
fun compare_12of2
( ( { id => id1a, ... },
{ id => id1b, ... }
): Duple,
( { id => id2a, ... },
{ id => id2b, ... }
): Duple
)
=
case (int::compare (id1a, id2a))
#
GREATER => GREATER;
LESS => LESS;
EQUAL => (int::compare (id1b, id2b));
esac;
fun compare_12of3
( ( { id => id1a, ... },
{ id => id1b, ... },
{ id => id1c, ... }
): Triple,
( { id => id2a, ... },
{ id => id2b, ... },
{ id => id2c, ... }
): Triple
)
=
case (int::compare (id1a, id2a))
#
GREATER => GREATER;
LESS => LESS;
EQUAL => (int::compare (id1b, id2b));
esac;
fun compare_13of3
( ( { id => id1a, ... },
{ id => id1b, ... },
{ id => id1c, ... }
): Triple,
( { id => id2a, ... },
{ id => id2b, ... },
{ id => id2c, ... }
): Triple
)
=
case (int::compare (id1a, id2a))
#
GREATER => GREATER;
LESS => LESS;
EQUAL => (int::compare (id1c, id2c));
esac;
fun compare_23of3
( ( { id => id1a, ... },
{ id => id1b, ... },
{ id => id1c, ... }
): Triple,
( { id => id2a, ... },
{ id => id2b, ... },
{ id => id2c, ... }
): Triple
)
=
case (int::compare (id1b, id2b))
#
GREATER => GREATER;
LESS => LESS;
EQUAL => (int::compare (id1c, id2c));
esac;
fun compare_123of3
( ( { id => id1a, ... },
{ id => id1b, ... },
{ id => id1c, ... }
): Triple,
( { id => id2a, ... },
{ id => id2b, ... },
{ id => id2c, ... }
): Triple
)
=
case (int::compare (id1a, id2a))
#
GREATER => GREATER;
LESS => LESS;
EQUAL => case (int::compare (id1b, id2b))
GREATER => GREATER;
LESS => LESS;
EQUAL => int::compare (id1c, id2c);
esac;
esac;
fun compare_123of3
( ( { id => id1a, ... },
{ id => id1b, ... },
{ id => id1c, ... }
): Triple,
( { id => id2a, ... },
{ id => id2b, ... },
{ id => id2c, ... }
): Triple
)
=
case (int::compare (id1a, id2a))
#
GREATER => GREATER;
LESS => LESS;
EQUAL => case (int::compare (id1b, id2b))
GREATER => GREATER;
LESS => LESS;
EQUAL => int::compare (id1c, id2c);
esac;
esac;
package im2
=
red_black_map_g (
#
package {
Key = (Int, Int);
#
compare = compare_i2;
}
);
package ds # Sets of Duples
=
red_black_set_g ( # red_black_set_g is from
src/lib/src/red-black-set-g.pkg #
package {
Key = Duple;
#
compare = compare_12of2;
}
);
package ts # Sets of Triples
=
red_black_set_g ( # red_black_set_g is from
src/lib/src/red-black-set-g.pkg #
package {
Key = Triple;
#
compare = compare_123of3;
}
);
Tuplebase
=
{ index_1of2: im1::Map( ds::Set ),
index_2of2: im1::Map( ds::Set ),
#
index_12of2: ds::Set,
#
#
index_1of3: im1::Map( ts::Set ),
index_2of3: im1::Map( ts::Set ),
index_3of3: im1::Map( ts::Set ),
#
index_12of3: im2::Map( ts::Set ),
index_13of3: im2::Map( ts::Set ),
index_23of3: im2::Map( ts::Set ),
#
index_123of3: ts::Set
};
empty_tuplebase
=
{ index_1of2 => im1::empty: im1::Map( ds::Set ),
index_2of2 => im1::empty: im1::Map( ds::Set ),
#
index_12of2 => ds::empty: ds::Set,
#
#
index_1of3 => im1::empty: im1::Map( ts::Set ),
index_2of3 => im1::empty: im1::Map( ts::Set ),
index_3of3 => im1::empty: im1::Map( ts::Set ),
#
index_12of3 => im2::empty: im2::Map( ts::Set ),
index_13of3 => im2::empty: im2::Map( ts::Set ),
index_23of3 => im2::empty: im2::Map( ts::Set ),
#
index_123of3 => ts::empty: ts::Set
};
fun put_duple
(
{ index_1of2,
index_2of2,
#
index_12of2,
#
#
index_1of3,
index_2of3,
index_3of3,
#
index_12of3,
index_13of3,
index_23of3,
#
index_123of3
}: Tuplebase,
duple as
( atom1 as { id => id1, ... },
atom2 as { id => id2, ... }
): Duple
)
=
{ index_1of2
=
case (im1::get (index_1of2, id1))
#
THE set => im1::set (index_1of2, id1, ds::add (set, duple));
NULL => im1::set (index_1of2, id1, ds::singleton(duple));
esac;
index_2of2
=
case (im1::get (index_2of2, id2))
#
THE set => im1::set (index_2of2, id2, ds::add (set, duple));
NULL => im1::set (index_2of2, id2, ds::singleton(duple));
esac;
index_12of2
=
ds::add (index_12of2, duple);
{ index_1of2,
index_2of2,
#
index_12of2,
#
#
index_1of3,
index_2of3,
index_3of3,
#
index_12of3,
index_13of3,
index_23of3,
#
index_123of3
}: Tuplebase;
};
fun put_triple
(
{ index_1of2,
index_2of2,
#
index_12of2,
#
#
index_1of3,
index_2of3,
index_3of3,
#
index_12of3,
index_13of3,
index_23of3,
#
index_123of3
}: Tuplebase,
triple as
( atom1 as { id => id1, ... },
atom2 as { id => id2, ... },
atom3 as { id => id3, ... }
): Triple
)
=
{ index_1of3
=
case (im1::get (index_1of3, id1))
#
THE set => im1::set (index_1of3, id1, ts::add (set, triple));
NULL => im1::set (index_1of3, id1, ts::singleton(triple));
esac;
index_2of3
=
case (im1::get (index_2of3, id2))
#
THE set => im1::set (index_2of3, id2, ts::add (set, triple));
NULL => im1::set (index_2of3, id2, ts::singleton(triple));
esac;
index_3of3
=
case (im1::get (index_3of3, id3))
#
THE set => im1::set (index_3of3, id3, ts::add (set, triple));
NULL => im1::set (index_3of3, id3, ts::singleton(triple));
esac;
index_12of3
=
case (im2::get (index_12of3, (id1, id2)))
#
THE set => im2::set (index_12of3, (id1, id2), ts::add (set, triple));
NULL => im2::set (index_12of3, (id1, id2), ts::singleton(triple));
esac;
index_13of3
=
case (im2::get (index_13of3, (id1, id3)))
#
THE set => im2::set (index_13of3, (id1, id3), ts::add (set, triple));
NULL => im2::set (index_13of3, (id1, id3), ts::singleton(triple));
esac;
index_23of3
=
case (im2::get (index_23of3, (id2, id3)))
#
THE set => im2::set (index_23of3, (id2, id3), ts::add (set, triple));
NULL => im2::set (index_23of3, (id2, id3), ts::singleton(triple));
esac;
index_123of3
=
ts::add (index_123of3, triple);
{ index_1of2,
index_2of2,
#
index_12of2,
#
#
index_1of3,
index_2of3,
index_3of3,
#
index_12of3,
index_13of3,
index_23of3,
#
index_123of3
}: Tuplebase;
};
fun drop_duple
(
{ index_1of2,
index_2of2,
#
index_12of2,
#
#
index_1of3,
index_2of3,
index_3of3,
#
index_12of3,
index_13of3,
index_23of3,
#
index_123of3
}: Tuplebase,
duple as
( atom1 as { id => id1, ... },
atom2 as { id => id2, ... }
): Duple
)
=
{ index_1of2
=
case (im1::get (index_1of2, id1))
#
THE set => if (ds::vals_count(set) > 1) im1::set (index_1of2, id1, ds::drop (set, duple));
else im1::drop (index_1of2, id1);
fi;
NULL => index_1of2; # Duple isn't in tuplebase. Possibly we should raise an exception here.
esac;
index_2of2
=
case (im1::get (index_2of2, id2))
#
THE set => if (ds::vals_count(set) > 1) im1::set (index_2of2, id2, ds::drop (set, duple));
else im1::drop (index_1of2, id2);
fi;
NULL => index_2of2; # Duple isn't in tuplebase. Possibly we should raise an exception here.
esac;
index_12of2
=
ds::drop (index_12of2, duple);
{ index_1of2,
index_2of2,
#
index_12of2,
#
#
index_1of3,
index_2of3,
index_3of3,
#
index_12of3,
index_13of3,
index_23of3,
#
index_123of3
}: Tuplebase;
};
fun drop_triple
(
{ index_1of2,
index_2of2,
#
index_12of2,
#
#
index_1of3,
index_2of3,
index_3of3,
#
index_12of3,
index_13of3,
index_23of3,
#
index_123of3
}: Tuplebase,
triple as
( atom1 as { id => id1, ... },
atom2 as { id => id2, ... },
atom3 as { id => id3, ... }
): Triple
)
=
{ index_1of3
=
case (im1::get (index_1of3, id1))
#
THE set => if (ts::vals_count(set) > 1) im1::set (index_1of3, id1, ts::drop (set, triple));
else im1::drop (index_1of3, id1);
fi;
NULL => index_1of3; # Triple isn't in tuplebase. Possibly we should raise an exception here.
esac;
index_2of3
=
case (im1::get (index_2of3, id2))
#
THE set => if (ts::vals_count(set) > 1) im1::set (index_2of3, id2, ts::drop (set, triple));
else im1::drop (index_2of3, id2);
fi;
NULL => index_2of3; # Triple isn't in tuplebase. Possibly we should raise an exception here.
esac;
index_3of3
=
case (im1::get (index_3of3, id3))
#
THE set => if (ts::vals_count(set) > 1) im1::set (index_3of3, id3, ts::drop (set, triple));
else im1::drop (index_3of3, id3);
fi;
NULL => index_3of3; # Triple isn't in tuplebase. Possibly we should raise an exception here.
esac;
index_12of3
=
case (im2::get (index_12of3, (id1, id2)))
#
THE set => if (ts::vals_count(set) > 1) im2::set (index_12of3, (id1, id2), ts::drop (set, triple));
else im2::drop (index_12of3, (id1, id2));
fi;
NULL => index_12of3; # Triple isn't in tuplebase. Possibly we should raise an exception here.
esac;
index_13of3
=
case (im2::get (index_13of3, (id1, id3)))
#
THE set => if (ts::vals_count(set) > 1) im2::set (index_13of3, (id1, id3), ts::drop (set, triple));
else im2::drop (index_13of3, (id1, id3));
fi;
NULL => index_13of3; # Triple isn't in tuplebase. Possibly we should raise an exception here.
esac;
index_23of3
=
case (im2::get (index_23of3, (id2, id3)))
#
THE set => if (ts::vals_count(set) > 1) im2::set (index_23of3, (id2, id3), ts::drop (set, triple));
else im2::drop (index_23of3, (id2, id3));
fi;
NULL => index_23of3; # Triple isn't in tuplebase. Possibly we should raise an exception here.
esac;
index_123of3
=
ts::drop (index_123of3, triple);
{ index_1of2,
index_2of2,
#
index_12of2,
#
#
index_1of3,
index_2of3,
index_3of3,
#
index_12of3,
index_13of3,
index_23of3,
#
index_123of3
}: Tuplebase;
};
fun get_duples (t: Tuplebase) = t.index_12of2;
#
fun get_duples1 (t: Tuplebase, a: Atom) = im1::get (t.index_1of2, a.id);
fun get_duples2 (t: Tuplebase, a: Atom) = im1::get (t.index_2of2, a.id);
#
fun has_duple (t: Tuplebase, d: Duple) = ds::member (t.index_12of2, d);
fun get_triples (t: Tuplebase) = t.index_123of3;
#
fun get_triples1 (t: Tuplebase, a: Atom) = im1::get (t.index_1of3, a.id);
fun get_triples2 (t: Tuplebase, a: Atom) = im1::get (t.index_2of3, a.id);
fun get_triples3 (t: Tuplebase, a: Atom) = im1::get (t.index_3of3, a.id);
#
fun get_triples12 (t: Tuplebase, a: Atom, b: Atom) = im2::get (t.index_12of3, (a.id, b.id));
fun get_triples13 (t: Tuplebase, a: Atom, c: Atom) = im2::get (t.index_13of3, (a.id, c.id));
fun get_triples23 (t: Tuplebase, b: Atom, c: Atom) = im2::get (t.index_23of3, (b.id, c.id));
#
fun has_triple (t: Tuplebase, d: Triple) = ts::member (t.index_123of3, d);
fun make_atom ()
=
{ id => id_to_int (issue_unique_id ()),
datum => NONE
};
fun make_string_atom (s: String)
=
{ id => id_to_int (issue_unique_id ()),
datum => STRING s
};
fun make_float_atom (f: Float)
=
{ id => id_to_int (issue_unique_id ()),
datum => FLOAT f
};
fun make_other_atom (x: Other)
=
{ id => id_to_int (issue_unique_id ()),
datum => OTHER x
};
exception TUPLEBASE Tuplebase; # Making Atom_Datum and Tuplebase mutually recursive would be messy, so we use the exception hack instead.
fun make_tuplebase_atom (tuplebase: Tuplebase)
=
{ id => id_to_int (issue_unique_id ()),
datum => TBASE (TUPLEBASE tuplebase)
};
fun string_of ({ id, datum => STRING s }: Atom) => THE s;
string_of _ => NULL;
end;
fun float_of ({ id, datum => FLOAT f }: Atom) => THE f;
float_of _ => NULL;
end;
fun other_of ({ id, datum => OTHER x }: Atom) => THE x;
other_of _ => NULL;
end;
fun tuplebase_of ({ id, datum => TBASE (TUPLEBASE tuplebase) }: Atom) => THE tuplebase;
tuplebase_of _ => NULL;
end;
fun atoms_apply # Apply do_atom to all Atoms in Tuplebase.
( { index_12of2,
index_123of3,
...
}: Tuplebase
)
(do_atom: Atom -> Void)
=
{ ds::apply do_duple index_12of2;
ts::apply do_triple index_123of3;
}
where
already_seen = REF is1::empty;
#
fun do_duple ((a1, a2): Duple)
=
{
if (not (is1::member (*already_seen, a1.id)))
#
already_seen := is1::add (*already_seen, a1.id);
do_atom a1;
fi;
if (not (is1::member (*already_seen, a2.id)))
#
already_seen := is1::add (*already_seen, a2.id);
do_atom a2;
fi;
};
fun do_triple ((a1, a2, a3): Triple)
=
{
if (not (is1::member (*already_seen, a1.id)))
#
already_seen := is1::add (*already_seen, a1.id);
do_atom a1;
fi;
if (not (is1::member (*already_seen, a2.id)))
#
already_seen := is1::add (*already_seen, a2.id);
do_atom a2;
fi;
if (not (is1::member (*already_seen, a3.id)))
#
already_seen := is1::add (*already_seen, a3.id);
do_atom a3;
fi;
};
end;
};
end;
##########################################################################
# Note[1].
#
# Duples and triples cover the majority of practical cases.
#
# Also, supporting longer tuples would result in an exponential
# explosion in the number of indices, using our simple policy
# of maintaining all possible indices. Supporting longer
# tuples in a sensible way would require a different design,
# and result in a much more complex tuplebase. We may eventually
# want that more complex tuplebase, but having a simple tuplebase
# available will still be nice.
##########################################################################
# Note[2].
#
# We deliberately do not support searching the
# tuplebase by String or Float values. This
# feels messy, application-specific, and outside
# the core mission of this package.
#
# Also, in most cases applications that need
# a little functionality of this sort can simply
# maintain their own String -> Atom indices,
# or can wrap Tuplebase values with extra indices.
#
# Burdening applications which don't need such
# functionality with a lot of extra overhead seems
# Wrong. If there is enough need for a fancier
# tuplebase, it should probably be implemented as
# a separate fancy_tuplebase package, rather than
# complicate this package.
##########################################################################
# Note[3].
#
# We support searching the tuplebase by any
# combination of slots, and maintain indices
# for all such possibilities.
#
# This is likely to be overkill for any specific
# application; the intent is to provide a clean,
# simple off-the-shelf solution for general-purpose
# needs, not to be optimally space and time efficient
# for any specific application.
#
# If space and time efficiency prove critical, we
# should probably implement a code generator which
# accepts a specification of the needs of a particular
# application and produces a tuplebase custom-tuned
# to that specification.
#
# The package is also small enough to simply manually
# clone-and-mutate on an occasional basis.