src/lib/src/tuplebase.pkg

15.4.1015 src/lib/src/tuplebase.pkg

## tuplebase.pkg
#
# First-cut simple fully-persistent tuplebase.
# This version supports only duples and triples. [1]
#
# We deliberately do not support searching the
# tuplebase by String or Float values. [2]
#
# We do support searching the tuplebase by any
# combination of slots. [3]
#
# Space usage will be dominated by tuples rather than Atoms:
#
#     Each Duple will consume:
#        2 words directly
#        2 words each in 2 single-field indices.
#        1 word  each in 1 Duple-set    index.
#      --------------
#        7 words total.  Internal heap overhead will add another 3 words or so; call it 10 words/duple == 40 bytes/duple on a 32-bit machine.
#
#     Each Triple will consume:
#        3 words directly
#        2 words each in 3 single-field indices.
#        4 words each in 3 double-field indices.
#        1 word  each in 1 Triple-set   index.
#      --------------
#       22 words total.  Internal heap overhead will add another 9 words or so; call it 30 words/triple == 120 bytes/triple on a 32-bit machine.
#
# So on a 32-bit machine a tuplebase containing million triples will
# consume about 128MB -- quite reasonable on today's desktop machines.
#
# On a 64-bit machine that would be 256MB -- except Mythryl doesn't
# support 64-bit architectures yet. :-)         -- 2014-07-16 CrT

# Compiled by:
#     src/lib/std/standard.lib

stipulate
    package im1  =  int_red_black_map;                                          # int_red_black_map                             is from   src/lib/src/int-red-black-map.pkg
    package is1  =  int_red_black_set;                                          # int_red_black_set                             is from   src/lib/src/int-red-black-set.pkg
herein

    package tuplebase
    :       Tuplebase                                                           # Tuplebase                                     is from   src/lib/src/tuplebase.api
    {
        Other = Exception;
        #
        Atom_Datum = NONE
                   | FLOAT  Float
                   | STRING String
                   | OTHER  Other
                   | TBASE  Exception                                           # Making Atom_Datum and Tuplebase mutually recursive would be messy, so we use the exception hack instead.
                   ;

        Atom = { id:    Int,
                 datum: Atom_Datum
               };

        Duple  = (Atom, Atom);
        Triple = (Atom, Atom, Atom);

        fun compare_i2
              ( ( i1a: Int,
                  i1b: Int
                ),
                ( i2a: Int,
                  i2b: Int
                )
              )
            =
            case (int::compare (i1a, i2a))
                #
                GREATER =>  GREATER;
                LESS    =>  LESS;
                EQUAL   =>  int::compare (i1b, i2b);
            esac;

        fun compare_12of2
              ( ( { id => id1a, ... },
                  { id => id1b, ... }
                ):                              Duple,
                ( { id => id2a, ... },
                  { id => id2b, ... }
                ):                              Duple
              )
            =
            case (int::compare (id1a, id2a))
                #
                GREATER =>  GREATER;
                LESS    =>  LESS;
                EQUAL   =>  (int::compare (id1b, id2b));
            esac;

        fun compare_12of3
              ( ( { id => id1a, ... },
                  { id => id1b, ... },
                  { id => id1c, ... }
                ):                              Triple,
                ( { id => id2a, ... },
                  { id => id2b, ... },
                  { id => id2c, ... }
                ):                              Triple
              )
            =
            case (int::compare (id1a, id2a))
                #
                GREATER =>  GREATER;
                LESS    =>  LESS;
                EQUAL   =>  (int::compare (id1b, id2b));
            esac;

        fun compare_13of3
              ( ( { id => id1a, ... },
                  { id => id1b, ... },
                  { id => id1c, ... }
                ):                              Triple,
                ( { id => id2a, ... },
                  { id => id2b, ... },
                  { id => id2c, ... }
                ):                              Triple
              )
            =
            case (int::compare (id1a, id2a))
                #
                GREATER =>  GREATER;
                LESS    =>  LESS;
                EQUAL   =>  (int::compare (id1c, id2c));
            esac;

        fun compare_23of3
              ( ( { id => id1a, ... },
                  { id => id1b, ... },
                  { id => id1c, ... }
                ):                              Triple,
                ( { id => id2a, ... },
                  { id => id2b, ... },
                  { id => id2c, ... }
                ):                              Triple
              )
            =
            case (int::compare (id1b, id2b))
                #
                GREATER =>  GREATER;
                LESS    =>  LESS;
                EQUAL   =>  (int::compare (id1c, id2c));
            esac;

        fun compare_123of3
              ( ( { id => id1a, ... },
                  { id => id1b, ... },
                  { id => id1c, ... }
                ):                              Triple,
                ( { id => id2a, ... },
                  { id => id2b, ... },
                  { id => id2c, ... }
                ):                              Triple
              )
            =
            case (int::compare (id1a, id2a))
                #
                GREATER =>  GREATER;
                LESS    =>  LESS;
                EQUAL   =>  case (int::compare (id1b, id2b))
                                GREATER =>  GREATER;
                                LESS    =>  LESS;
                                EQUAL   =>  int::compare (id1c, id2c);
                            esac;
            esac;

        fun compare_123of3
              ( ( { id => id1a, ... },
                  { id => id1b, ... },
                  { id => id1c, ... }
                ):                              Triple,
                ( { id => id2a, ... },
                  { id => id2b, ... },
                  { id => id2c, ... }
                ):                              Triple
              )
            =
            case (int::compare (id1a, id2a))
                #
                GREATER =>  GREATER;
                LESS    =>  LESS;
                EQUAL   =>  case (int::compare (id1b, id2b))
                                GREATER =>  GREATER;
                                LESS    =>  LESS;
                                EQUAL   =>  int::compare (id1c, id2c);
                            esac;
            esac;

        package im2
            =
            red_black_map_g (
                #
                package {
                    Key = (Int, Int);
                    #
                    compare = compare_i2;
                }
            );

        package ds                                                              # Sets of Duples
            =
            red_black_set_g (                                                   # red_black_set_g                               is from   src/lib/src/red-black-set-g.pkg
                #
                package {
                    Key = Duple;
                    #
                    compare = compare_12of2;
                }
            );

        package ts                                                              # Sets of Triples
            =
            red_black_set_g (                                                   # red_black_set_g                               is from   src/lib/src/red-black-set-g.pkg
                #
                package {
                    Key = Triple;
                    #
                    compare = compare_123of3;
                }
            );

        Tuplebase
          =
          { index_1of2:         im1::Map( ds::Set ),
            index_2of2:         im1::Map( ds::Set ),
            #
            index_12of2:                  ds::Set,
            #
            #
            index_1of3:         im1::Map( ts::Set ),
            index_2of3:         im1::Map( ts::Set ),
            index_3of3:         im1::Map( ts::Set ),
            #
            index_12of3:        im2::Map( ts::Set ),
            index_13of3:        im2::Map( ts::Set ),
            index_23of3:        im2::Map( ts::Set ),
            #
            index_123of3:                 ts::Set
          };

        empty_tuplebase
          =
          { index_1of2   =>     im1::empty:     im1::Map( ds::Set ),
            index_2of2   =>     im1::empty:     im1::Map( ds::Set ),
            #
            index_12of2  =>     ds::empty:                ds::Set,
            #
            #
            index_1of3   =>     im1::empty:     im1::Map( ts::Set ),
            index_2of3   =>     im1::empty:     im1::Map( ts::Set ),
            index_3of3   =>     im1::empty:     im1::Map( ts::Set ),
            #
            index_12of3  =>     im2::empty:     im2::Map( ts::Set ),
            index_13of3  =>     im2::empty:     im2::Map( ts::Set ),
            index_23of3  =>     im2::empty:     im2::Map( ts::Set ),
            #
            index_123of3 =>     ts::empty:                ts::Set
          };

        fun  put_duple
              (
                { index_1of2,
                  index_2of2,
                  #
                  index_12of2,
                  #
                  #
                  index_1of3,
                  index_2of3,
                  index_3of3,
                  #
                  index_12of3,
                  index_13of3,
                  index_23of3,
                  #
                  index_123of3
                }:                                                      Tuplebase,
                duple as
                ( atom1 as { id => id1, ... },
                  atom2 as { id => id2, ... }
                ):                                                      Duple
              )
            =
            {   index_1of2
                    =
                    case (im1::get (index_1of2, id1))
                        #
                        THE set =>  im1::set (index_1of2, id1, ds::add (set, duple));
                        NULL    =>  im1::set (index_1of2, id1, ds::singleton(duple));
                    esac;

                index_2of2
                    =
                    case (im1::get (index_2of2, id2))
                        #
                        THE set =>  im1::set (index_2of2, id2, ds::add (set, duple));
                        NULL    =>  im1::set (index_2of2, id2, ds::singleton(duple));
                    esac;

                index_12of2
                    =
                    ds::add (index_12of2, duple);

                { index_1of2,
                  index_2of2,
                  #
                  index_12of2,
                  #
                  #
                  index_1of3,
                  index_2of3,
                  index_3of3,
                  #
                  index_12of3,
                  index_13of3,
                  index_23of3,
                  #
                  index_123of3
                }:                                                      Tuplebase;
            };

        fun  put_triple
              (
                { index_1of2,
                  index_2of2,
                  #
                  index_12of2,
                  #
                  #
                  index_1of3,
                  index_2of3,
                  index_3of3,
                  #
                  index_12of3,
                  index_13of3,
                  index_23of3,
                  #
                  index_123of3
                }:                                                      Tuplebase,
                triple as
                ( atom1 as { id => id1, ... },
                  atom2 as { id => id2, ... },
                  atom3 as { id => id3, ... }
                ):                                                      Triple
              )
            =
            {   index_1of3
                    =
                    case (im1::get (index_1of3, id1))
                        #
                        THE set =>  im1::set (index_1of3, id1, ts::add (set, triple));
                        NULL    =>  im1::set (index_1of3, id1, ts::singleton(triple));
                    esac;

                index_2of3
                    =
                    case (im1::get (index_2of3, id2))
                        #
                        THE set =>  im1::set (index_2of3, id2, ts::add (set, triple));
                        NULL    =>  im1::set (index_2of3, id2, ts::singleton(triple));
                    esac;

                index_3of3
                    =
                    case (im1::get (index_3of3, id3))
                        #
                        THE set =>  im1::set (index_3of3, id3, ts::add (set, triple));
                        NULL    =>  im1::set (index_3of3, id3, ts::singleton(triple));
                    esac;

                index_12of3
                    =
                    case (im2::get (index_12of3, (id1, id2)))
                        #
                        THE set =>  im2::set (index_12of3, (id1, id2), ts::add (set, triple));
                        NULL    =>  im2::set (index_12of3, (id1, id2), ts::singleton(triple));
                    esac;

                index_13of3
                    =
                    case (im2::get (index_13of3, (id1, id3)))
                        #
                        THE set =>  im2::set (index_13of3, (id1, id3), ts::add (set, triple));
                        NULL    =>  im2::set (index_13of3, (id1, id3), ts::singleton(triple));
                    esac;

                index_23of3
                    =
                    case (im2::get (index_23of3, (id2, id3)))
                        #
                        THE set =>  im2::set (index_23of3, (id2, id3), ts::add (set, triple));
                        NULL    =>  im2::set (index_23of3, (id2, id3), ts::singleton(triple));
                    esac;

                index_123of3
                    =
                    ts::add (index_123of3, triple);

                { index_1of2,
                  index_2of2,
                  #
                  index_12of2,
                  #
                  #
                  index_1of3,
                  index_2of3,
                  index_3of3,
                  #
                  index_12of3,
                  index_13of3,
                  index_23of3,
                  #
                  index_123of3
                }:                                                      Tuplebase;
            };

        fun  drop_duple
              (
                { index_1of2,
                  index_2of2,
                  #
                  index_12of2,
                  #
                  #
                  index_1of3,
                  index_2of3,
                  index_3of3,
                  #
                  index_12of3,
                  index_13of3,
                  index_23of3,
                  #
                  index_123of3
                }:                                                      Tuplebase,
                duple as
                ( atom1 as { id => id1, ... },
                  atom2 as { id => id2, ... }
                ):                                                      Duple
              )
            =
            {   index_1of2
                    =
                    case (im1::get (index_1of2, id1))
                        #
                        THE set =>  if (ds::vals_count(set) > 1)  im1::set  (index_1of2, id1, ds::drop (set, duple));
                                    else                          im1::drop (index_1of2, id1);
                                    fi;
                        NULL    =>  index_1of2;                 # Duple isn't in tuplebase. Possibly we should raise an exception here.
                    esac;

                index_2of2
                    =
                    case (im1::get (index_2of2, id2))
                        #
                        THE set =>  if (ds::vals_count(set) > 1)  im1::set  (index_2of2, id2, ds::drop (set, duple));
                                    else                          im1::drop (index_1of2, id2);
                                    fi;
                        NULL    =>  index_2of2;                 # Duple isn't in tuplebase. Possibly we should raise an exception here.
                    esac;

                index_12of2
                    =
                    ds::drop (index_12of2, duple);

                { index_1of2,
                  index_2of2,
                  #
                  index_12of2,
                  #
                  #
                  index_1of3,
                  index_2of3,
                  index_3of3,
                  #
                  index_12of3,
                  index_13of3,
                  index_23of3,
                  #
                  index_123of3
                }:                                                      Tuplebase;
            };

        fun  drop_triple
              (
                { index_1of2,
                  index_2of2,
                  #
                  index_12of2,
                  #
                  #
                  index_1of3,
                  index_2of3,
                  index_3of3,
                  #
                  index_12of3,
                  index_13of3,
                  index_23of3,
                  #
                  index_123of3
                }:                                                      Tuplebase,
                triple as
                ( atom1 as { id => id1, ... },
                  atom2 as { id => id2, ... },
                  atom3 as { id => id3, ... }
                ):                                                      Triple
              )
            =
            {   index_1of3
                    =
                    case (im1::get (index_1of3, id1))
                        #
                        THE set =>  if (ts::vals_count(set) > 1)  im1::set  (index_1of3, id1, ts::drop (set, triple));
                                    else                          im1::drop (index_1of3, id1);
                                    fi;
                        NULL    =>  index_1of3;                 # Triple isn't in tuplebase. Possibly we should raise an exception here.
                    esac;

                index_2of3
                    =
                    case (im1::get (index_2of3, id2))
                        #
                        THE set =>  if (ts::vals_count(set) > 1)  im1::set  (index_2of3, id2, ts::drop (set, triple));
                                    else                          im1::drop (index_2of3, id2);
                                    fi;
                        NULL    =>  index_2of3;                 # Triple isn't in tuplebase. Possibly we should raise an exception here.
                    esac;

                index_3of3
                    =
                    case (im1::get (index_3of3, id3))
                        #
                        THE set =>  if (ts::vals_count(set) > 1)  im1::set  (index_3of3, id3, ts::drop (set, triple));
                                    else                          im1::drop (index_3of3, id3);
                                    fi;
                        NULL    =>  index_3of3;                 # Triple isn't in tuplebase. Possibly we should raise an exception here.
                    esac;

                index_12of3
                    =
                    case (im2::get (index_12of3, (id1, id2)))
                        #
                        THE set =>  if (ts::vals_count(set) > 1)  im2::set  (index_12of3, (id1, id2), ts::drop (set, triple));
                                    else                          im2::drop (index_12of3, (id1, id2));
                                    fi;
                        NULL    =>  index_12of3;                # Triple isn't in tuplebase. Possibly we should raise an exception here.
                    esac;

                index_13of3
                    =
                    case (im2::get (index_13of3, (id1, id3)))
                        #
                        THE set =>  if (ts::vals_count(set) > 1)  im2::set  (index_13of3, (id1, id3), ts::drop (set, triple));
                                    else                          im2::drop (index_13of3, (id1, id3));
                                    fi;
                        NULL    =>  index_13of3;                # Triple isn't in tuplebase. Possibly we should raise an exception here.
                    esac;

                index_23of3
                    =
                    case (im2::get (index_23of3, (id2, id3)))
                        #
                        THE set =>  if (ts::vals_count(set) > 1)  im2::set  (index_23of3, (id2, id3), ts::drop (set, triple));
                                    else                          im2::drop (index_23of3, (id2, id3));
                                    fi;
                        NULL    =>  index_23of3;                # Triple isn't in tuplebase. Possibly we should raise an exception here.
                    esac;

                index_123of3
                    =
                    ts::drop (index_123of3, triple);

                { index_1of2,
                  index_2of2,
                  #
                  index_12of2,
                  #
                  #
                  index_1of3,
                  index_2of3,
                  index_3of3,
                  #
                  index_12of3,
                  index_13of3,
                  index_23of3,
                  #
                  index_123of3
                }:                                                      Tuplebase;
            };

        fun get_duples    (t: Tuplebase)                   =              t.index_12of2;
        #
        fun get_duples1   (t: Tuplebase, a: Atom)          =  im1::get   (t.index_1of2, a.id);
        fun get_duples2   (t: Tuplebase, a: Atom)          =  im1::get   (t.index_2of2, a.id);
        #
        fun has_duple     (t: Tuplebase, d: Duple)         =  ds::member (t.index_12of2, d);

        fun get_triples   (t: Tuplebase)                   =              t.index_123of3;
        #
        fun get_triples1  (t: Tuplebase, a: Atom)          =  im1::get   (t.index_1of3, a.id);
        fun get_triples2  (t: Tuplebase, a: Atom)          =  im1::get   (t.index_2of3, a.id);
        fun get_triples3  (t: Tuplebase, a: Atom)          =  im1::get   (t.index_3of3, a.id);
        #
        fun get_triples12 (t: Tuplebase, a: Atom, b: Atom) =  im2::get   (t.index_12of3, (a.id, b.id));
        fun get_triples13 (t: Tuplebase, a: Atom, c: Atom) =  im2::get   (t.index_13of3, (a.id, c.id));
        fun get_triples23 (t: Tuplebase, b: Atom, c: Atom) =  im2::get   (t.index_23of3, (b.id, c.id));
        #
        fun has_triple    (t: Tuplebase, d: Triple)        =  ts::member (t.index_123of3, d);

        fun make_atom ()
            =
            { id    =>  id_to_int (issue_unique_id ()),
              datum =>  NONE
            };

        fun make_string_atom (s: String)
            =
            { id    =>  id_to_int (issue_unique_id ()),
              datum =>  STRING s
            };

        fun make_float_atom (f: Float)
            =
            { id    =>  id_to_int (issue_unique_id ()),
              datum =>  FLOAT f
            };

        fun make_other_atom (x: Other)
            =
            { id    =>  id_to_int (issue_unique_id ()),
              datum =>  OTHER x
            };

        exception TUPLEBASE Tuplebase;                                          # Making Atom_Datum and Tuplebase mutually recursive would be messy, so we use the exception hack instead.

        fun make_tuplebase_atom (tuplebase: Tuplebase)
            =
            { id    =>  id_to_int (issue_unique_id ()),
              datum =>  TBASE (TUPLEBASE tuplebase)
            };

        fun string_of ({ id, datum => STRING s }: Atom) =>  THE s;
            string_of _                                 =>  NULL;
        end;

        fun float_of  ({ id, datum => FLOAT  f }: Atom) =>  THE f;
            float_of  _                                 =>  NULL;
        end;

        fun other_of  ({ id, datum => OTHER  x }: Atom) =>  THE x;
            other_of  _                                 =>  NULL;
        end;

        fun tuplebase_of  ({ id, datum => TBASE (TUPLEBASE tuplebase) }: Atom) =>  THE tuplebase;
            tuplebase_of  _                                                    =>  NULL;
        end;

        fun atoms_apply                                                         # Apply do_atom to all Atoms in Tuplebase.
              ( { index_12of2,
                  index_123of3,
                  ...
                }:      Tuplebase
              )
              (do_atom: Atom -> Void)
            =
            {   ds::apply  do_duple   index_12of2;
                ts::apply  do_triple  index_123of3;
            }
            where
                already_seen =  REF is1::empty;
                #
                fun do_duple ((a1, a2): Duple)
                    =
                    {
                        if (not (is1::member (*already_seen, a1.id)))
                            #
                            already_seen :=  is1::add (*already_seen, a1.id);

                            do_atom  a1;
                        fi;

                        if (not (is1::member (*already_seen, a2.id)))
                            #
                            already_seen :=  is1::add (*already_seen, a2.id);

                            do_atom  a2;
                        fi;
                    };

                fun do_triple ((a1, a2, a3): Triple)
                    =
                    {
                        if (not (is1::member (*already_seen, a1.id)))
                            #
                            already_seen :=  is1::add (*already_seen, a1.id);

                            do_atom  a1;
                        fi;

                        if (not (is1::member (*already_seen, a2.id)))
                            #
                            already_seen :=  is1::add (*already_seen, a2.id);

                            do_atom  a2;
                        fi;

                        if (not (is1::member (*already_seen, a3.id)))
                            #
                            already_seen :=  is1::add (*already_seen, a3.id);

                            do_atom  a3;
                        fi;
                    };
            end;

    };
end;

##########################################################################
# Note[1].
#
# Duples and triples cover the majority of practical cases.
#
# Also, supporting longer tuples would result in an exponential
# explosion in the number of indices, using our simple policy
# of maintaining all possible indices.  Supporting longer
# tuples in a sensible way would require a different design,
# and result in a much more complex tuplebase.  We may eventually
# want that more complex tuplebase, but having a simple tuplebase
# available will still be nice.

##########################################################################
# Note[2].
#
# We deliberately do not support searching the
# tuplebase by String or Float values. This
# feels messy, application-specific, and outside
# the core mission of this package.
#
# Also, in most cases applications that need
# a little functionality of this sort can simply
# maintain their own String -> Atom indices,
# or can wrap Tuplebase values with extra indices.
#
# Burdening applications which don't need such
# functionality with a lot of extra overhead seems
# Wrong.  If there is enough need for a fancier
# tuplebase, it should probably be implemented as
# a separate fancy_tuplebase package, rather than
# complicate this package.

##########################################################################
# Note[3].
#
# We support searching the tuplebase by any
# combination of slots, and maintain indices
# for all such possibilities.
#
# This is likely to be overkill for any specific
# application; the intent is to provide a clean,
# simple off-the-shelf solution for general-purpose
# needs, not to be optimally space and time efficient
# for any specific application.
#
# If space and time efficiency prove critical, we
# should probably implement a code generator which
# accepts a specification of the needs of a particular
# application and produces a tuplebase custom-tuned
# to that specification.
#
# The package is also small enough to simply manually
# clone-and-mutate on an occasional basis.

Comments and suggestions to: bugs@mythryl.org