PreviousUpNext

15.4.856  src/lib/regex/backend/dfa.pkg

## dfa.pkg
#
# Deterministic finite-state machines.

# Compiled by:
#     src/lib/std/standard.lib



stipulate
    package fil =  file__premicrothread;                                        # file__premicrothread  is from   src/lib/std/src/posix/file--premicrothread.pkg
    package is  =  nfa::int_set;
    package nfa =  nfa;                                                         # nfa           is from   src/lib/regex/backend/nfa.pkg


    package int_set_set
        = 
        list_set_g (
            Key = is::Set;
            compare = is::compare;
        );

    package int2set
        = 
        list_set_g (
            #
            Key = (Int, Int);

            fun compare ((i1, i2), (j1, j2))
                = 
                case (int::compare (i1, j1))
                    #
                    EQUAL => int::compare (i2, j2);
                    v => v;
                esac;
        );


    package char_set
        = 
        list_set_g (
            Key = Char;
            compare = char::compare;
        );

    package iss =  int_set_set;
    package i2  =  int2set;
    package cs  =  char_set;
    package a2  =  rw_matrix;                                                   # rw_matrix     is from   src/lib/std/src/rw-matrix.pkg
    package rwv =  rw_vector;                                                   # rw_vector     is from   src/lib/std/src/rw-vector.pkg

    package map
        =
        list_map_g (
            Key = is::Set;
            compare = is::compare;
        );

    fun compare_char_option (NULL, NULL) => EQUAL;
        compare_char_option (NULL, THE (c)) => LESS;
        compare_char_option (THE (c), NULL) => GREATER;
        compare_char_option (THE (c), THE (c')) => char::compare (c, c');
    end;

herein

    package   dfa
    : (weak)  Dfa                                                               # Dfa           is from   src/lib/regex/backend/dfa.api
    {
        exception SYNTAX_NOT_HANDLED;

        Move = MOVE  (Int, Null_Or( Char ), Int);

        package mvs                                                             # "mvs" == "move_set".
            = 
            list_set_g (
                #
                Key = Move; 

                fun compare (MOVE (i, c, j), MOVE (i', c', j'))
                    =
                    case (int::compare (i, i'))

                        EQUAL => case (compare_char_option (c, c')) 
                                     EQUAL => int::compare (j, j');
                                     v => v;
                                 esac;

                        v => v;

                    esac;
            );

        # Create sets from lists 

        fun i_list l =  is::add_list (is::empty, l);
        fun m_list l =  mvs::add_list (mvs::empty, l);

        Dfa = DFA { states:       is::Set,
                    moves:        mvs::Set,
                    accepting:    i2::Set,
                    table:        a2::Rw_Matrix(    Null_Or(  Int ) ),
                    acc_table:    rwv::Rw_Vector( Null_Or( Int ) ),
                    start_table:  rwv::Rw_Vector( Bool )
                  };


        fun print (DFA { states, moves, accepting, ... } )
            = 
            {   pr    =  fil::print;
                pr_i  =  fil::print o int::to_string;
                pr_i2 =  fil::print o (\\ (i1, i2) => int::to_string i1; end );
                pr_c  =  fil::print o char::to_string;

                pr ("States: 0 -> ");
                pr_i (is::vals_count (states) - 1);
                pr "\nAccepting:";

                i2::apply  (\\ k = { pr " "; pr_i2 k;})
                           accepting;

                pr "\nMoves\n";

                mvs::apply

                    \\ (MOVE (i, NULL, d))
                           =>
                           {   pr " ";
                               pr_i i;
                               pr " --@--> ";
                               pr_i d;
                               pr "\n";
                           };

                       (MOVE (i, THE c, d))
                           =>
                           {   pr " ";
                               pr_i i;
                               pr " --";
                               pr_c c;
                               pr "--> ";
                               pr_i d;
                               pr "\n";
                           };
                    end

                    moves;
            };


        fun move' moves (i, c)
            = 
            case (mvs::find f moves)
                #
                THE (MOVE (s1, THE c', s2)) =>  THE s2;
                NULL                        =>  NULL;
                _                           =>  raise exception DIE "Compiler bug: Unsupported case in move'";
            esac
            where
                fun f (MOVE (s1, THE c', s2)) =>   (s1==i and c==c');
                    f _                       =>   raise exception DIE "Compiler bug: Unsupported case in move'";
                end;
            end;

    #   fun move (DFA { moves, ... } ) (i, c) = move' moves (i, c) 

        fun move (DFA { table, ... } ) (i, c)
            =
            a2::get (table, (i, char::to_int (c)-char::to_int (char::min_char)));

        fun accepting' accepting i
            =
            i2::fold_backward

                (\\ ((s, n), NULL)     => if (s==i)   THE n;
                                          else        NULL;
                                          fi;

                    ((s, n), THE (n')) => if (s==i)   THE n;
                                          else        THE n';
                                          fi;
                 end)

                 NULL

                 accepting;

    #   fun accepting (DFA { accepting, ... } ) i
    #            =
    #            accepting' accepting i 

        fun accepting (DFA { acc_table, ... } ) i
            =
            rwv::get (acc_table, i);

        fun can_start (DFA { start_table, ... } ) c
            =
            rwv::get (start_table, char::to_int (c));


        fun build' nfa
            = 
            {   move      = nfa::move nfa;
                accepting = nfa::accepting nfa;

                start = nfa::start nfa;
                chars = nfa::chars nfa;

                fun get_all_chars  ps
                    = 
                    is::fold_forward
                    (\\ (s, cs) =  cs::add_list (cs, chars s))
                    cs::empty ps;

                init_chars = get_all_chars  start;

                fun get_all_states (ps, c)
                    = 
                    is::fold_forward
                    (\\ (s, ss) =  is::union (ss, move (s, c)))
                    is::empty ps;

                fun loop ([], set, moves) => (set, moves);

                    loop (x ! xs, set, moves)
                        => 
                        {   cl = get_all_chars (x);

                            my (nstack, sdu, ml)
                                = 
                                cs::fold_forward
                                    (\\ (c, (ns, sd, ml))
                                        =
                                        {   u = get_all_states (x, c);

                                            if ((not (iss::member (set, u))
                                                 and (not (iss::member (sd, u))))
                                            )
                                                ( u ! ns,
                                                  iss::add (sd, u),
                                                  (x, c, u) ! ml
                                                );
                                            else
                                                (ns, sd, (x, c, u) ! ml);
                                            fi;
                                        }
                                    )
                                    ([], iss::empty,[])
                                    cl;

                            loop (nstack@xs, iss::union (set, sdu), ml@moves);
                        };
                end;

                my (s_set, m_list)
                    =
                    loop ([start], iss::singleton (start), []);

                num = REF 1;

                fun new ()
                    =
                    {   n = *num;
                        num := n+1 ;
                        n;
                    };

                s_map  =  map::set (map::empty, start, 0);

                s_set' =  iss::drop (s_set, start);

                s_map = iss::fold_forward
                            (\\ (is, map) =  map::set (map, is, new ()))
                            s_map
                            s_set';

                states = is::add_list (is::empty, list::from_fn (*num, \\ x = x));

                moves = mvs::add_list (mvs::empty,
                                       map (\\ (is1, c, is2)
                                               =
                                               MOVE (the (map::get (s_map, is1)),
                                                     THE c,
                                                     the (map::get (s_map, is2)))
                                           )
                                           m_list);

                # Given a set of accepting states, look for a given state,
                # with the minimal corresponding pattern number

                fun min_pattern acc_set
                    =
                    loop (tail l, head l)
                    where
                        l = map (the o accepting) (is::vals_list acc_set);

                        fun loop ([], min)
                                =>
                                min;

                            loop (n ! ns, min)
                                => 
                                if (n < min)   loop (ns, n);
                                else           loop (ns, min);
                                fi;
                        end;
                    end;


                accept =    iss::fold_forward
                                (\\ (is, cis)
                                    =
                                    {   items = is::filter
                                                    (\\ k
                                                        =
                                                        case (accepting k)
                                                            THE _ => TRUE;
                                                            NULL  => FALSE;
                                                        esac
                                                    )
                                                    is;

                                        if (is::is_empty items)
                                            #
                                            cis;
                                        else 
                                            i2::add (cis, (the (map::get (s_map, is)),
                                                                    min_pattern items));
                                        fi;
                                    }
                                )
                                i2::empty
                                s_set;

                table = a2::from_fn
                            ( (*num,   char::to_int (char::max_char)-char::to_int (char::min_char)+1),
                              \\ (s, c) =  move' moves (s, char::from_int (c+char::to_int (char::min_char)))
                            );

                acc_table = rwv::from_fn
                                ( *num, 
                                  \\ (s) =  accepting' accept s
                                );

                start_table =   rwv::from_fn
                                    ( char::to_int (char::max_char) - char::to_int (char::min_char)+1,
                                      \\ c =  cs::member (init_chars, char::from_int (c+char::to_int (char::min_char)))
                                    );


                DFA { states, moves, accepting=>accept, table, acc_table, start_table };
            };


        fun build r
            =
            build' (nfa::build (r, 0));

        fun build_pattern rs
            =
            build' (nfa::build_pattern rs);

    };
end;


## COPYRIGHT (c) 1998 Bell Labs, Lucent Technologies.
## Subsequent changes by Jeff Prothero Copyright (c) 2010-2015,
## released per terms of SMLNJ-COPYRIGHT.


Comments and suggestions to: bugs@mythryl.org

PreviousUpNext