PreviousUpNext

15.4.855  src/lib/regex/backend/dfa-engine.pkg

## dfa-engine.pkg

# Compiled by:
#     src/lib/std/standard.lib

# Implements a matcher engine based on deterministic finite
# automata.



###                  "The opposite of a trivial truth is false;
###                   the opposite of a great truth is also true."
###
###                                        -- Niels Bohr



package dfa_engine: (weak)  Regular_Expression_Engine {                 # Regular_Expression_Engine     is from   src/lib/regex/backend/regular-expression-engine.api

    package d =  dfa;                           # dfa                           is from   src/lib/regex/backend/dfa.pkg
    package m =  regex_match_result;            # regex_match_result            is from   src/lib/regex/glue/regex-match-result.pkg
    package r = abstract_regular_expression;    # abstract_regular_expression   is from   src/lib/regex/front/abstract-regular-expression.pkg

    Compiled_Regular_Expression = d::Dfa;

    fun compile r
        =
        d::build r 
        except
            _ = raise exception abstract_regular_expression::CANNOT_COMPILE;

    # Look at a stream and match against dfa.
    # On success return  THE (pattern#, Match, rest of stream).
    # On failure return  NULL.
    #
    fun scan (regexp, getc, p, stream)
        = 
        {   move      =  d::move       regexp;
            accepting =  d::accepting  regexp;
            can_start =  d::can_start  regexp;  

            fun loop (state, p, inits, last_accepting)
                =
                case (getc (inits)) 

                     NULL => last_accepting;

                     THE (c, s')
                         =>
                         case (move (state, c))

                             NULL => last_accepting;

                             THE new
                                 =>
                                 case (accepting new)
                                     THE n => loop (new, p+1, s', THE (p+1, s', n));
                                     NULL  => loop (new, p+1, s', last_accepting);
                                 esac;
                         esac;
                esac;



            fun try0 stream
                =
                case (accepting 0)

                    THE n => THE ( n,
                                   m::REGEX_MATCH_RESULT (THE { match_position => stream, match_length => 0 },[]),
                                   stream
                                 );

                    NULL  => NULL;
                esac;


            case (getc (stream))

                NULL => try0 stream;

                THE (c, s')
                    => 
                    case (loop (0, p, stream, NULL))

                        NULL => try0 stream;

                        THE (last, cs, n)
                            =>
                            THE ( n,
                                  m::REGEX_MATCH_RESULT (THE { match_position => stream, match_length => last-p },[]),
                                  cs
                                );
                    esac;
            esac;
        };

    fun prefix regexp getc stream
        =
        case (scan (regexp, getc, 0, stream))
            THE (n, m, cs) => THE (m, cs);
            NULL => NULL;
        esac;

    fun find regexp getc stream
        = 
        loop (0, stream)
        where
            fun loop (p, s)
                =
                case (scan (regexp, getc, p, s))
                    NULL =>
                        case (getc (s))
                            THE (_, s') =>  loop (p+1, s');
                            NULL        =>  NULL;
                        esac;

                    THE (n, m, cs)
                        =>
                        THE (m, cs);
                esac;
        end;

    fun match []
            =>
            (\\ getc =   \\ stream =   NULL);

        match l
            => 
            {   dfa = d::build_pattern (map #1 l);

                a = rw_vector::from_list (map (\\ (a, b) = b) l);

                \\ getc   =
                \\ stream =  case (scan (dfa, getc, 0, stream))

                                 THE (n, m, cs) => THE ((rw_vector::get (a, n)) m, cs);

                                 NULL => NULL;
                             esac;
         };
   end;

};


Comments and suggestions to: bugs@mythryl.org

PreviousUpNext