PreviousUpNext

15.4.38  src/app/future-lex/src/lex-fn.pkg

## lex-fn.pkg
## John Reppy (http://www.cs.uchicago.edu/~jhr)
## Aaron Turon (adrassi@gmail.com)
## All rights reserved.

# Compiled by:
#     src/app/future-lex/src/lexgen.lib

# DFA generation using RE derivatives

package lex_fn
: (weak)
api {
    gen:  lex_spec::Spec
          ->
          lex_output_spec::Spec;

}
{
    package re  = regular_expression;                                   # regular_expression            is from   src/app/future-lex/src/regular-expression.pkg
    package sis = regular_expression::symbol_set;
    package lo  = lex_output_spec;                                      # lex_output_spec               is from   src/app/future-lex/src/backends/lex-output-spec.pkg

    package map
        =
        red_black_map_g (                                               # red_black_map_g               is from   src/lib/src/red-black-map-g.pkg
            package {
                Key = vector::Vector( re::Re );
                compare = vector::compare_sequences re::compare;
            }
        );

    # Given a list of RE vectors (start states), produce a DFA recognizer: 
    # NOTE: invoked once per start state (each start state has a DFA)
    #
    fun make_dfa start_vecs
        =
        {   n = REF 0;          #  next state id 
            states = REF [];

            # Return the state that the re vector maps to and 
            # a flag set to TRUE if the state is new.

            fun make_state (state_map, result, as_ss)
                =
                case (map::get (state_map, result))
                  
                     NULL => {
                        id = *n;
                        fun add_final (idx, re, finals) = 
                              if (re::nullable re)
                                   idx ! finals;
                              else finals;fi;
                        q = lo::STATE {
                                id, start_state => as_ss, label => result,
                                final => vector::keyed_fold_backward add_final [] result,
                                next => REF []
                              };

                          n := id+1;
                          states := q ! *states;
                          (TRUE, q, map::set (state_map, result, q));
                        };

                     THE q => (FALSE, q, state_map);
                esac;


            fun init_iter (states, state_map, [])
                    =>
                    (list::reverse states, state_map);

                init_iter (states, state_map, vec ! vecs)
                    =>
                    {   my (_, q, state_map') = make_state (state_map, vec, TRUE);
                        init_iter (q ! states, state_map', vecs);
                    };
            end;

            my (init_states, init_statemap)
                =
                init_iter ([], map::empty, start_vecs);

            fun f (state_map, [])
                    =>
                    state_map;

                f (state_map, lo::STATE { next, label, ... } ! work_list)
                    =>
                    {   fun move ((result, edge), (state_map, work_list))
                            = 
                            if   (vector::all re::is_none result)               #  if error transition 
                                 (state_map, work_list);
                            else
                                  my (is_new, q, state_map)
                                      =
                                      make_state (state_map, result, FALSE);

                                  next := (edge, q) ! *next;

                                  if is_new  (state_map, q ! work_list);
                                  else       (state_map,     work_list);  fi;
                            fi;

                        edges = re::derivatives label;

                        f (list::fold_forward move (state_map, work_list) edges);
                    };
            end;

            ignore (f (init_statemap, init_states));
            (init_states, list::reverse *states);
        };

    # Clamp a machine to the right character set:
    #
    fun clamp clamp_to states
        =
        {   ascii127
                =
                sis::interval (0u0, 0u127);

            fun clamp_trans (edge, q)
                = 
                (sis::intersect (ascii127, edge), q);

            fun clamp_state (lo::STATE { next, ... } )
                =
                next :=  list::map clamp_trans *next;

            list::apply clamp_state states;
            states;
        };

    fun gen spec
        =
        {
#  TODO: check for invalid start states on rules    XXX BUGGO
          my lex_spec::SPEC { decls, conf, rules } = spec;

          my lex_spec::CONF { struct_name, header,
                            arg, start_states, ... } = conf;
          start_states' = quickstring_set::add (start_states, quickstring__premicrothread::from_string "INITIAL");
#
#         # Split out actions and associate each ruleSpec to an action ID
#         #
#         # Note: matchActions tries to find textually idential actions and map
#         # them to the same entry in the action vector
#         #
#         fun matchActions rules = let
#               fun iter ((ruleSpec, action) ! rules, 
#                         ruleSpecs, actions, actionMap, n) = let
#                     key = quicstring::from_string action
#                     my (i, actions', actionMap', n') = 
#                           case quickstring_map::get (actionMap, key)
#                            of NULL => (n, action ! actions,
#                                        quickstring_map::set (actionMap, key, n),
#                                        n+1)
#                             | THE i => (i, actions, actionMap, n)
#                     in
#                       iter (rules, (i, ruleSpec) ! ruleSpecs,
#                             actions', actionMap', n')
#                     end
#                 | iter ([], ruleSpecs, actions, _, _) = 
#                     (list::reverse ruleSpecs, list::reverse actions)
#               in
#                 iter (rules, [], [], quickstring_map::empty, 0)
#               end
#         my (ruleSpecs, actions) = matchActions rules

          my (rule_specs, actions) = paired_lists::unzip rules;
          actions_vec = vector::from_list actions;
          start_states = quickstring_set::vals_list start_states';

          fun ssvec label
              =
              {   fun has_rule (NULL, re)
                          =>
                          re;

                      has_rule (THE ss, re)
                          =>
                          if (quickstring_set::member (ss, label))
                               re;
                          else regular_expression::none;
                          fi;
                   end;

                   rules =  list::map has_rule rule_specs;

                   vector::from_list rules;
              };

          my (init_states, states)
              =
              make_dfa (list::map ssvec start_states);

          lo::SPEC {
               decls,
               header => if (string::length_in_bytes header == 0)
                              "package " + 
                                if   (string::length_in_bytes struct_name == 0)
                                     "Mlex";
                                else
                                     struct_name;
                                fi;
                         else
                              header;
                         fi,
               arg,
               actions      =>  actions_vec,
               dfa          =>  clamp lex_spec::CLAMP127 states,
               start_states =>  paired_lists::zip 
                                (list::map quickstring__premicrothread::to_string start_states, 
                                 init_states)
             };
          };
};


Comments and suggestions to: bugs@mythryl.org

PreviousUpNext