PreviousUpNext

15.4.33  src/app/future-lex/src/backends/sml/sml-fun-output.pkg

## sml-fun-output.pkg

# Compiled by:
#     src/app/future-lex/src/lexgen.lib



# Code generation for SML, using control-flow

###                          "Chaos theory is not nearly as exciting as it sounds. How could it be?"
###
###                                                           -- Stephen Kellert


stipulate
    package fil =  file__premicrothread;                        # file__premicrothread  is from   src/lib/std/src/posix/file--premicrothread.pkg
    package lms =  list_mergesort;                              # list_mergesort        is from   src/lib/src/list-mergesort.pkg
    package lo  =  lex_output_spec;                             # lex_output_spec       is from   src/app/future-lex/src/backends/lex-output-spec.pkg
    package re  =  regular_expression;                          # regular_expression    is from   src/app/future-lex/src/regular-expression.pkg
    package sis =  regular_expression::symbol_set;
    package sym =  re::sym;
herein

    package smlfun_output
    : (weak) Output                                             # Output                is from   src/app/future-lex/src/backends/output.api

    {
        Ml_Exp == ml::Ml_Exp;
        Ml_Pat == ml::Ml_Pat;

        inp = "inp";
        inp_variable = ML_VAR inp;

        fun id_of (lo::STATE { id, ... } )
            =
            id;

        fun name_of' i =  "yyQ" + (int::to_string i);
        fun name_of  s =  name_of' (id_of s);
        fun act_name i =  "yyAction" + (int::to_string i);

        # Simple heuristic to avoid computing unused values:
        #
        stipulate 
            has = string::is_substring;
        herein
            hasyytext   = has "yytext";
            has_reject   = has "REJECT";
            hasyylineno = has "yylineno";
        end;

        fun map_int f syms                                      # Map over the intervals of a symbol set 
            =
            sis::foldl_int
                (\\ (i, ls) =  (f i) ! ls)
                []
                syms;

        Transition_Interval                                     # Transition interval representation:
            =
            TI  (sis::Interval, Int, Ml_Exp);

        fun interval_of (TI (i, t, e)) = i;
        fun tag_of      (TI (i, t, e)) = t;
        fun action_of   (TI (i, t, e)) = e;
        fun same_tag    (TI (_, t1, _), TI (_, t2, _))   =   t1 == t2;
        fun singleton  (TI ((i, j), _, _))              =   i  == j;

        # Generate code for transitions: generate a hard-coded binary
        # search on accepting characters

        fun mk_trans ([ ], _) =>  raise exception DIE "(BUG) SMLFunOutput: alphabet not covered";
            mk_trans ([t], _) =>  action_of t;

            mk_trans ([t1, t2], _)
                => 
                if (same_tag (t1, t2) )
                    #
                    action_of t1;
                else
                    my (_, t1end)   =  interval_of t1;
                    my (t2start, _) =  interval_of t2;

                    if (singleton t1)
                        #
                        ML_IF (ML_CMP (ml::EQ, inp_variable, ML_SYM t1end),
                             action_of t1,
                             action_of t2);

                    elif (singleton t2)
                        #
                        ML_IF (ML_CMP (ml::EQ, inp_variable, ML_SYM t2start),
                             action_of t2,
                             action_of t1);
                    else
                        #
                        ML_IF (ML_CMP (ml::LEQ, inp_variable, ML_SYM t1end),
                             action_of t1,
                             action_of t2);
                    fi;
                fi;

           mk_trans (ts, len)
               =>
                {   lh = len / 2;
                    #
                    fun split (    ls,  0,  l1) =>  (list::reverse l1, ls);
                        split (l ! ls, count, l1) =>  split (ls, count - 1, l ! l1);
                        split _                 =>  raise exception DIE "(BUG) SMLFunOutput: split failed";
                    end;

                    my (ts1, ts2)
                        =
                        split (ts, lh, []);

                    my (ts2start, ts2end)
                        =
                        interval_of (list::head ts2);

                    my (ts2', ts2len)
                        =
                        ts2start == ts2end
                         ??   (list::tail ts2, len - lh - 1)
                         ::   (           ts2, len - lh    );

                    # we want to take advantage of the special case when 
                    # len = 3 and hd ts2 is a singleton.  this case often
                    # occurs when we have an arrow for a single character.
                    #   
                    else_clause
                        = 
                        if (lh == 1 and ts2len == 1)
                            #
                            mk_trans ([list::head ts1, list::head ts2'], 2);
                        else
                            ML_IF (ML_CMP (ml::LT, inp_variable, ML_SYM ts2start),
                                   mk_trans (ts1, lh),
                                   mk_trans (ts2', ts2len));
                        fi;

                    ML_IF (ML_CMP (ml::EQ, inp_variable, ML_SYM ts2start),
                           action_of (list::head ts2),
                           else_clause);
              };
        end;

        fun mk_state action_vec (s, k)
            =
            {   s ->   lo::STATE { id, start_state, label, final, next };
                #
                fun add_match (i, last_match)
                    =
                    {   last_match' =   has_reject (vector::get (action_vec, i))
                                         ?? last_match
                                         :: ML_VAR "yyNO_MATCH";

                        ML_APP ("yyMATCH",
                                [ML_VAR "stream",
                                 ML_VAR (act_name i),
                                 last_match']);
                    };

                my (cur_match, next_matches)
                    =
                    case final
                         []     => (NULL, []);
                         f ! fs => (THE f, fs);
                    esac;

                last_match
                    =
                    list::fold_backward add_match (ML_VAR "lastMatch") next_matches;

                #  Collect all valid transition symbols 

                labels
                    =
                    list::fold_forward sis::union sis::empty (list::map #1 *next);

                #  pair transition intervals with associated actions/transitions 

                new_final
                    =
                    case cur_match
                        #
                        THE j =>  add_match (j, last_match);
                        NULL  =>  last_match;
                    esac;

                fun arrows (syms, s)
                    = 
                    map_int 
                      (\\ i => TI (i, id_of s, 
                         ML_APP (name_of s, [ML_VAR "stream'", new_final])); end )
                      syms;

                tis = list::map arrows *next;

                err_act'
                    = 
                    case cur_match
                        #
                        THE j =>    ML_APP ( act_name j, 

                                             [ ML_VAR "stream", 

                                               has_reject (vector::get (action_vec, j))
                                                ?? last_match
                                                :: ML_VAR "yyNO_MATCH"
                                             ]
                                           );

                        NULL =>  ML_APP ("yystuck", [last_match]);
                    esac;

                # If start state, check for eof:
                # 
                err_act =   if start_state
                                #
                                ML_IF (ML_APP("yyInput::eof", [ML_VAR "stream"]),
                                                ML_APP("user_declarations::eof", [ML_VAR "yyarg"]),
                                                err_act');
                            else
                                err_act';
                            fi;

                # Error transitions = complement (valid transitions) 
                #
                error   =  sis::complement labels;
                err_tis =  map_int (\\ i =  TI (i, -1, err_act)) error;

                # The arrows represent intervals that partition the entire
                # alphabet, with each interval mapped to some transition or
                # action.  We sort the intervals by their smallest member:
                #
                fun gt (a, b)
                    =
                    (#1 (interval_of a)) > (#1 (interval_of b));

                sorted =   lms::sort_list  gt  (list::cat (err_tis ! tis));

                # Now we want to find adjacent partitions with the same 
                # action, and merge their intervals:
                #
                fun merge [ ] => [ ];
                    merge [t] => [t];

                    merge (t1 ! t2 ! ts)
                        => 
                        if (same_tag (t1, t2) )
                            #
                            t1 ->  TI ((i, _), tag, act);
                            t2 ->  TI ((_, j), _,   _  );

                            t = TI ((i, j), tag, act);

                            merge (t ! ts);

                        else
                            t1 ! (merge (t2 ! ts));
                        fi;
                end;

                merged = merge sorted;

                # Create the transition code 
                #
                trans = mk_trans (merged, list::length merged);

                # Create the input code 
                #
                get_inp
                    = 
                    # trans has at least the error action.  if length (merged)
                    # is 1 then we can avoid getting any input and simply
                    # take the error transition in all cases.  note that
                    # the "error" transition may actually be a match
                    #
                    case merged
                        #
                        [_] => err_act;

                        _   => ML_CASE (ML_APP ("yygetc", [ML_VAR "stream"]),
                                 [(ML_CON_PATTERN ("NULL", []), err_act),
                                  (ML_CON_PATTERN ("THE", [ML_VAR_PATTERN (inp + ", stream'")]), 
                                     trans)]);
                    esac;

                  ML_FUN (name_of s, ["stream", "lastMatch"], get_inp, k);
              };

        fun mk_action (i, action, k)
            =
            {   upd_strm =  ML_REF_PUT (ML_VAR "yystrm", ML_VAR "stream");
                act      =  ML_RAW [ml::TOK action];
                seq      =  ML_SEQ [upd_strm, act];

                lett =  if (hasyytext action )
                        
                            ML_LET ( "yytext", 
                                    ML_APP("yymktext", [ML_VAR "stream"]), 
                                    seq
                                  );
                        else
                           seq;
                        fi;

                letl =  if (hasyylineno action)
                            #
                            ML_LET ( "yylineno", 
                                    ML_APP ( "REF", 
                                             [ML_APP ("yyInput::getlineNo", 
                                                [ML_REF_GET (ML_VAR "yystrm")])]), 
                                   lett
                                  );
                        else
                           lett;
                        fi;

                letr =  if (has_reject action)
                            #
                            ML_LET ("oldStrm", ML_REF_GET (ML_VAR "yystrm"),
                                   ML_FUN
                                     ("REJECT", [],
                                      ML_SEQ 
                                        [ML_REF_PUT (ML_VAR "yystrm", 
                                                    ML_VAR "oldStrm"),
                                         ML_APP("yystuck", [ML_VAR "lastMatch"])],
                                      letl));
                        else
                            letl;
                        fi;

                ML_NEW_GROUP (ML_FUN (act_name i, ["stream", "lastMatch"], letr, k));
            };

        package scc
            =
            digraph_strongly_connected_components_g (
                package {
                   Key = lo::Dfa_State;

                   fun compare (lo::STATE { id => id1, ... }, lo::STATE { id => id2, ... } )
                       =
                       int::compare (id1, id2);
                }
            );

        fun mk_states (actions, dfa, start_states, k)
            =
            {   fun follow (lo::STATE { next, ... } )
                    = 
                    #2 (paired_lists::unzip *next);

                scc = scc::topological_order' { roots => start_states, follow };

                mk_state' = mk_state actions;

                fun mk_grp (scc::SIMPLE    state,  k)   =>   ML_NEW_GROUP (mk_state' (state, k));
                    mk_grp (scc::RECURSIVE states, k)   =>   ML_NEW_GROUP (list::fold_backward mk_state' k states);
                end;

                list::fold_forward mk_grp k scc;
            };

        fun lexer_hook spec stream
            =
            {   spec ->  lo::SPEC { actions, dfa, start_states, ... };
                #
                fun match_ss (label, state)
                    =
                    (  ML_CON_PATTERN (label, []), 
                         ML_APP (name_of state, 
                                      [ML_REF_GET (ML_VAR "yystrm"), 
                                       ML_VAR "yyNO_MATCH"])
                    );

                inner_expression = ML_CASE (ML_REF_GET (ML_VAR "yyss"),
                                        list::map match_ss start_states);

                states_expression = mk_states 
                                  (actions, dfa, 
                                   #2 (paired_lists::unzip start_states), inner_expression);

                lexer_expression =   vector::keyed_fold_backward mk_action states_expression actions;

                prettyprint_stream =   plain_file_prettyprinter::make_plain_file_prettyprinter { output_stream => stream };

                ml::prettyprint_ml (prettyprint_stream, lexer_expression);
            };

        fun start_states_hook spec stream
            =
            {   spec ->  lo::SPEC { start_states, ... };
                #
                mach_names = #1 (paired_lists::unzip start_states);

                fil::write (stream, string::join " | " mach_names);
            };

        fun user_decls_hook spec stream
            =
            {   spec ->  lo::SPEC { decls, ... };
                #
                fil::write (stream, decls);
            };

        fun header_hook spec stream
            =
            {   spec ->  lo::SPEC { header, ... };
                #
                fil::write (stream, header);
            };

        fun args_hook spec stream
            =
            {   spec ->  lo::SPEC { arg, ... };
                #
                arg' =  if (string::length_in_bytes arg == 0)
                            #
                            "(yyarg as ())";
                        else
                            "(yyarg as " + arg + ") ()";
                        fi;

                fil::write (stream, arg');
            };

        package tio= file__premicrothread;      # file__premicrothread  is from   src/lib/std/src/posix/file--premicrothread.pkg

        template
            =
            {
                file = tio::open_for_read "backends/sml/template-sml-fun.pkg";
                #
                fun done () = tio::close_input file;

                fun read ()
                    =
                    case (tio::read_line file)
                        #
                        NULL     =>  [];
                        THE line =>  line ! read();
                    esac;


                (read()
                 except
                    ex = { done(); raise exception ex;}
                )
                then
                    done();
            };

        fun output (spec, fname)
            = 
            expand_file::expand {
                  src   => template,
                  dst   => fname + ".pkg",
                  hooks => [  ("lexer",       lexer_hook       spec),
                             ("startstates", start_states_hook spec),
                             ("userdecls",   user_decls_hook   spec),
                             ("header",      header_hook      spec),
                             ("args",        args_hook        spec)
                          ]
            };

    };
end;


## John Reppy (http://www.cs.uchicago.edu/~jhr)
## Aaron Turon (adrassi@gmail.com)
## All rights reserved.
## COPYRIGHT (c) 2005 
## Subsequent changes by Jeff Prothero Copyright (c) 2010-2015,
## released per terms of SMLNJ-COPYRIGHT.


Comments and suggestions to: bugs@mythryl.org

PreviousUpNext