PreviousUpNext

15.4.43  src/app/lex/lexgen.pkg

##  Lexical analyzer generator for Standard ML.
##      Version 1.7.0, June 1998

# Compiled by:
#     src/app/lex/mythryl-lex.lib

#  This software comes with ABSOLUTELY NO WARRANTY.
#  This software is subject only to the PRINCETON STANDARD ML SOFTWARE LIBRARY
#  COPYRIGHT NOTICE, LICENSE AND DISCLAIMER, (in the file "COPYRIGHT",
#  distributed with this software). You may copy and distribute this software;
#  see the COPYRIGHT NOTICE for details and restrictions.
#
#       Changes:
#           07/25/89 (drt): added %header declaration, code to place
#                   user declarations at same level as make_lexer, etc.
#                   This is needed for the parser generator.
#             /10/89 (appel): added %arg declaration (see lexgen.doc).
#             /04/90 (drt): fixed following bug: couldn't use the lexer after an
#                   error occurred -- NextTok and inquote weren't being reset
#           10/22/91 (drt): disabled use of lookahead
#           10/23/92 (drt): disabled use of $ operator (which involves lookahead),
#                   added handlers for dictionary lookup routine
#           11/02/92 (drt): changed handler for exception Reject in generated lexer
#                   to internal::Reject
#           02/01/94 (appel): Moved the exception handler for Reject in such
#                   a way as to allow tail-recursion (improves performance
#                   wonderfully!).
#           02/01/94 (appel): Fixed a bug in parsing of state names.
#           05/19/94 (Mikael Pettersson, mpe@ida.liu.se):
#                   Transition tables are usually represented as strings, but
#                   when the range is too large, int vectors constructed by
#                   code like "vector::Vector[1, 2, 3, ...]" are used instead.
#                   The problem with this isn't that the vector itself takes
#                   a lot of space, but that the code generated by Lib7 to
#                   construct the intermediate list at run-time is *HUGE*. My
#                   fix is to encode an int vector as a string literal (using
#                   two bytes per int) and emit code to decode the string to
#                   a vector at run-time. Lib7 compiles string literals into
#                   substrings in the code, so this uses much less space.
#           06/02/94 (jhr): Modified export-lex.pkg to conform to new installation
#                   scheme.  Also removed tab characters from string literals.
#           10/05/94 (jhr): Changed generator to produce code that uses the new
#                   basis style strings and characters.
#           10/06/94 (jhr) Modified code to compile under new basis style strings
#                   and characters.
#           02/08/95 (jhr) Modified to use new List module interface.
#           05/18/95 (jhr) changed vector::Vector to vector::from_list

#  Revision 1.9  1998/01/06 19:23:53  appel
#    added %posarg feature to permit position-within-file to be passed
#    as a parameter to make_lexer

# Revision 1.8  1998/01/06  19:01:48  appel
#   repaired error messages like "cannot have both %package and %header"
#
# Revision 1.7  1998/01/06  18:55:49  appel
#   permit %% to be unescaped within regular expressions
#
# Revision 1.6  1998/01/06  18:46:13  appel
#   removed undocumented feature that permitted extra %% at end of rules
#
# Revision 1.5  1998/01/06  18:29:23  appel
#   put yylineno variable inside make_lexer function
#
# Revision 1.4  1998/01/06  18:19:59  appel
#   Check for newline inside quoted string
#
# Revision 1.3  1997/10/04  03:52:13  dbm
#   Fix to remove output file if mythryl-lex fails.
#
#        10/17/02 (jhr) changed bad character error message to properly
#               print the bad character.
#        10/17/02 (jhr) fixed skipws to use char::is_space test.
#       07/27/05 (jhr) add \r as a recognized escape sequence.


#    Subject: lookahead in mythryl-lex
#    Reply-to: david.tarditi@CS.CMU.EDU
#    Date: Mon, 21 Oct 91 14:13:26 -0400

# There is a serious bug in the implementation of lookahead,
# as done in mythryl-lex, and described in Aho, Sethi, and Ullman,
# p. 134 "Implementing the Lookahead Operator"

# We have disallowed the use of lookahead for now because
# of this bug.

# As a counter-example to the implementation described in
# ASU, consider the following specification with the
# input string "aba" (this example is taken from
# a comp.compilers message from Dec. 1989, I think):

# Lex_Result=Void
# linenum = REF 1
# fun error x = file::write (fil::stderr, x + "\n")
# eof = \\ () => ()
# %%
# %package lex
# %%
# (a|ab)/ba => (print yytext; print "\n"; ());

# The ASU proposal works as follows. Suppose that we are
# using NFA's to represent our regular expressions.  Then to
# build an NFA for e1 / e2, we build an NFA n1 for e1 
# and an NFA n2 for e2, and add an epsilon transition
# from e1 to e2.

# When lexing, when we encounter the end state of e1e2,
# we take as the end of the string the position in
# the string that was the last occurrence of the state of
# the NFA having a transition on the epsilon introduced
# for /.

# Using the example we have above, we'll have an NFA
# with the following states:


#    1 -- a --> 2 -- b --> 3
#               |          |
#               | epsilon  | epsilon
#               |          |
#               |------------> 4 -- b --> 5 -- a --> 6

# On our example, we get the following list of transitions:

# a:      2, 4      (make an epsilon transition from 2 to 4)
# ab:     3, 4, 5   (make an epsilon transition from 3 to 4)
# aba:    6

# If we chose the last state in which we made an epsilon transition,
# we'll chose the transition from 3 to 4, and end up with "ab"
# as our token, when we should have "a" as our token.



###              "Men have become the tools of their tools."
###
###                            -- Henry David Thoreau



# Is there any reason to use this instead of standard library red-black trees?
# (Probably dates from era before standard library had them?)  XXX SUCKO FIXME

stipulate
    package fil =  file__premicrothread;                                # file__premicrothread  is from   src/lib/std/src/posix/file--premicrothread.pkg
herein

    generic package red_black_g (  b:  api {    Key;
                                       > : (Key, Key) -> Bool;
                                  }
                           )
    : (weak)
    api {  Tree;
         Key;
         empty:  Tree;
         insert:  (Key, Tree) -> Tree;
         lookup:  (Key, Tree) -> Key;
        exception NOT_FOUND  Key;
    }

    {
        include package   b;

        Color = RED | BLACK;

        Tree = EMPTY | TREE  (Key, Color, Tree, Tree);     empty = EMPTY;

        exception NOT_FOUND  Key;

        fun insert (key, t)
            =
            {   fun f EMPTY
                        =>
                        TREE (key, RED, EMPTY, EMPTY);

                    f (TREE (k, BLACK, l, r))
                        =>
                        if (key > k)

                            case (f r)

                                r as TREE (rk, RED, rl as TREE (rlk, RED, rll, rlr), rr)
                                    =>
                                    case l
                                        TREE (lk, RED, ll, lr)
                                            =>
                                            TREE (k, RED, TREE (lk, BLACK, ll, lr),
                                                          TREE (rk, BLACK, rl, rr));

                                       _ => TREE (rlk, BLACK, TREE (k, RED, l, rll),
                                                              TREE (rk, RED, rlr, rr));
                                    esac;

                                r as TREE (rk, RED, rl, rr as TREE (rrk, RED, rrl, rrr))
                                    =>
                                    case l
                                        TREE (lk, RED, ll, lr)
                                            =>
                                            TREE (k, RED, TREE (lk, BLACK, ll, lr),
                                                          TREE (rk, BLACK, rl, rr));

                                        _   => TREE (rk, BLACK, TREE (k, RED, l, rl), rr);
                                    esac;

                                r => TREE (k, BLACK, l, r);
                            esac;

                        elif (k > key)

                            case (f l)
                                #
                                l as TREE (lk, RED, ll, lr as TREE (lrk, RED, lrl, lrr))
                                    =>
                                    case r
                                        TREE (rk, RED, rl, rr)
                                            =>
                                            TREE (k, RED, TREE (lk, BLACK, ll, lr),
                                                          TREE (rk, BLACK, rl, rr));

                                        _   =>
                                            TREE (lrk, BLACK, TREE (lk, RED, ll, lrl),
                                                              TREE (k, RED, lrr, r));
                                    esac;

                                l as TREE (lk, RED, ll as TREE (llk, RED, lll, llr), lr)
                                    =>
                                    case r
                                        TREE (rk, RED, rl, rr)
                                            =>
                                            TREE (k, RED, TREE (lk, BLACK, ll, lr),
                                                       TREE (rk, BLACK, rl, rr));
                                       _    =>
                                            TREE (lk, BLACK, ll, TREE (k, RED, lr, r));
                                    esac;

                                l => TREE (k, BLACK, l, r);
                            esac;
                        else
                            TREE (key, BLACK, l, r);
                        fi;

                    f (TREE (k, RED, l, r))
                        =>
                        if   (key > k) TREE (k, RED, l, f r);
                        elif (k > key) TREE (k, RED, f l, r);
                        else           TREE (key, RED, l, r);
                        fi;
                end;

                case (f t)
                    TREE (k, RED, l as TREE(_, RED, _, _), r) => TREE (k, BLACK, l, r);
                    TREE (k, RED, l, r as TREE(_, RED, _, _)) => TREE (k, BLACK, l, r);
                    t => t;
                esac;
            };


        fun lookup (key, t)
            =
            get t
            where
                fun get EMPTY
                        =>
                        raise exception (NOT_FOUND key);

                    get (TREE (k, _, l, r))
                        =>
                        if   (k>key) get l;
                        elif (key>k) get r;
                        else         k;
                        fi;
                end;
            end;

    };

    api Lexgen {

        lex_fn: String -> Void;
    };

    package lex_fn: (weak) Lexgen  {

        include package   rw_vector;
        include package   list;

        infix my 9  sub ;

        Token = CHARS   Rw_Vector (Bool) | QMARK | STAR | PLUS | BAR
              | LP | RP | CARAT | DOLLAR | SLASH | STATE  List( String )
              | REPS  (Int, Int) | ID  String | ACTION  String
              | BOF | EOF | ASSIGN | SEMI | ARROW | LEXMARK | LEXSTATES 
              | COUNT | REJECT | FULLCHARSET | STRUCT | HEADER | ARG | POSARG
              ;

        Expression
            = EPS | ILK  (Rw_Vector( Bool ), Int) | CLOSURE  Expression
            | ALT  (Expression, Expression) | CAT  (Expression, Expression) | TRAIL  Int
            | END  Int
            ;

        # Flags describing input Lex spec.
        # - unnecessary code is omitted 
        # if possible 

        char_format           =  REF FALSE;     
        uses_trailing_context =  REF FALSE;
        uses_previous_newline =  REF FALSE;

        # Flags for various bells & whistles that Lex has.
        # These slow the lexer down and should be omitted
        # from production lexers (if you really want speed)

        count_newlines = REF FALSE;
        pos_arg        = REF FALSE;
        have_reject    = REF FALSE;

        #  Can increase size of character set 

        char_set_size = REF 129;

        #  Can name package or declare header code 

        package_name = REF "Mlex";
        header_code  = REF "";
        header_decl  = REF FALSE;
        arg_code     = REF (NULL: Null_Or( String ));

        package_declaration
            =
            REF FALSE;

        reset_flags
            =
            \\ ()
                =
                {   count_newlines := FALSE;
                    have_reject    := FALSE;
                    pos_arg        := FALSE;

                    uses_trailing_context := FALSE;

                    char_set_size  := 129;
                    package_name   := "Mlex";
                    header_code    := "";
                    header_decl    := FALSE;
                    arg_code       := NULL; 
                    package_declaration := FALSE;
                };

        lex_out = REF fil::stdout;

        fun say x
            =
            fil::write(*lex_out, x);

        # Union: merge two sorted lists of integers 
        #
        fun union (a, b)
            =
            merge ( reverse a,
                    reverse b,
                    NIL
                  )
            where
                recursive my merge
                    =
                    \\ (NIL, NIL, z) => z;
                       (NIL, el ! more, z) => merge (NIL, more, el ! z);
                       (el ! more, NIL, z) => merge (more, NIL, el ! z);

                       (x ! morex, y ! morey, z)
                           =>
                           if   ((x: Int)==(y: Int))   merge (morex, morey, x ! z);
                           elif (x > y)                merge (morex, y ! morey, x ! z);
                           else                        merge (x ! morex, morey, y ! z);
                           fi;
                    end;
            end;

        # Nullable: compute if a important expression
        # parse tree node is nullable 
        #
        recursive my nullable
            =
            \\
                EPS          =>  TRUE;
                ILK(_)       =>  FALSE;
                CLOSURE(_)   =>  TRUE;
                ALT (n1, n2) =>  nullable n1 or nullable n2;
                CAT (n1, n2) =>  nullable n1 and nullable n2;
                TRAIL (_)    =>  TRUE;
                END (_)      =>  FALSE;
            end 


        # FIRSTPOS: firstpos function for parse tree expressions 
        #
        also
        firstpos
            =
            \\
                EPS          =>  NIL;
                ILK(_, i)    =>  [i];
                CLOSURE (n)  =>  firstpos n;
                ALT (n1, n2) =>  union (firstpos n1, firstpos n2);
                CAT (n1, n2) =>  if (nullable n1 ) union (firstpos n1, firstpos n2);
                                                  else firstpos n1; fi;
                TRAIL i    =>  [i];
                END i      =>  [i];
            end 


        # LASTPOS: Lastpos function for parse tree expressions 
        #
        also
        lastpos
            =
            \\  EPS          => NIL;
                ILK(_, i)    => [i];
                CLOSURE n  => lastpos n;
                ALT (n1, n2) => union (lastpos n1, lastpos n2);
                CAT (n1, n2) => if  (nullable n2  )  union (lastpos n1, lastpos n2);
                                                   else  lastpos n2;                   fi;
                TRAIL i    => [i];
                END i      => [i];
            end 
                ;

        #  +++: Increment an integer reference 

        fun +++(x)  : Int
            =
            {   x := *x + 1;
                *x;
            };

        package dictionary {

                Relation(X)
                    =
                    (X, X) -> Bool;

#               abstype Dictionary (Y, X)
#                   =
#                   DATA  { table:  List( (Y, X) ),
#                           leq:  (Y, Y) -> Bool
#                         }
#               with

                stipulate
                    Dictionary (Y, X)                           # Start of abstype-replacement recipe -- see http://successor-ml.org/index.php?title=Degrade_abstype_to_derived_form 
                        =                                       # 
                        DATA  { table:  List( (Y, X) ),         # 
                                leq:  (Y, Y) -> Bool            # 
                              };                                # 
                herein                                          # 
                    Dictionary (Y, X) = Dictionary (Y, X);      # End of abstype-replacement recipe

                    exception LOOKUP;

                    fun create leqfunc
                        =
                        DATA { table => NIL, leq => leqfunc };

                    fun lookup (DATA { table => entrylist, leq } ) key
                        =
                        search entrylist
                        where
                            fun search []
                                    =>
                                    raise exception LOOKUP;

                                search((k, item) ! entries)
                                    =>
                                    if  (leq (key, k))

                                        if (leq (k, key))   item;
                                        else                raise exception LOOKUP;
                                        fi;
                                    else
                                        search entries;
                                    fi;
                            end;
                        end;

                     fun enter (DATA { table => entrylist, leq } )
                               (newentry as (key: Y, item: X)) :     Dictionary (Y, X)
                         =
                         {   gt =   \\ a =  \\ b  =  not (leq (a, b));
                             eq =   \\ k =  \\ k' =  (leq (k, k')) and (leq (k', k));

                             fun update NIL
                                     =>
                                     [ newentry ];

                                 update ((entry as (k, _)) ! entries)
                                     =>
                                     if   (eq  key k  )  newentry ! entries;
                                     elif (gt  k key  )  newentry ! (entry ! entries);
                                     else                entry    ! (update entries);
                                     fi;
                             end;

                             DATA { table => update entrylist, leq };
                         };

                     fun listofdict (DATA { table => entrylist, leq } )
                         =
                         f (entrylist, NIL)
                         where
                             fun f   (NIL, r) =>  reverse r;
                                 f (a ! b, r) =>  f (b, a ! r);
                             end;
                         end;
              end;
        };

        include package   dictionary; 

        #  INPUT.ML:  Input w/ one character push back capability 

        line_num =  REF 1;

        stipulate
            Ibuf =  BUF (                               # Start of abstype-replacement recipe -- see http://successor-ml.org/index.php?title=Degrade_abstype_to_derived_form 
                      fil::Input_Stream,                #
                                                        #
                      { b:  Ref( String ),              #
                        p:  Ref( Int )                  #
                      }                                 #
                    );                                  #
        herein                                          #
            Ibuf = Ibuf;                                # End of abstype-replacement recipe.

            fun make_ibuf s
                =
                BUF (s, { b=>REF "", p => REF 0 } );

            fun close_ibuf (BUF (s, _))
                =
                fil::close_input s;

            exception EOF_EXCEPTION;

            fun getch (a as (BUF (s,{ b, p } )))
                = 
                if (*p == size *b)
                    #
                    b :=  fil::read_n (s, 1024);
                    p :=  0;

                    if  (size *b == 0)   raise exception EOF_EXCEPTION; 
                    else                 getch a;
                    fi;

                else
                    ch =   string::get_byte_as_char(*b, *p);
                    #
                    if (ch == '\n')
                        line_num := *line_num + 1;
                    fi;

                    p := *p + 1;
                    ch;
                fi;


            fun ungetch (BUF (s,{ b, p } ))
                =
                {   p := *p - 1;
                    #
                    if (string::get_byte_as_char(*b,*p) == '\n')
                        line_num := *line_num - 1;
                    fi;
                };
        end;

        exception ERROR;

        fun pr_err x
            =
            {   fil::write (
                    fil::stderr,
                    string::cat [
                        "mythryl-lex: error, line ",
                        (int::to_string *line_num),
                        ": ",
                        x,
                        "\n"
                    ]
                );

                raise exception ERROR;
            };

        fun pr_syn_err x
            =
            {   fil::write (
                    fil::stderr,
                    string::cat [
                        "mythryl-lex: syntax error, line ",     # <-- Only line differing from above fn.
                        (int::to_string *line_num),
                        ": ",
                        x,
                        "\n"
                    ]
                );

                raise exception ERROR;
            };

        exception SYNTAX_ERROR;                 # Error in user's input file.

        exception LEX_ERROR;            # Unexpected error in lexer.

        lex_buf   =  REF (make_ibuf fil::stdin);
        lex_state =  REF 0;
        next_tok  =  REF BOF;
        inquote   =  REF FALSE;

        fun advance_tok () : Void
            =
            {   fun is_letter c
                    =
                    (c >= 'a'  and  c <= 'z') or
                    (c >= 'A'  and  c <= 'Z');

                fun is_digit c
                    =
                    (c >= '0') and (c <= '9');

                fun is_odigit c
                    =
                    (c >= '0') and (c <= '7');

                fun is_xdigit c
                    =
                    ((c >= '0') and (c <= '9'))
                    or
                    ((c >= 'a') and (c <= 'f'))
                    or
                    ((c >= 'A') and (c <= 'F'));

                #  Check for valid (non-leading) identifier character (added by John H Reppy) 

                fun is_ident_chr c
                    =
                    (   is_letter c
                    or  is_digit  c
                    or  c == '_'
                    or  c == '\''
                    );

                fun atoi s
                    =
                    num (explode s, 0)
                    where
                        fun num (c ! r, n)
                                =>
                                if (is_digit c)   num (r, 10*n + (char::to_int c - char::to_int '0'));
                                else              n;
                                fi;

                            num ([], n)
                                =>
                                n;
                        end;
                    end;

                fun skipws ()
                    =
                    {   ch = nextch();

                        if  (char::is_space ch)   skipws();
                        else                      ch;
                        fi;
                    }

                also
                fun nextch ()
                    =
                    getch *lex_buf

                also
                fun escaped ()
                    =
                    case (nextch ())
                        #
                        'b' => '\x08';
                        'n' => '\n';
                        'r' => '\r';
                        't' => '\t';
                        'h' => '\x80';

                        'x' =>  {   fun err t
                                        =
                                        pr_err("illegal ascii hex escape '\\x" + (implode (reverse t)) + "'");

                                    fun convert c
                                        =
                                        case c
                                            '0' => 0;
                                            '1' => 1;
                                            '2' => 2;
                                            '3' => 3;
                                            '4' => 4;
                                            '5' => 5;
                                            '6' => 6;
                                            '7' => 7;
                                            '8' => 8;
                                            '9' => 9;
                                            'a' => 10;  'A' => 10;
                                            'b' => 11;  'B' => 11;
                                            'c' => 12;  'C' => 12;
                                            'd' => 13;  'D' => 13;
                                            'e' => 14;  'E' => 14;
                                            'f' => 15;  'F' => 15;
                                            _   => raise exception DIE "Impossible";
                                        esac;

                                    fun f (i, count, chars)
                                        =
                                        if  (count == 2)
                                            if (i >= *char_set_size)   err chars;
                                            else                       char::from_int i;
                                            fi;
                                        else
                                            ch = nextch ();

                                            if  (is_xdigit ch)   f (i*16+(convert ch), count+1, ch ! chars);
                                            else                 err chars;
                                            fi;
                                        fi;

                                    x = nextch ();

                                    if (is_xdigit x)  f (convert x, 1, [x]);
                                    else              x;
                                    fi;
                               };

                         x  =>  {   fun err t
                                        =
                                        pr_err("illegal ascii octal escape '\\" + (implode (reverse t)) + "'");

                                    fun convert c
                                        =
                                        char::to_int c - char::to_int '0';

                                    fun f (i, count, chars)
                                        =
                                        if  (count == 3)
                                            if (i >= *char_set_size)   err chars;
                                            else                       char::from_int i;
                                            fi;
                                        else
                                            ch = nextch ();

                                            if  (is_odigit ch)   f (i*8+(convert ch), count+1, ch ! chars);
                                            else                 err chars;
                                            fi;
                                        fi;

                                    if (is_odigit x)   f (convert x, 1, [x]);
                                    else               x;
                                    fi;
                               };
                     esac

                also
                fun onechar x
                    =
                    {   c = make_rw_vector (*char_set_size, FALSE);
                        #
                        set (c, char::to_int x, TRUE);

                        CHARS c;
                    };

                case *lex_state
                    #
                    0 =>
                        next_tok := make_tok ()
                        where
                            make_tok
                                =
                                \\ ()
                                    =
                                    case (skipws ())
                                        #
                                        #  Lex % operators 
                                        #
                                        '%' =>  case (nextch ())    
                                                    '%' => LEXMARK;
                                                    a   =>
                                                        {   fun f s
                                                                =
                                                                {   a = nextch();
                                                                    if (is_letter a)
                                                                        f (a ! s);
                                                                    else
                                                                        ungetch *lex_buf;
                                                                        implode (reverse s);
                                                                    fi;
                                                                };

                                                            case (f [a])
                                                                "reject" => REJECT;
                                                                "count"  => COUNT;
                                                                "full"   => FULLCHARSET;
                                                                "s"      => LEXSTATES;
                                                                "S"      => LEXSTATES;
                                                                "package" => STRUCT;
                                                                "header" => HEADER;
                                                                "arg"    => ARG;
                                                                "posarg" => POSARG;
                                                                _ => pr_err "unknown % operator ";
                                                            esac;
                                                        };
                                                esac;

                                        # Semicolon (for end of LEXSTATES):
                                        #
                                        ';' => SEMI;

                                        # Anything else:
                                        #
                                        ch =>   if (is_letter ch)

                                                    fun get_id matched
                                                        =
                                                        {   x = nextch();
                          /**** fix by John H Reppy
                                                            if is_letter x or is_digit x or
                                                               x == "_" or x == "'"
                          ****/
                                                            if (is_ident_chr  x)
                                                                get_id (x ! matched);
                                                            else
                                                                ungetch *lex_buf;
                                                                implode (reverse matched);
                                                            fi;
                                                        };
                                                    ID (get_id [ch]);

                                                else
                                                    pr_syn_err (string::cat [
                                                       "bad character: \"", char::to_string ch, "\""
                                                    ]);
                                                fi;
                                    esac;


                        end;

                    1 =>
                        {   recursive my make_tok
                                =
                                \\ ()
                                    =
                                    if *inquote
                                        #
                                        case (nextch ())   
                                            #
                                            # Inside quoted string 
                                            #
                                            '\\' => onechar (escaped());

                                            '"'  => {   inquote := FALSE;
                                                        make_tok();
                                                    };

                                            '\n' => {   pr_syn_err "end-of-line inside quoted string";
                                                        inquote := FALSE;
                                                        make_tok();
                                                    };

                                            x    => onechar x;
                                        esac;
                                    else
                                        case (skipws ())

                                            # Single character operators:
                                            #
                                            '?' => QMARK;
                                            '*' => STAR;
                                            '+' => PLUS;
                                            '|' => BAR;
                                            '(' => LP;
                                            ')' => RP;
                                            '^' => CARAT;
                                            '$' => DOLLAR;
                                            '/' => SLASH;
                                            ';' => SEMI;

                                            '.' =>  {   c = make_rw_vector (*char_set_size, TRUE); 
                                                        set (c, 10, FALSE);
                                                        CHARS c;
                                                    };

                                                    # Assign and arrow 
                                            '=' =>  {   c = nextch(); 

                                                        if (c == '>')
                                                            ARROW;
                                                        else
                                                            ungetch *lex_buf;
                                                            ASSIGN;
                                                        fi;
                                                    };

                                                    # Character set:
                                            '[' =>  {   recursive my ilkch
                                                            =
                                                            \\ () = {   x = skipws();
                                                                        #
                                                                        if (x == '\\')  escaped ();
                                                                        else            x;
                                                                        fi;
                                                                    };
                                                        first = ilkch();
                                                        flag = (first != '^');
                                                        c = make_rw_vector(*char_set_size, not flag);

                                                        fun add NULL    =>  ();
                                                            add (THE x) =>  set (c, char::to_int x, flag);
                                                        end 

                                                        also
                                                        fun range (x, y)
                                                            =
                                                            if (x > y)
                                                                pr_err "bad char. range";
                                                            else
                                                                i = REF (char::to_int x);
                                                                j = char::to_int y;

                                                                for (*i <= j) {
                                                                    add (THE (char::from_int *i));
                                                                    i := *i + 1;
                                                                };
                                                            fi

                                                        also
                                                        fun get_ilk last
                                                            =
                                                            case (ilkch ())

                                                                ']' =>  {   add last;
                                                                            c;
                                                                        };

                                                                '-' =>  case last

                                                                            NULL
                                                                                =>
                                                                                get_ilk (THE '-');

                                                                            THE last'
                                                                                =>
                                                                                {   x = ilkch ();

                                                                                    if (x == ']')
                                                                                        add last;
                                                                                        add (THE '-'); c;
                                                                                    else
                                                                                        range (last', x);
                                                                                        get_ilk NULL;
                                                                                    fi;
                                                                                };
                                                                        esac;

                                                                x   =>  {   add last;
                                                                            get_ilk (THE x);
                                                                        };
                                                            esac;

                                                        CHARS (get_ilk (first == '^'  ??  NULL  :: THE first));
                                                    };

                                            # Start States specification:
                                            # 
                                            '<' =>  {   recursive my get_state
                                                            =
                                                            \\ (prev, matched)
                                                                =
                                                                case (nextch ())
                                                                    #
                                                                    '>' =>  matched ! prev;
                                                                    ',' =>  get_state (matched ! prev, "");
                                                                     x  =>  if  (is_ident_chr  x)   get_state (prev, matched + string::from_char  x);
                                                                            else                    pr_syn_err "bad start state list";
                                                                            fi;
                                                                esac;

                                                        STATE (get_state (NIL, ""));
                                                    };
                                                    #  { id } or repetitions 

                                            '{' =>  {   ch = nextch();
                                                        #
                                                        if (is_letter ch)
                                                            #
                                                            fun get_id matched
                                                                =
                                                                case (nextch ())
                                                                    #
                                                                    '}' => matched;

                                                                     x =>   if (is_ident_chr x)
                                                                                #
                                                                                get_id (matched + string::from_char x);
                                                                            else
                                                                                pr_err "invalid char. class name";
                                                                            fi;
                                                                esac;

                                                            ID (get_id (string::from_char ch));

                                                        elif (is_digit ch)

                                                            fun get_r (matched, r1)
                                                                =
                                                                case (nextch ())
                                                                    #
                                                                    '}' =>  {   n = atoi matched; 
                                                                                #
                                                                                if (r1 == -1)  (n, n);
                                                                                else          (r1, n);
                                                                                fi;
                                                                            };

                                                                    ',' =>  if (r1 == -1)    get_r("", atoi matched);
                                                                            else             pr_err "invalid repetitions spec.";
                                                                            fi;

                                                                    x   =>  if (is_digit x)  get_r (matched + string::from_char x, r1);
                                                                            else             pr_err "invalid char in repetitions spec";
                                                                            fi;
                                                                esac;

                                                            REPS (get_r (string::from_char ch, -1));

                                                        else
                                                            pr_err "bad repetitions spec";
                                                        fi;
                                                    };

                                                    # Lex % operators: 
                                            '\\' => onechar (escaped());

                                                    # Start quoted string: 
                                                    #
                                            '"' =>  {   inquote := TRUE;
                                                        make_tok ();
                                                    };

                                                    # Anything else: 
                                                    #
                                            ch  =>  onechar ch;
                                        esac;
                                    fi;

                            next_tok := make_tok();
                        };

                    2   =>  next_tok
                                :=
                                case (skipws ())

                                    '(' =>
                                        {   fun loop_to_end (backslash, x)
                                                =
                                                {   c    = getch *lex_buf;
                                                    notb = not backslash;
                                                    nstr = c ! x;

                                                    case c
                                                        '"' =>  if notb  nstr;
                                                                else     loop_to_end (FALSE, nstr);
                                                                fi;

                                                        _   =>  loop_to_end (c == '\\' and notb, nstr);
                                                    esac;
                                                };

                                            fun get_act (lpct, x)
                                                =
                                                {   c    = getch *lex_buf;
                                                    nstr = c ! x;

                                                    case c
                                                         '"' => get_act (lpct, loop_to_end (FALSE, nstr));
                                                         '(' => get_act (lpct + 1, nstr);

                                                         ')' => if (lpct == 0 ) implode (reverse x);
                                                                else get_act (lpct - 1, nstr);
                                                                fi;

                                                         _   => get_act (lpct, nstr);
                                                    esac;
                                                };

                                            ACTION (get_act (0, NIL));
                                        };

                                    ';' => SEMI;

                                    c   => (pr_syn_err ("invalid character " + string::from_char c));

                                esac;

                    _   =>  raise exception LEX_ERROR;

                esac;
            }
            except
                EOF_EXCEPTION
                    =
                    next_tok := EOF;

        fun get_tok (_: Void) : Token
            = 
            {   t = *next_tok;
                advance_tok();
                t;
            };

        sym_tab
            =
            REF (create string::(<=)) : Ref( Dictionary( String, Expression ) );

        fun get_expression () : Expression
            =
            expression0 ()
            where
                recursive my optional
                    =
                    \\ e =  ALT (EPS, e)

                also
                lookup'
                    =
                    \\ name
                        =
                        lookup *sym_tab name 
                        except
                            LOOKUP
                                =
                                pr_err ("bad regular expression name: " + name)

                also
                newline
                    =
                    \\ ()
                        =
                        {   c = make_rw_vector (*char_set_size, FALSE); 
                            set (c, 10, TRUE);
                            c;
                        }

                also
                endline
                    =
                    \\ e =  trail (e, ILK (newline(), 0))

                also
                trail
                    =
                    \\ (e1, e2)
                        =
                        CAT (CAT (e1, TRAIL 0), e2)

                also
                closure1
                    =
                    \\ e
                        =
                        CAT (e, CLOSURE e)

                also
                repeat
                    =
                    \\ (min, max, e)
                        =
                        rep (min, max)
                        where 
                            recursive my rep
                                =
                                \\ (0, 0) =>  EPS;
                                   (0, 1) =>  ALT (e, EPS);
                                   (0, i) =>  CAT (rep (0, 1), rep (0, i - 1));
                                   (i, j) =>  CAT (e, rep (i - 1, j - 1));
                                end;
                        end

                also
                expression0
                    =
                    \\ ()
                        =
                        case (get_tok ())
                            #
                            CHARS c => expression1 (ILK (c, 0));

                            LP =>   {   e = expression0 ();
                                        #       
                                        if (*next_tok == RP)
                                            #
                                            advance_tok ();
                                            expression1 e;
                                        else
                                            pr_syn_err "missing '('";
                                        fi;
                                    };

                            ID name => expression1 (lookup' name);

                            _ => raise exception SYNTAX_ERROR;
                        esac

                also
                expression1
                    =
                    \\ e
                        =
                        case *next_tok
                            #
                            SEMI => e;
                            ARROW => e;
                            EOF => e;
                            LP => expression2 (e, expression0());
                            RP => e;

                            t   =>  {   advance_tok();
                                        #
                                        case t
                                            QMARK   => expression1 (optional e);
                                            STAR    => expression1 (CLOSURE e);

                                            PLUS    => expression1 (closure1 e);
                                            CHARS c => expression2 (e, ILK (c, 0));

                                            BAR     => ALT (e, expression0());

                                            DOLLAR  =>  {   uses_trailing_context := TRUE;
                                                            endline e;
                                                        };

                                            SLASH   =>  {   uses_trailing_context := TRUE;
                                                            trail (e, expression0());
                                                        };

                                            REPS (i, j)
                                                =>
                                                expression1 (repeat (i, j, e));

                                            ID name
                                                =>
                                                expression2 (e, lookup' name);

                                            _ => raise exception SYNTAX_ERROR;
                                        esac;
                                    };
                        esac

                also
                expression2
                    =
                    \\ (e1, e2)
                        =
                        case *next_tok
                            #
                            SEMI  => CAT (e1, e2);
                            ARROW => CAT (e1, e2);
                            EOF   => CAT (e1, e2);
                            LP    => expression2 (CAT (e1, e2), expression0());
                            RP    => CAT (e1, e2);

                            t   =>  {   advance_tok();
                                        #
                                        case t
                                            QMARK => expression1 (CAT (e1, optional e2));
                                            STAR  => expression1 (CAT (e1, CLOSURE e2));
                                            PLUS  => expression1 (CAT (e1, closure1 e2));

                                            CHARS c => expression2 (CAT (e1, e2), ILK (c, 0));
                                            BAR     => ALT (CAT (e1, e2), expression0());

                                            DOLLAR  =>  {   uses_trailing_context := TRUE;
                                                            endline (CAT (e1, e2));
                                                        };
                                            SLASH   =>  {   uses_trailing_context := TRUE;
                                                            trail (CAT (e1, e2), expression0());
                                                        };

                                            REPS (i, j)
                                                =>
                                                expression1 (CAT (e1, repeat (i, j, e2)));

                                            ID name
                                                =>
                                                expression2 (CAT (e1, e2), lookup' name);

                                            _   => raise exception SYNTAX_ERROR;
                                        esac;
                                    };
                        esac;
            end;                                        # fun get_expression

        state_tab
            =
            REF (create (string::(<=))) : Ref( Dictionary( String, Int ) );

        state_num = REF 0;

        fun get_states () : List( Int )
            =
            {   fun add NIL sl
                        =>
                        sl;

                    add (x ! y) sl
                        =>
                        add y (union ( [ lookup *state_tab x
                                                       except
                                                           LOOKUP = pr_err ("bad state name: " + x)
                                                     ],
                                                     sl));
                end;

                fun addall i sl
                    = 
                    if  (i <= *state_num)   addall (i+2) (union ([i], sl));
                    else                    sl;
                    fi;

                fun incall (x ! y) =>  (x+1) ! incall y;
                    incall NIL     =>  NIL;
                end;

                fun addincs (x ! y) =>  x ! (x+1) ! addincs y;
                    addincs NIL     =>  NIL;
                end;

                state_list
                    =
                    case *next_tok

                        STATE s =>  {   advance_tok(); 
                                        lex_state := 1;
                                        add s NIL;
                                    };

                        _       =>  addall 1 NIL;
                    esac;

                case *next_tok
                    #
                    CARAT
                        =>
                        {   lex_state := 1;
                            advance_tok ();
                            uses_previous_newline := TRUE;
                            incall state_list;
                        };

                    _   =>
                        addincs state_list;
                esac;
            };                  # fun get_states


        leaf_num = REF -1;


        fun renum (e:  Expression) : Expression
            =
            label e
            where
                recursive my label
                    =
                    \\ EPS          =>  EPS;
                       ILK (x, _)   =>  ILK (x,+++leaf_num);
                       CLOSURE e    =>  CLOSURE (label e);
                       ALT (e1, e2) =>  ALT (label e1, label e2);
                       CAT (e1, e2) =>  CAT (label e1, label e2);
                       TRAIL i      =>  TRAIL(+++leaf_num);
                       END i        =>  END(+++leaf_num);
                    end;
            end;

        exception PARSE_ERROR;


        fun parse () : ((String,  List( (List( Int ), Expression)),  Dictionary (String, String)))
            =
            {   accept
                    =
                    REF (create string::(<=)) : Ref( Dictionary( String, String ) );

                recursive my parse_rtns
                    =
                    \\ l =  case (getch *lex_buf)
                                #
                                '%' => {   c = getch *lex_buf; 

                                           if (c == '%')   implode (reverse l);
                                           else            parse_rtns (c ! '%' ! l);
                                           fi;
                                       };

                                c   => parse_rtns (c ! l);
                            esac

                also
                parse_defs
                    =
                    \\ ()
                        =
                        {   lex_state := 0;
                            advance_tok ();

                            case *next_tok
                                #
                                LEXMARK
                                    =>
                                    ();

                                LEXSTATES
                                    =>
                                    {   fun f ()
                                            =
                                            case *next_tok

                                                ID i
                                                    =>
                                                    {   state_tab := enter *state_tab (i, +++state_num);
                                                        +++state_num;
                                                        advance_tok ();
                                                        f ();
                                                    };

                                                _ => ();
                                            esac;

                                        advance_tok();

                                        f ();

                                        if  (*next_tok == SEMI)   parse_defs ();
                                        else                      pr_syn_err "expected ';'";
                                        fi;
                                    };

                                ID x
                                    =>
                                    {   lex_state := 1;
                                        #
                                        advance_tok ();

                                        if (get_tok() == ASSIGN)
                                            #
                                            sym_tab := enter *sym_tab (x, get_expression());

                                            if  (*next_tok == SEMI)   parse_defs();
                                            else                      pr_syn_err "expected ';'";
                                            fi;
                                        else
                                            raise exception SYNTAX_ERROR;
                                        fi;
                                    };

                                REJECT      =>  {   have_reject    := TRUE;   parse_defs(); };
                                COUNT       =>  {   count_newlines := TRUE;   parse_defs(); };
                                FULLCHARSET =>  {   char_set_size  := 256;    parse_defs(); };

                                HEADER =>   {   lex_state := 2; advance_tok();
                                                #
                                                case (get_tok ())
                                                    #
                                                    ACTION s
                                                        => 
                                                        if  *package_declaration
                                                            (pr_err "cannot have both %package and %header \
                                                            \declarations");
                                                        elif *header_decl 
                                                            pr_err "duplicate %header declarations";
                                                        else 
                                                            header_code :=  s;
                                                            lex_state   :=  0;
                                                            header_decl :=  TRUE;
                                                            parse_defs();
                                                        fi;

                                                    _ => raise exception SYNTAX_ERROR;
                                                esac;
                                            };

                                POSARG =>   {   pos_arg := TRUE;
                                                parse_defs ();
                                            };

                                ARG =>  {   lex_state := 2;
                                            advance_tok();

                                            case (get_tok ())
                                                #
                                                ACTION s
                                                    => 
                                                    {   case *arg_code
                                                            THE _ =>  pr_err "duplicate %arg declarations";
                                                            NULL  =>  arg_code := THE s;
                                                        esac;

                                                        lex_state := 0;

                                                        parse_defs ();
                                                    };

                                                 _ => raise exception SYNTAX_ERROR;
                                             esac;
                                        };

                                STRUCT  =>  {   advance_tok();
                                                #
                                                case *next_tok
                                                    #
                                                    ID i => if *header_decl
                                                                #
                                                                pr_err "cannot have both %package and %header \
                                                                       \declarations";
                                                            elif *package_declaration

                                                                pr_err "duplicate %package declarations";

                                                            else
                                                                package_name        := i;
                                                                package_declaration := TRUE; 
                                                            fi;

                                                      _  => (pr_err "expected ID");
                                                esac;

                                                parse_defs ();
                                            };

                                _ => raise exception SYNTAX_ERROR; 
                            esac;
                       }                        # fun parse_defs

                also
                parse_rules
                    =
                    \\ rules
                        =
                        {   lex_state := 1;
                            #
                            advance_tok ();

                            case *next_tok
                                #
                                EOF => rules;

                                _   =>
                                    {   s =  get_states();
                                        #
                                        e =  renum (CAT (get_expression(), END 0));

                                        if  (*next_tok == ARROW)
                                            #
                                            lex_state := 2;
                                            advance_tok ();

                                            case (get_tok ())
                                                #
                                                ACTION act
                                                    =>
                                                    if (*next_tok == SEMI)
                                                        #
                                                        accept := enter *accept (int::to_string *leaf_num, act);
                                                        parse_rules((s, e) ! rules);
                                                    else 
                                                        pr_syn_err "expected ';'";
                                                    fi;

                                                _   =>
                                                    raise exception SYNTAX_ERROR;
                                            esac;
                                        else
                                            pr_syn_err "expected '=>'";
                                        fi;
                                    };
                            esac;
                        };

                usercode =  parse_rtns  NIL;
                parse_defs ();

                ( usercode,
                  parse_rules NIL,
                  *accept
                );
            }
            except
                SYNTAX_ERROR
                    =
                    pr_syn_err "";

        fun makebegin () : Void
            =
            {   fun make ((x, n: Int) ! y)
                        =>
                        {   say "my ";
                            say x;
                            say " = " ;
                            say "STARTSTATE ";
                            say (int::to_string n);
                            say ";\n";
                            make y;
                        };

                    make NIL
                        =>
                        ();
                end;

                say "\n#  start state definitions \n\n";

                make (listofdict *state_tab);
            };

        package l
            = 
            package {

                nonfix my  > ;

                Key =  (List (Int), String);

                fun > ((key, item: String), (key', item'))
                    =
                    f key key'
                    where
                        fun f ((a: Int) ! a') (b ! b')
                                =>
                                if   (int::(>) (a, b))  TRUE;
                                elif (a == b)           f a' b';
                                else                    FALSE;
                                fi;
                            f _ _
                                =>
                                FALSE;
                        end;
                    end;
            };

        package rb
            =
            red_black_g( l );

        fun maketable (fins: List( (Int, (List( Int )))),
                     tcs:  List ((Int, (List( Int )))),
                     tcpairs:  List ((Int, Int)),
                     trans:   List ((Int,(List( Int ))))) : Void
            =
            {   # Fins = List (state #, list of final leaves for the state)
                #  tcs = List (state #, list of trailing context leaves which begin in this state)
                #        
                #   tcpairs = List (trailing context leaf, end leaf)
                #   trans   = List (state #, list of transitions for state)

                Element = NN  Int | TT  Int | DD  Int;

                count = REF 0;

                char_format :=   length trans < 256;

                if *uses_trailing_context   say "\nYyfinstate = NN Int | TT Int | DD Int;\n";
                else                        say "\nYyfinstate = NN Int;";
                fi;

                say "\nStatedata = { fin:  List( Yyfinstate ), trans: ";

                case *char_format
                    TRUE  =>  say "String };";
                    FALSE =>  say "vector::Vector( Int ) };";
                esac;

                say "\n\
                     \#  transition & final state table \n\
                     \tab = {\n";

                case *char_format
                    #
                    TRUE => ();

                    FALSE =>
                        {   say "fun decode s k =\n";
                            say "  {   k' = k + k;\n";
                            say "      hi = string::get_byte (s, k');\n";
                            say "      lo = string::get_byte (s, k' + 1);\n";
                            say "\n";
                            say "      hi * 256 + lo;\n";
                            say "  };\n";
                        };
                esac;

                newfins
                    =
                    {   fun is_end_leaf t
                            =
                            f tcpairs
                            where 
                                fun f ((l, e) ! r) =>  if (e==t)   TRUE;
                                                       else        f r;
                                                       fi;

                                    f NIL          =>  FALSE;
                                end;
                            end;

                        fun get_end_leaf t
                            =
                            f tcpairs
                            where
                                fun f ((tl, el) ! r)
                                        =>
                                        tl == t   ??   el
                                                  ::   f r;

                                    f _ => raise exception MATCH;
                                end;
                            end;

                        fun get_tr_con_leaves s
                            =
                            f tcs
                            where
                                fun f ((s', l) ! r)
                                        =>
                                        s == s'   ??   l
                                                  ::   f r;

                                    f NIL => NIL;
                                end;
                            end;

                        fun sort_leaves s
                            =
                            {   fun insert (x: Int) (a ! b)
                                        =>
                                        if (x <= a)  x ! (a ! b);
                                        else         a ! (insert x b);
                                        fi;

                                    insert x NIL
                                        =>
                                        [x];
                                end;

                                list::fold_backward
                                    (\\ (x, r) = insert x r)
                                    [] s;
                            };

                        fun conv a
                            =
                            is_end_leaf a   ??   DD a
                                            ::   NN a;

                        fun merge (a ! a', b ! b')
                                =>
                                if   (a <= b)   (conv a) !  merge (a', b ! b');
                                else            (TT   b) !  merge (a ! a', b');
                                fi;

                            merge (a ! a', NIL) => (conv a) ! (merge (a', NIL));
                            merge (NIL, b ! b') => (TT b) ! (merge (b', NIL));
                            merge (NIL, NIL) => NIL;
                        end;

                        map
                            (\\ (x, l)
                                =
                                reverse (
                                    merge (
                                        l,
                                        sort_leaves (
                                            map
                                            (\\ x =  get_end_leaf x)
                                            (get_tr_con_leaves x)
                                        )
                                    )
                                )
                            )
                            fins;
                    };

                rs  =   result
                        where
                            include package   rb;
                            #
                            fun make_items x
                                =
                                {   fun emit8 (x, pos)
                                        =
                                        {   s =   sprintf "x%02x" x;                                    # Was:  number_string::pad_left '0' 3 (int::to_string x);
                                            #
                                            case pos
                                                16      => { say "\\\n\\\\";   say s;      1; };
                                                _       => { say "\\";         say s;  pos+1; };
                                            esac;
                                        };

                                    fun emit16 (x, pos)
                                        =
                                        {   hi8 = x / 256;
                                            lo8 = x - hi8 * 256;        #  x rem 256 

                                            emit8 (lo8, emit8 (hi8, pos));
                                        };


                                    fun make_string ([], _, _)
                                            =>
                                            ();

                                        make_string (x ! xs, emitter, pos)
                                            =>
                                            make_string (xs, emitter, emitter (x, pos));
                                    end;

                                    case *char_format
                                        #
                                        TRUE    =>  {   say " \n\"";
                                                        make_string (x, emit8, 0);
                                                        say "\"\n";
                                                    };

                                        FALSE   =>  {   say (int::to_string (length x));
                                                        say ", \n\"";
                                                        make_string (x, emit16, 0);
                                                        say "\"\n";
                                                    };
                                    esac;
                                };


                            fun make_entry (NIL, rs, t)
                                    =>
                                    reverse rs;

                                make_entry(((l: Int, x) ! y), rs, t)
                                    =>
                                    {   name = (int::to_string l);
                                        #
                                        {   my (r, n)
                                                =
                                                lookup ((x, name), t);

                                            make_entry (y, (n ! rs), t);
                                        }
                                        except
                                            NOT_FOUND _
                                                =
                                                {   count := *count+1;
                                                    say " (";
                                                    say name;
                                                    say ", ";
                                                    make_items x;
                                                    say "),\n";
                                                    make_entry (y, (name ! rs), (insert ((x, name), t)));
                                                };
                                    };
                            end;

                            say "    s = [ \n"; 

                            result =  make_entry (trans, NIL, empty);

                            case *char_format 
                                #
                                TRUE
                                    =>
                                    {   say "    (0, \"\")];\n";
                                        say "    fun f x = x;\n";
                                    };

                                FALSE
                                    =>
                                    {   say "    (0, 0, \"\")];\n";
                                        say "    fun f (n, i, x) = (n, vector::from_fn (i, decode x));\n";
                                    };
                            esac;

                            say "    s = map f (reverse (tail (reverse s)));\n";
                            say "    exception LEX_HACKING_ERROR;\n";
                            say "    fun get ((j, x) ! r, i: Int)\n";
                            say "            =>\n";
                            say "            if (i == j)  x;   else get (r, i); fi;\n\n";
                            say "        get ([], i)\n";
                            say "            =>\n";
                            say "            raise exception LEX_HACKING_ERROR;\n";
                            say "    end;\n";

                            say "fun g {   fin => x,   trans => i   }\n";
                            say "    =\n";
                            say "    {   fin => x,   trans => get (s, i)   };\n";
                        end;

                fun make_table args
                    =
                    maketable args
                    where  

                        fun make_one (a, b)
                            =
                            {   fun item (NN i) => ("NN", i);
                                    item (TT i) => ("TT", i);
                                    item (DD i) => ("DD", i);
                                end;

                                fun make_item x
                                    =
                                    {   my (t, n)
                                            =
                                            item x;

                                        apply say ["(", t, " ", int::to_string n, ")"];
                                    };

                                fun make_items []  =>  ();
                                    make_items [x] =>  make_item x;

                                    make_items (hd ! tl)
                                        =>
                                        {   make_item hd;
                                            say ", ";
                                            make_items tl;
                                        };
                                end;

                                say "{ fin => [";
                                make_items b;
                                apply say ["], trans => ", a, "}"];
                            };

                        fun maketable ([], []) => ();
                            maketable ([a], [b]) => make_one (a, b);

                            maketable (a ! a', b ! b')
                                 =>
                                 {   make_one (a, b);
                                     say ",\n";
                                     maketable (a', b');
                                 };

                            maketable _ => raise exception MATCH;
                        end;
                    end;


            #   fun make_table (NIL, NIL) => ();
            #      make_table (a ! a', b ! b') =>
            #        {   funx make_items NIL = ()
            #              | make_items (hd ! tl) =
            #                { my (t, n) =
            #                    case hd of
            #                      (NN i) => ("(NN ", i)
            #                    | (TT i) => ("(TT ", i)
            #                    | (DD i) => ("(DD ", i);
            #                    say t; say (int::to_string n); say ")";
            #                    if (null tl)
            #                         ();
            #                    else (say ", "; make_items tl); fi;
            #                };
            #             say "{ fin = ["; make_items b;
            #             say "], trans = "; say a; say "}";
            #             if (null a')
            #                ();
            #             else (say ",\n"; make_table (a', b')); fi;
            #         };
            #        end;


                fun msg x
                    =
                    fil::say {. x; };

                say " vector::from_list (map g \n[";
                make_table (rs, newfins); 
                say "]);\n};\n";

                msg (  "                              lexgen.pkg:   Number of states = " + (int::to_string (length trans)));
                msg (  "                              lexgen.pkg:   Number of distinct rows = " + (int::to_string *count));

                msg (  "                              lexgen.pkg:   Approximate memory size of translation table = "
                    +  (int::to_string (*count * *char_set_size * (*char_format ?? 1 :: 8)))
                    +  " bytes\n\n"
                    );
            };

        #   makeaccept: Takes a (String, String) dictionary, prints case statement for
        #   accepting leaf actions.  The key strings are the leaf #'s, the data strings
        #   are the actions

        fun makeaccept ends
            =
            make (listofdict ends, TRUE)
            where 

                fun startline f
                    =
                    say "  ";

                fun make (NIL, f)
                    => 
                    {   startline f;
                        say "_ => raise exception internal::LEXER_ERROR;\n";
                    };

                    make ((x, a) ! y, f)
                        =>
                        {   startline f;
                            say x;
                            say " => ";

                            if  (substring::size(#2 (substring::position "yytext" (substring::from_string a)))  ==  0)

                                say "{ ";
                                say a;
                                say "; };";
                            else
                                say "{   yytext=yymktext();\n";
                                say a;
                                say "; };";
                            fi;

                            say "\n";

                            make (y, FALSE);
                        };
                end;
            end;

        fun leafdata (e: List( (List( Int ), Expression)))
            =
            {   fp   =  make_rw_vector (*leaf_num + 1, NIL);
                leaf =  make_rw_vector (*leaf_num + 1, EPS);

                tcpairs   = REF NIL;
                trailmark = REF -1;

                recursive my add
                    =
                    \\ (NIL,     x) => ();
                       (hd ! tl, x) => {   set (fp, hd, union (fp[ hd ], x));
                                           add (tl, x);
                                       };
                    end 

                also
                moredata
                    =
                    \\  CLOSURE e1 =>   {   moredata e1;
                                            add (lastpos e1, firstpos e1);
                                        };

                        ALT (e1, e2) => {   moredata e1;
                                            moredata e2;
                                        };

                        CAT (e1, e2) => {   moredata e1;
                                            moredata e2;
                                            add (lastpos e1, firstpos e2);
                                        };

                        ILK (x, i) => set (leaf, i, ILK (x, i));

                        TRAIL i =>      {   set (leaf, i, TRAIL i);

                                            if (*trailmark == -1)
                                                 trailmark :=  i;
                                            fi;
                                        };

                        END i =>        {   set (leaf, i, END i);
                                            #
                                            if (*trailmark != -1)
                                                 trailmark := -1;
                                                 tcpairs   :=  (*trailmark, i) ! *tcpairs;
                                            fi;
                                        };
                        _ => ();
                    end 

                also
                makedata
                    =
                    \\
                        NIL => ();

                        (_, x) ! tl
                            =>
                            {   moredata x;
                                makedata tl;
                            };
                    end;

                trailmark := -1;
                makedata e;

                (fp, leaf, *tcpairs);
            };

        fun makedfa rules
            =
            {   visitstarts( startstates() );

                ( listofdict *fintab,
                  listofdict *transtab,
                  listofdict *tctab,
                  tcpairs
                );
            }
            where

                state_tab = REF (create (string::(<=))):   Ref( Dictionary (String, Int        ));
                fintab    = REF (create    (int::(<=))):   Ref( Dictionary (Int,   (List( Int))));
                transtab  = REF (create    (int::(<=))):   Ref( Dictionary (Int,    List( Int)) );
                tctab     = REF (create    (int::(<=))):   Ref( Dictionary (Int,   (List( Int))));

                my (fp, leaf, tcpairs)
                    =
                    leafdata rules;

                fun visit (state, statenum)
                    =
                    {  transitions = gettrans state; 

                       fintab   := enter *fintab   (statenum, getfin state);
                       tctab    := enter *tctab    (statenum, gettc state);
                       transtab := enter *transtab (statenum, transitions);
                    }

                also
                fun visitstarts states
                    =
                    vs states 0
                    where
                        fun vs NIL i => ();
                            vs (hd ! tl) i => { visit (hd, i);   vs tl (i+1); };
                        end;
                    end

                also
                fun hashstate (s: List( Int ))
                    =
                    hs (s, "")
                    where
                        recursive my hs
                            =
                            \\ ((x: Int) ! y, z)
                                   =>
                                   hs (y, z + " " + (int::to_string x));

                               (NIL, z)
                                   =>
                                   z;
                            end;
                    end

                also
                fun find s
                    =
                    lookup *state_tab (hashstate s)

                also
                fun add (s, n)
                    =
                    state_tab := enter *state_tab (hashstate s, n)

                also
                fun getstate state
                    =
                    find state
                    except
                        LOOKUP
                            =
                            {   n = +++state_num; 
                                add (state, n);
                                visit (state, n);
                                n;
                            }

                also
                fun getfin state
                    =
                    f state NIL
                    where
                        fun f (hd ! tl) fins
                                =>
                                case (leaf[ hd ])
                                    END _ => f tl (hd ! fins);
                                    _     => f tl fins;
                                esac;

                            f NIL fins
                                =>
                                fins;
                        end;
                    end

                also
                fun gettc state
                    =
                    f state NIL
                    where
                        fun f (hd ! tl) fins
                                =>
                                case (leaf[ hd ])
                                    TRAIL _ =>  f tl (hd ! fins);
                                    _       =>  f tl fins;
                                esac;

                            f NIL fins
                                =>
                                fins;
                        end;
                    end

                also
                fun gettrans state
                    =
                    loop (*char_set_size - 1) NIL
                    where
                        fun loop c tlist
                            =
                            {   fun cktrans NIL r
                                        =>
                                        r;

                                    cktrans (hd ! tl) r
                                        =>
                                        case (leaf[ hd ])

                                            ILK (i, _)
                                                =>
                                                if (i[ c ])
                                                    cktrans tl (union (r, fp[ hd ]));
                                                else
                                                    cktrans tl r
                                                    except
                                                        INDEX_OUT_OF_BOUNDS
                                                            =
                                                            cktrans tl r;
                                                fi;

                                            _ => cktrans tl r;
                                        esac;
                                end;

                                if (c >= 0)
                                    v=cktrans state NIL;
                                    loop (c - 1) if (v==NIL ) 0 ! tlist; else (getstate v) ! tlist; fi;
                                else
                                    tlist;
                               fi;
                            };
                    end

                also
                fun startstates ()
                    =
                    {   makess rules;
                        listofarray (startarray, *state_num + 1);
                    }
                    where
                        startarray
                            =
                            make_rw_vector (*state_num + 1, NIL);

                        fun listofarray (a, n)
                            =
                            f (n - 1) NIL
                            where
                                fun f i l
                                    =
                                    i >= 0
                                     ?? f (i - 1) (a[i] ! l)
                                     ::                   l;
                            end;

                        recursive my makess
                            =
                            \\
                               NIL => ();

                               (startlist, e) ! tl
                                   =>
                                   {   fix (startlist, firstpos e);
                                       makess tl;
                                   };
                            end 

                        also
                        fix = \\
                                (NIL, _) => ();

                                (s ! tl, firsts)
                                    =>
                                    {   set (startarray,
                                             s,
                                             union (firsts, startarray[ s ])
                                        );

                                        fix (tl, firsts);
                                    };
                              end ;
                    end;


            end;                                # fun makedfa

        skel_hd
            = 
            "   \n\
            \    package user_declarations {\n\
            \      \n\
            \";

        skel_mid2
            =
            "                       | internal::DD k => action (i, (acts ! l), k ! rs)\n\
            \                       | internal::TT k =>\n\
            \                         {   fun f (a ! b, r)\n\
            \                                     =>\n\
            \                                     if (a == k)\n\
            \                                         action (i, (((internal::NN a) ! acts) ! l), (b@r));\n\
            \                                     else\n\
            \                                         f (b, a ! r);\n\
            \                                     fi;\n\
            \                                        \n\
            \                                 f (NIL, r)\n\
            \                                     =>\n\
            \                                     action (i, (acts ! l), rs);\n\
            \                             end;\n\
            \                             \n\
            \                             f (rs, NIL);\n\
            \                          }\n\
            \";


        fun lex_fn  infile
            =
            {   outfile = infile + ".pkg";

                fun print_lexer ends
                    =
                    {   sayln
                            =
                            \\ x = { say x;   say "\n"; };

                        case *arg_code 
                            #
                            NULL  => {   sayln "fun lex () : internal::Result =";
                                         sayln "{ fun continue () = lex(); ";
                                     };

                            THE s => {   say "fun lex ";
                                         say "(yyarg as (";
                                         say s;
                                         sayln ")) =";
                                         sayln " { fun continue () : internal::Result = ";
                                      };
                        esac;

                        say "  { fun scan (s, accepting_leaves:  List( List( internal::Yyfinstate";
                        sayln " ) ), l, i0) =";

                        if *uses_trailing_context   say "\t { fun action (i, NIL, rs)";
                        else                        say "\t { fun action (i, NIL)";
                        fi;

                        sayln " => raise exception LEX_ERROR;";

                        if *uses_trailing_context   sayln "\t action (i, NIL ! l, rs) => action (i - 1, l, rs);";
                        else                        sayln "\t action (i, NIL ! l)     => action (i - 1, l);";
                        fi;

                        if *uses_trailing_context   sayln "\t action (i, (node ! acts) ! l, rs) => ";
                        else                        sayln "\t action (i, (node ! acts) ! l) => ";
                        fi;

                        sayln "\t\t case node";
                        sayln "\t\t ";
                        sayln "\t\t    internal::NN yyk => ";
                        sayln "\t\t\t ( { fun yymktext () = substring(*yyb, i0, i-i0);\n\
                               \\t\t\t     yypos = i0 + *yygone;";

                        if *count_newlines 
                            sayln "\t\t\t yylineno := vector_slice_of_chars::keyed_fold_forward";
                            sayln "\t\t\t\t (\\\\ (_, '\\n', n) => n+1; (_, _, n) => n; end) *yylineno (vector_slice_of_chars::make_slice (*yyb, i0, THE (i-i0)));";
                        fi;

                        if *have_reject

                            say "\t\t\t fun REJECT() = action (i, acts ! l";

                            if *uses_trailing_context    sayln ", rs);";
                            else                         sayln     ");";
                            fi;
                        fi;      

                        sayln "\t\t\t include package   user_declarations;";
                        sayln "\t\t\t include package   internal::start_states;";
                        sayln "  {   yybufpos := i;";
                        sayln "      case yyk";
                        sayln " ";

                        sayln "";
                        sayln "\t\t\t#  Application actions \n";
                        makeaccept ends;
                        say "\n\t\t esac; }; } ";
                        say "); esac; end;    # fun action\n\n";

                        if *uses_trailing_context
                             say skel_mid2;
                        fi;

                        sayln "\t my { fin, trans } = unsafe::vector::get (internal::tab, s);";
                        sayln "\t new_accepting_leaves = fin ! accepting_leaves;";
                        sayln "\t if (l == *yybl)";
                        sayln "\t     if (trans == .trans (vector::get (internal::tab, 0)))";
                        say   "\t       action (l, new_accepting_leaves";

                        if *uses_trailing_context
                            say ", NIL";
                        fi;

                        say ");\n\t else";

                        sayln "\t     newchars= if *yydone \"\"; else yyinput 1024; fi;";
                        sayln "\t     if ((size newchars) == 0)";
                        sayln "\t\t        yydone := TRUE;";
                        say   "\t\t        if (l == i0)  user_declarations::eof ";

                        sayln
                            case *arg_code
                                NULL  => "();";
                                THE _ => "yyarg;";
                            esac;

                        say   "\t\t                  else action (l, new_accepting_leaves";

                        if *uses_trailing_context   sayln ", NIL); fi;";
                        else                    sayln      "); fi;";
                        fi;

                        sayln "\t\t  else if (l == i0)  yyb := newchars;";
                        sayln "\t\t\t     else yyb := substring(*yyb, i0, l-i0) + newchars; fi;";
                        sayln "\t\t       yygone := *yygone+i0;";
                        sayln "\t\t       yybl := size *yyb;";
                        sayln "\t\t       scan (s, accepting_leaves, l-i0, 0);";

                        sayln "\t     fi;   # (size newchars) == 0";
                        sayln "\t     fi;   # trans == $trans ...";

                        sayln "\t  else new_char = char::to_int (unsafe::vector_of_chars::get(*yyb, l));";

                        if (*char_set_size == 129)
                            sayln "\t\t new_char = if (new_char < 128) new_char; else 128; fi;"; 
                        fi;

                        say "\t\t new_state = ";

                        sayln (   if   *char_format 
                                       "char::to_int (unsafe::vector_of_chars::get (trans, new_char));";
                                  else
                                       "unsafe::vector::get (trans, new_char);";
                                  fi
                              );

                        say "\t\t if (new_state == 0) action (l, new_accepting_leaves";

                        if *uses_trailing_context   sayln ", NIL);";
                        else                        sayln      ");";
                        fi;

                        sayln "\t\t else scan (new_state, new_accepting_leaves, l+1, i0); fi;";
                        sayln "\t fi;";
                        sayln "  };    # fun scan";

                        if (not *uses_previous_newline)
                            sayln "/*";
                        fi;

                        say   "\t start= if (substring(*yyb,*yybufpos - 1, 1)==\"\\n\")";
                        sayln " *yybegin_i+1; else *yybegin_i; fi;";

                        if (not *uses_previous_newline)
                            sayln "*/";
                        fi;

                        say "\t scan(";

                        if *uses_previous_newline   say "start"; 
                        else                    say "*yybegin_i /* start */ ";
                        fi;

                        sayln ", NIL, *yybufpos, *yybufpos);   # fun continue";
                        sayln "    };   # fun continue";

                        sayln
                            case *arg_code
                                NULL  =>           " };    # fun lex";
                                THE _ => " continue; };    # fun lex";
                            esac;


                        sayln "  lex; ";
                        sayln "  };   # fun make_lexer";
                        sayln "};";
                    };                                  # fun print_lexer


                uses_previous_newline := FALSE;
                reset_flags();

                lex_buf   :=  make_ibuf  (fil::open_for_read  infile);
                next_tok  :=  BOF;
                inquote   :=  FALSE;

                lex_out   :=  fil::open_for_write  outfile;
                state_num :=  2;
                line_num  :=  1;

                state_tab := enter (create (string::(<=)))("initial", 1);
                leaf_num  := -1;

                my  (user_code, rules, ends)
                    =
                    parse()
                    except
                        x =  {   close_ibuf *lex_buf;
                                 fil::close_output *lex_out;
                                 winix__premicrothread::file::remove_file  outfile;
                                 raise exception x;
                             };

                my (fins, trans, tctab, tcpairs)
                    =
                    makedfa rules;

                if *uses_trailing_context
                    close_ibuf *lex_buf;
                    fil::close_output *lex_out;
                    winix__premicrothread::file::remove_file  outfile;
                    pr_err "lookahead is unimplemented";
                fi;

                if *header_decl     say *header_code;
                else                say ("package " + *package_name);
                fi;

                say "{\n";
                say skel_hd;
                say user_code;
                say "}; #  end of user routines \n";
                say "exception LEX_ERROR; # Raised if illegal leaf action tried.\n";
                say "package internal {\n\t \n";

                maketable (fins, tctab, tcpairs, trans);

                say "package start_states {\n\t \n";
                say "\t Yystartstate = STARTSTATE Int;\n";

                makebegin();

                say "\n };\n";
                say "Result = user_declarations::Lex_Result;\n";
                say "\t exception LEXER_ERROR; # Raised if illegal leaf action tried */\n";
                say "};\n\n";

                say     if *pos_arg       "fun make_lexer (yyinput, yygone0: Int) =\n { \n";
                        else      "fun make_lexer yyinput =\n{\t my yygone0=1;\n";
                        fi;


                if *count_newlines
                    say "\t my yylineno = REF 0;\n\n";
                fi;

                say "\t yyb = REF \"\\n\"; \t\t#  Buffer \n\
                     \\t yybl = REF 1;\t\t# Buffer length \n\
                     \\t yybufpos = REF 1;\t\t#  location of next character to use \n\
                     \\t yygone = REF yygone0;\t#  position in file of beginning of buffer \n\
                     \\t yydone = REF FALSE;\t\t#  eof found yet? \n\
                     \\t yybegin_i = REF 1;\t\t# Current 'start state' for lexer \n\
                     \\n\t yybegin = \\\\ (internal::start_states::STARTSTATE x) =\n\
                     \\t\t yybegin_i := x;\n\n";

                print_lexer ends;

                close_ibuf *lex_buf;

                fil::close_output *lex_out;
            };                                  # fun lex_fn
    };
end;







Comments and suggestions to: bugs@mythryl.org

PreviousUpNext