## Lexical analyzer generator for Standard ML.
## Version 1.7.0, June 1998
# Compiled by:
#
src/app/lex/mythryl-lex.lib# This software comes with ABSOLUTELY NO WARRANTY.
# This software is subject only to the PRINCETON STANDARD ML SOFTWARE LIBRARY
# COPYRIGHT NOTICE, LICENSE AND DISCLAIMER, (in the file "COPYRIGHT",
# distributed with this software). You may copy and distribute this software;
# see the COPYRIGHT NOTICE for details and restrictions.
#
# Changes:
# 07/25/89 (drt): added %header declaration, code to place
# user declarations at same level as make_lexer, etc.
# This is needed for the parser generator.
# /10/89 (appel): added %arg declaration (see lexgen.doc).
# /04/90 (drt): fixed following bug: couldn't use the lexer after an
# error occurred -- NextTok and inquote weren't being reset
# 10/22/91 (drt): disabled use of lookahead
# 10/23/92 (drt): disabled use of $ operator (which involves lookahead),
# added handlers for dictionary lookup routine
# 11/02/92 (drt): changed handler for exception Reject in generated lexer
# to internal::Reject
# 02/01/94 (appel): Moved the exception handler for Reject in such
# a way as to allow tail-recursion (improves performance
# wonderfully!).
# 02/01/94 (appel): Fixed a bug in parsing of state names.
# 05/19/94 (Mikael Pettersson, mpe@ida.liu.se):
# Transition tables are usually represented as strings, but
# when the range is too large, int vectors constructed by
# code like "vector::Vector[1, 2, 3, ...]" are used instead.
# The problem with this isn't that the vector itself takes
# a lot of space, but that the code generated by Lib7 to
# construct the intermediate list at run-time is *HUGE*. My
# fix is to encode an int vector as a string literal (using
# two bytes per int) and emit code to decode the string to
# a vector at run-time. Lib7 compiles string literals into
# substrings in the code, so this uses much less space.
# 06/02/94 (jhr): Modified export-lex.pkg to conform to new installation
# scheme. Also removed tab characters from string literals.
# 10/05/94 (jhr): Changed generator to produce code that uses the new
# basis style strings and characters.
# 10/06/94 (jhr) Modified code to compile under new basis style strings
# and characters.
# 02/08/95 (jhr) Modified to use new List module interface.
# 05/18/95 (jhr) changed vector::Vector to vector::from_list
#
# Revision 1.9 1998/01/06 19:23:53 appel
# added %posarg feature to permit position-within-file to be passed
# as a parameter to make_lexer
#
# Revision 1.8 1998/01/06 19:01:48 appel
# repaired error messages like "cannot have both %package and %header"
#
# Revision 1.7 1998/01/06 18:55:49 appel
# permit %% to be unescaped within regular expressions
#
# Revision 1.6 1998/01/06 18:46:13 appel
# removed undocumented feature that permitted extra %% at end of rules
#
# Revision 1.5 1998/01/06 18:29:23 appel
# put yylineno variable inside make_lexer function
#
# Revision 1.4 1998/01/06 18:19:59 appel
# Check for newline inside quoted string
#
# Revision 1.3 1997/10/04 03:52:13 dbm
# Fix to remove output file if mythryl-lex fails.
#
# 10/17/02 (jhr) changed bad character error message to properly
# print the bad character.
# 10/17/02 (jhr) fixed skipws to use char::is_space test.
# 07/27/05 (jhr) add \r as a recognized escape sequence.
# Subject: lookahead in mythryl-lex
# Reply-to: david.tarditi@CS.CMU.EDU
# Date: Mon, 21 Oct 91 14:13:26 -0400
#
# There is a serious bug in the implementation of lookahead,
# as done in mythryl-lex, and described in Aho, Sethi, and Ullman,
# p. 134 "Implementing the Lookahead Operator"
#
# We have disallowed the use of lookahead for now because
# of this bug.
#
# As a counter-example to the implementation described in
# ASU, consider the following specification with the
# input string "aba" (this example is taken from
# a comp.compilers message from Dec. 1989, I think):
#
# Lex_Result=Void
# linenum = REF 1
# fun error x = file::write (fil::stderr, x + "\n")
# eof = \\ () => ()
# %%
# %package lex
# %%
# (a
|ab)/ba => (print yytext; print "\n"; ());
#
# The ASU proposal works as follows. Suppose that we are
# using NFA's to represent our regular expressions. Then to
# build an NFA for e1 / e2, we build an NFA n1 for e1
# and an NFA n2 for e2, and add an epsilon transition
# from e1 to e2.
#
# When lexing, when we encounter the end state of e1e2,
# we take as the end of the string the position in
# the string that was the last occurrence of the state of
# the NFA having a transition on the epsilon introduced
# for /.
#
# Using the example we have above, we'll have an NFA
# with the following states:
#
#
# 1 -- a --> 2 -- b --> 3
#
| |
#
| epsilon | epsilon
#
| |
#
|------------> 4 -- b --> 5 -- a --> 6
#
# On our example, we get the following list of transitions:
#
# a: 2, 4 (make an epsilon transition from 2 to 4)
# ab: 3, 4, 5 (make an epsilon transition from 3 to 4)
# aba: 6
#
# If we chose the last state in which we made an epsilon transition,
# we'll chose the transition from 3 to 4, and end up with "ab"
# as our token, when we should have "a" as our token.
### "Men have become the tools of their tools."
###
### -- Henry David Thoreau
# Is there any reason to use this instead of standard library red-black trees?
# (Probably dates from era before standard library had them?) XXX SUCKO FIXME
stipulate
package fil = file__premicrothread; # file__premicrothread is from
src/lib/std/src/posix/file--premicrothread.pkgherein
generic package red_black_g ( b: api { Key;
> : (Key, Key) -> Bool;
}
)
: (weak)
api { Tree;
Key;
empty: Tree;
insert: (Key, Tree) -> Tree;
lookup: (Key, Tree) -> Key;
exception NOT_FOUND Key;
}
{
include package b;
Color = RED
| BLACK;
Tree = EMPTY
| TREE (Key, Color, Tree, Tree); empty = EMPTY;
exception NOT_FOUND Key;
fun insert (key, t)
=
{ fun f EMPTY
=>
TREE (key, RED, EMPTY, EMPTY);
f (TREE (k, BLACK, l, r))
=>
if (key > k)
case (f r)
r as TREE (rk, RED, rl as TREE (rlk, RED, rll, rlr), rr)
=>
case l
TREE (lk, RED, ll, lr)
=>
TREE (k, RED, TREE (lk, BLACK, ll, lr),
TREE (rk, BLACK, rl, rr));
_ => TREE (rlk, BLACK, TREE (k, RED, l, rll),
TREE (rk, RED, rlr, rr));
esac;
r as TREE (rk, RED, rl, rr as TREE (rrk, RED, rrl, rrr))
=>
case l
TREE (lk, RED, ll, lr)
=>
TREE (k, RED, TREE (lk, BLACK, ll, lr),
TREE (rk, BLACK, rl, rr));
_ => TREE (rk, BLACK, TREE (k, RED, l, rl), rr);
esac;
r => TREE (k, BLACK, l, r);
esac;
elif (k > key)
case (f l)
#
l as TREE (lk, RED, ll, lr as TREE (lrk, RED, lrl, lrr))
=>
case r
TREE (rk, RED, rl, rr)
=>
TREE (k, RED, TREE (lk, BLACK, ll, lr),
TREE (rk, BLACK, rl, rr));
_ =>
TREE (lrk, BLACK, TREE (lk, RED, ll, lrl),
TREE (k, RED, lrr, r));
esac;
l as TREE (lk, RED, ll as TREE (llk, RED, lll, llr), lr)
=>
case r
TREE (rk, RED, rl, rr)
=>
TREE (k, RED, TREE (lk, BLACK, ll, lr),
TREE (rk, BLACK, rl, rr));
_ =>
TREE (lk, BLACK, ll, TREE (k, RED, lr, r));
esac;
l => TREE (k, BLACK, l, r);
esac;
else
TREE (key, BLACK, l, r);
fi;
f (TREE (k, RED, l, r))
=>
if (key > k) TREE (k, RED, l, f r);
elif (k > key) TREE (k, RED, f l, r);
else TREE (key, RED, l, r);
fi;
end;
case (f t)
TREE (k, RED, l as TREE(_, RED, _, _), r) => TREE (k, BLACK, l, r);
TREE (k, RED, l, r as TREE(_, RED, _, _)) => TREE (k, BLACK, l, r);
t => t;
esac;
};
fun lookup (key, t)
=
get t
where
fun get EMPTY
=>
raise exception (NOT_FOUND key);
get (TREE (k, _, l, r))
=>
if (k>key) get l;
elif (key>k) get r;
else k;
fi;
end;
end;
};
api Lexgen {
lex_fn: String -> Void;
};
package lex_fn: (weak) Lexgen {
include package rw_vector;
include package list;
infix my 9 sub ;
Token = CHARS Rw_Vector (Bool)
| QMARK | STAR | PLUS | BAR
| LP | RP | CARAT | DOLLAR | SLASH | STATE List( String )
| REPS (Int, Int) | ID String | ACTION String
| BOF | EOF | ASSIGN | SEMI | ARROW | LEXMARK | LEXSTATES
| COUNT | REJECT | FULLCHARSET | STRUCT | HEADER | ARG | POSARG
;
Expression
= EPS
| ILK (Rw_Vector( Bool ), Int) | CLOSURE Expression
| ALT (Expression, Expression) | CAT (Expression, Expression) | TRAIL Int
| END Int
;
# Flags describing input Lex spec.
# - unnecessary code is omitted
# if possible
char_format = REF FALSE;
uses_trailing_context = REF FALSE;
uses_previous_newline = REF FALSE;
# Flags for various bells & whistles that Lex has.
# These slow the lexer down and should be omitted
# from production lexers (if you really want speed)
count_newlines = REF FALSE;
pos_arg = REF FALSE;
have_reject = REF FALSE;
# Can increase size of character set
char_set_size = REF 129;
# Can name package or declare header code
package_name = REF "Mlex";
header_code = REF "";
header_decl = REF FALSE;
arg_code = REF (NULL: Null_Or( String ));
package_declaration
=
REF FALSE;
reset_flags
=
\\ ()
=
{ count_newlines := FALSE;
have_reject := FALSE;
pos_arg := FALSE;
uses_trailing_context := FALSE;
char_set_size := 129;
package_name := "Mlex";
header_code := "";
header_decl := FALSE;
arg_code := NULL;
package_declaration := FALSE;
};
lex_out = REF fil::stdout;
fun say x
=
fil::write(*lex_out, x);
# Union: merge two sorted lists of integers
#
fun union (a, b)
=
merge ( reverse a,
reverse b,
NIL
)
where
recursive my merge
=
\\ (NIL, NIL, z) => z;
(NIL, el ! more, z) => merge (NIL, more, el ! z);
(el ! more, NIL, z) => merge (more, NIL, el ! z);
(x ! morex, y ! morey, z)
=>
if ((x: Int)==(y: Int)) merge (morex, morey, x ! z);
elif (x > y) merge (morex, y ! morey, x ! z);
else merge (x ! morex, morey, y ! z);
fi;
end;
end;
# Nullable: compute if a important expression
# parse tree node is nullable
#
recursive my nullable
=
\\
EPS => TRUE;
ILK(_) => FALSE;
CLOSURE(_) => TRUE;
ALT (n1, n2) => nullable n1 or nullable n2;
CAT (n1, n2) => nullable n1 and nullable n2;
TRAIL (_) => TRUE;
END (_) => FALSE;
end
# FIRSTPOS: firstpos function for parse tree expressions
#
also
firstpos
=
\\
EPS => NIL;
ILK(_, i) => [i];
CLOSURE (n) => firstpos n;
ALT (n1, n2) => union (firstpos n1, firstpos n2);
CAT (n1, n2) => if (nullable n1 ) union (firstpos n1, firstpos n2);
else firstpos n1; fi;
TRAIL i => [i];
END i => [i];
end
# LASTPOS: Lastpos function for parse tree expressions
#
also
lastpos
=
\\ EPS => NIL;
ILK(_, i) => [i];
CLOSURE n => lastpos n;
ALT (n1, n2) => union (lastpos n1, lastpos n2);
CAT (n1, n2) => if (nullable n2 ) union (lastpos n1, lastpos n2);
else lastpos n2; fi;
TRAIL i => [i];
END i => [i];
end
;
# +++: Increment an integer reference
fun +++(x) : Int
=
{ x := *x + 1;
*x;
};
package dictionary {
Relation(X)
=
(X, X) -> Bool;
# abstype Dictionary (Y, X)
# =
# DATA { table: List( (Y, X) ),
# leq: (Y, Y) -> Bool
# }
# with
stipulate
Dictionary (Y, X) # Start of abstype-replacement recipe -- see http://successor-ml.org/index.php?title=Degrade_abstype_to_derived_form
= #
DATA { table: List( (Y, X) ), #
leq: (Y, Y) -> Bool #
}; #
herein #
Dictionary (Y, X) = Dictionary (Y, X); # End of abstype-replacement recipe
exception LOOKUP;
fun create leqfunc
=
DATA { table => NIL, leq => leqfunc };
fun lookup (DATA { table => entrylist, leq } ) key
=
search entrylist
where
fun search []
=>
raise exception LOOKUP;
search((k, item) ! entries)
=>
if (leq (key, k))
if (leq (k, key)) item;
else raise exception LOOKUP;
fi;
else
search entries;
fi;
end;
end;
fun enter (DATA { table => entrylist, leq } )
(newentry as (key: Y, item: X)) : Dictionary (Y, X)
=
{ gt = \\ a = \\ b = not (leq (a, b));
eq = \\ k = \\ k' = (leq (k, k')) and (leq (k', k));
fun update NIL
=>
[ newentry ];
update ((entry as (k, _)) ! entries)
=>
if (eq key k ) newentry ! entries;
elif (gt k key ) newentry ! (entry ! entries);
else entry ! (update entries);
fi;
end;
DATA { table => update entrylist, leq };
};
fun listofdict (DATA { table => entrylist, leq } )
=
f (entrylist, NIL)
where
fun f (NIL, r) => reverse r;
f (a ! b, r) => f (b, a ! r);
end;
end;
end;
};
include package dictionary;
# INPUT.ML: Input w/ one character push back capability
line_num = REF 1;
stipulate
Ibuf = BUF ( # Start of abstype-replacement recipe -- see http://successor-ml.org/index.php?title=Degrade_abstype_to_derived_form
fil::Input_Stream, #
#
{ b: Ref( String ), #
p: Ref( Int ) #
} #
); #
herein #
Ibuf = Ibuf; # End of abstype-replacement recipe.
fun make_ibuf s
=
BUF (s, { b=>REF "", p => REF 0 } );
fun close_ibuf (BUF (s, _))
=
fil::close_input s;
exception EOF_EXCEPTION;
fun getch (a as (BUF (s,{ b, p } )))
=
if (*p == size *b)
#
b := fil::read_n (s, 1024);
p := 0;
if (size *b == 0) raise exception EOF_EXCEPTION;
else getch a;
fi;
else
ch = string::get_byte_as_char(*b, *p);
#
if (ch == '\n')
line_num := *line_num + 1;
fi;
p := *p + 1;
ch;
fi;
fun ungetch (BUF (s,{ b, p } ))
=
{ p := *p - 1;
#
if (string::get_byte_as_char(*b,*p) == '\n')
line_num := *line_num - 1;
fi;
};
end;
exception ERROR;
fun pr_err x
=
{ fil::write (
fil::stderr,
string::cat [
"mythryl-lex: error, line ",
(int::to_string *line_num),
": ",
x,
"\n"
]
);
raise exception ERROR;
};
fun pr_syn_err x
=
{ fil::write (
fil::stderr,
string::cat [
"mythryl-lex: syntax error, line ", # <-- Only line differing from above fn.
(int::to_string *line_num),
": ",
x,
"\n"
]
);
raise exception ERROR;
};
exception SYNTAX_ERROR; # Error in user's input file.
exception LEX_ERROR; # Unexpected error in lexer.
lex_buf = REF (make_ibuf fil::stdin);
lex_state = REF 0;
next_tok = REF BOF;
inquote = REF FALSE;
fun advance_tok () : Void
=
{ fun is_letter c
=
(c >= 'a' and c <= 'z') or
(c >= 'A' and c <= 'Z');
fun is_digit c
=
(c >= '0') and (c <= '9');
fun is_odigit c
=
(c >= '0') and (c <= '7');
fun is_xdigit c
=
((c >= '0') and (c <= '9'))
or
((c >= 'a') and (c <= 'f'))
or
((c >= 'A') and (c <= 'F'));
# Check for valid (non-leading) identifier character (added by John H Reppy)
fun is_ident_chr c
=
( is_letter c
or is_digit c
or c == '_'
or c == '\''
);
fun atoi s
=
num (explode s, 0)
where
fun num (c ! r, n)
=>
if (is_digit c) num (r, 10*n + (char::to_int c - char::to_int '0'));
else n;
fi;
num ([], n)
=>
n;
end;
end;
fun skipws ()
=
{ ch = nextch();
if (char::is_space ch) skipws();
else ch;
fi;
}
also
fun nextch ()
=
getch *lex_buf
also
fun escaped ()
=
case (nextch ())
#
'b' => '\x08';
'n' => '\n';
'r' => '\r';
't' => '\t';
'h' => '\x80';
'x' => { fun err t
=
pr_err("illegal ascii hex escape '\\x" + (implode (reverse t)) + "'");
fun convert c
=
case c
'0' => 0;
'1' => 1;
'2' => 2;
'3' => 3;
'4' => 4;
'5' => 5;
'6' => 6;
'7' => 7;
'8' => 8;
'9' => 9;
'a' => 10; 'A' => 10;
'b' => 11; 'B' => 11;
'c' => 12; 'C' => 12;
'd' => 13; 'D' => 13;
'e' => 14; 'E' => 14;
'f' => 15; 'F' => 15;
_ => raise exception DIE "Impossible";
esac;
fun f (i, count, chars)
=
if (count == 2)
if (i >= *char_set_size) err chars;
else char::from_int i;
fi;
else
ch = nextch ();
if (is_xdigit ch) f (i*16+(convert ch), count+1, ch ! chars);
else err chars;
fi;
fi;
x = nextch ();
if (is_xdigit x) f (convert x, 1, [x]);
else x;
fi;
};
x => { fun err t
=
pr_err("illegal ascii octal escape '\\" + (implode (reverse t)) + "'");
fun convert c
=
char::to_int c - char::to_int '0';
fun f (i, count, chars)
=
if (count == 3)
if (i >= *char_set_size) err chars;
else char::from_int i;
fi;
else
ch = nextch ();
if (is_odigit ch) f (i*8+(convert ch), count+1, ch ! chars);
else err chars;
fi;
fi;
if (is_odigit x) f (convert x, 1, [x]);
else x;
fi;
};
esac
also
fun onechar x
=
{ c = make_rw_vector (*char_set_size, FALSE);
#
set (c, char::to_int x, TRUE);
CHARS c;
};
case *lex_state
#
0 =>
next_tok := make_tok ()
where
make_tok
=
\\ ()
=
case (skipws ())
#
# Lex % operators
#
'%' => case (nextch ())
'%' => LEXMARK;
a =>
{ fun f s
=
{ a = nextch();
if (is_letter a)
f (a ! s);
else
ungetch *lex_buf;
implode (reverse s);
fi;
};
case (f [a])
"reject" => REJECT;
"count" => COUNT;
"full" => FULLCHARSET;
"s" => LEXSTATES;
"S" => LEXSTATES;
"package" => STRUCT;
"header" => HEADER;
"arg" => ARG;
"posarg" => POSARG;
_ => pr_err "unknown % operator ";
esac;
};
esac;
# Semicolon (for end of LEXSTATES):
#
';' => SEMI;
# Anything else:
#
ch => if (is_letter ch)
fun get_id matched
=
{ x = nextch();
/**** fix by John H Reppy
if is_letter x or is_digit x or
x == "_" or x == "'"
****/
if (is_ident_chr x)
get_id (x ! matched);
else
ungetch *lex_buf;
implode (reverse matched);
fi;
};
ID (get_id [ch]);
else
pr_syn_err (string::cat [
"bad character: \"", char::to_string ch, "\""
]);
fi;
esac;
end;
1 =>
{ recursive my make_tok
=
\\ ()
=
if *inquote
#
case (nextch ())
#
# Inside quoted string
#
'\\' => onechar (escaped());
'"' => { inquote := FALSE;
make_tok();
};
'\n' => { pr_syn_err "end-of-line inside quoted string";
inquote := FALSE;
make_tok();
};
x => onechar x;
esac;
else
case (skipws ())
# Single character operators:
#
'?' => QMARK;
'*' => STAR;
'+' => PLUS;
'
|' => BAR;
'(' => LP;
')' => RP;
'^' => CARAT;
'$' => DOLLAR;
'/' => SLASH;
';' => SEMI;
'.' => { c = make_rw_vector (*char_set_size, TRUE);
set (c, 10, FALSE);
CHARS c;
};
# Assign and arrow
'=' => { c = nextch();
if (c == '>')
ARROW;
else
ungetch *lex_buf;
ASSIGN;
fi;
};
# Character set:
'[' => { recursive my ilkch
=
\\ () = { x = skipws();
#
if (x == '\\') escaped ();
else x;
fi;
};
first = ilkch();
flag = (first != '^');
c = make_rw_vector(*char_set_size, not flag);
fun add NULL => ();
add (THE x) => set (c, char::to_int x, flag);
end
also
fun range (x, y)
=
if (x > y)
pr_err "bad char. range";
else
i = REF (char::to_int x);
j = char::to_int y;
for (*i <= j) {
add (THE (char::from_int *i));
i := *i + 1;
};
fi
also
fun get_ilk last
=
case (ilkch ())
']' => { add last;
c;
};
'-' => case last
NULL
=>
get_ilk (THE '-');
THE last'
=>
{ x = ilkch ();
if (x == ']')
add last;
add (THE '-'); c;
else
range (last', x);
get_ilk NULL;
fi;
};
esac;
x => { add last;
get_ilk (THE x);
};
esac;
CHARS (get_ilk (first == '^' ?? NULL :: THE first));
};
# Start States specification:
#
'<' => { recursive my get_state
=
\\ (prev, matched)
=
case (nextch ())
#
'>' => matched ! prev;
',' => get_state (matched ! prev, "");
x => if (is_ident_chr x) get_state (prev, matched + string::from_char x);
else pr_syn_err "bad start state list";
fi;
esac;
STATE (get_state (NIL, ""));
};
# { id } or repetitions
'{' => { ch = nextch();
#
if (is_letter ch)
#
fun get_id matched
=
case (nextch ())
#
'}' => matched;
x => if (is_ident_chr x)
#
get_id (matched + string::from_char x);
else
pr_err "invalid char. class name";
fi;
esac;
ID (get_id (string::from_char ch));
elif (is_digit ch)
fun get_r (matched, r1)
=
case (nextch ())
#
'}' => { n = atoi matched;
#
if (r1 == -1) (n, n);
else (r1, n);
fi;
};
',' => if (r1 == -1) get_r("", atoi matched);
else pr_err "invalid repetitions spec.";
fi;
x => if (is_digit x) get_r (matched + string::from_char x, r1);
else pr_err "invalid char in repetitions spec";
fi;
esac;
REPS (get_r (string::from_char ch, -1));
else
pr_err "bad repetitions spec";
fi;
};
# Lex % operators:
'\\' => onechar (escaped());
# Start quoted string:
#
'"' => { inquote := TRUE;
make_tok ();
};
# Anything else:
#
ch => onechar ch;
esac;
fi;
next_tok := make_tok();
};
2 => next_tok
:=
case (skipws ())
'(' =>
{ fun loop_to_end (backslash, x)
=
{ c = getch *lex_buf;
notb = not backslash;
nstr = c ! x;
case c
'"' => if notb nstr;
else loop_to_end (FALSE, nstr);
fi;
_ => loop_to_end (c == '\\' and notb, nstr);
esac;
};
fun get_act (lpct, x)
=
{ c = getch *lex_buf;
nstr = c ! x;
case c
'"' => get_act (lpct, loop_to_end (FALSE, nstr));
'(' => get_act (lpct + 1, nstr);
')' => if (lpct == 0 ) implode (reverse x);
else get_act (lpct - 1, nstr);
fi;
_ => get_act (lpct, nstr);
esac;
};
ACTION (get_act (0, NIL));
};
';' => SEMI;
c => (pr_syn_err ("invalid character " + string::from_char c));
esac;
_ => raise exception LEX_ERROR;
esac;
}
except
EOF_EXCEPTION
=
next_tok := EOF;
fun get_tok (_: Void) : Token
=
{ t = *next_tok;
advance_tok();
t;
};
sym_tab
=
REF (create string::(<=)) : Ref( Dictionary( String, Expression ) );
fun get_expression () : Expression
=
expression0 ()
where
recursive my optional
=
\\ e = ALT (EPS, e)
also
lookup'
=
\\ name
=
lookup *sym_tab name
except
LOOKUP
=
pr_err ("bad regular expression name: " + name)
also
newline
=
\\ ()
=
{ c = make_rw_vector (*char_set_size, FALSE);
set (c, 10, TRUE);
c;
}
also
endline
=
\\ e = trail (e, ILK (newline(), 0))
also
trail
=
\\ (e1, e2)
=
CAT (CAT (e1, TRAIL 0), e2)
also
closure1
=
\\ e
=
CAT (e, CLOSURE e)
also
repeat
=
\\ (min, max, e)
=
rep (min, max)
where
recursive my rep
=
\\ (0, 0) => EPS;
(0, 1) => ALT (e, EPS);
(0, i) => CAT (rep (0, 1), rep (0, i - 1));
(i, j) => CAT (e, rep (i - 1, j - 1));
end;
end
also
expression0
=
\\ ()
=
case (get_tok ())
#
CHARS c => expression1 (ILK (c, 0));
LP => { e = expression0 ();
#
if (*next_tok == RP)
#
advance_tok ();
expression1 e;
else
pr_syn_err "missing '('";
fi;
};
ID name => expression1 (lookup' name);
_ => raise exception SYNTAX_ERROR;
esac
also
expression1
=
\\ e
=
case *next_tok
#
SEMI => e;
ARROW => e;
EOF => e;
LP => expression2 (e, expression0());
RP => e;
t => { advance_tok();
#
case t
QMARK => expression1 (optional e);
STAR => expression1 (CLOSURE e);
PLUS => expression1 (closure1 e);
CHARS c => expression2 (e, ILK (c, 0));
BAR => ALT (e, expression0());
DOLLAR => { uses_trailing_context := TRUE;
endline e;
};
SLASH => { uses_trailing_context := TRUE;
trail (e, expression0());
};
REPS (i, j)
=>
expression1 (repeat (i, j, e));
ID name
=>
expression2 (e, lookup' name);
_ => raise exception SYNTAX_ERROR;
esac;
};
esac
also
expression2
=
\\ (e1, e2)
=
case *next_tok
#
SEMI => CAT (e1, e2);
ARROW => CAT (e1, e2);
EOF => CAT (e1, e2);
LP => expression2 (CAT (e1, e2), expression0());
RP => CAT (e1, e2);
t => { advance_tok();
#
case t
QMARK => expression1 (CAT (e1, optional e2));
STAR => expression1 (CAT (e1, CLOSURE e2));
PLUS => expression1 (CAT (e1, closure1 e2));
CHARS c => expression2 (CAT (e1, e2), ILK (c, 0));
BAR => ALT (CAT (e1, e2), expression0());
DOLLAR => { uses_trailing_context := TRUE;
endline (CAT (e1, e2));
};
SLASH => { uses_trailing_context := TRUE;
trail (CAT (e1, e2), expression0());
};
REPS (i, j)
=>
expression1 (CAT (e1, repeat (i, j, e2)));
ID name
=>
expression2 (CAT (e1, e2), lookup' name);
_ => raise exception SYNTAX_ERROR;
esac;
};
esac;
end; # fun get_expression
state_tab
=
REF (create (string::(<=))) : Ref( Dictionary( String, Int ) );
state_num = REF 0;
fun get_states () : List( Int )
=
{ fun add NIL sl
=>
sl;
add (x ! y) sl
=>
add y (union ( [ lookup *state_tab x
except
LOOKUP = pr_err ("bad state name: " + x)
],
sl));
end;
fun addall i sl
=
if (i <= *state_num) addall (i+2) (union ([i], sl));
else sl;
fi;
fun incall (x ! y) => (x+1) ! incall y;
incall NIL => NIL;
end;
fun addincs (x ! y) => x ! (x+1) ! addincs y;
addincs NIL => NIL;
end;
state_list
=
case *next_tok
STATE s => { advance_tok();
lex_state := 1;
add s NIL;
};
_ => addall 1 NIL;
esac;
case *next_tok
#
CARAT
=>
{ lex_state := 1;
advance_tok ();
uses_previous_newline := TRUE;
incall state_list;
};
_ =>
addincs state_list;
esac;
}; # fun get_states
leaf_num = REF -1;
fun renum (e: Expression) : Expression
=
label e
where
recursive my label
=
\\ EPS => EPS;
ILK (x, _) => ILK (x,+++leaf_num);
CLOSURE e => CLOSURE (label e);
ALT (e1, e2) => ALT (label e1, label e2);
CAT (e1, e2) => CAT (label e1, label e2);
TRAIL i => TRAIL(+++leaf_num);
END i => END(+++leaf_num);
end;
end;
exception PARSE_ERROR;
fun parse () : ((String, List( (List( Int ), Expression)), Dictionary (String, String)))
=
{ accept
=
REF (create string::(<=)) : Ref( Dictionary( String, String ) );
recursive my parse_rtns
=
\\ l = case (getch *lex_buf)
#
'%' => { c = getch *lex_buf;
if (c == '%') implode (reverse l);
else parse_rtns (c ! '%' ! l);
fi;
};
c => parse_rtns (c ! l);
esac
also
parse_defs
=
\\ ()
=
{ lex_state := 0;
advance_tok ();
case *next_tok
#
LEXMARK
=>
();
LEXSTATES
=>
{ fun f ()
=
case *next_tok
ID i
=>
{ state_tab := enter *state_tab (i, +++state_num);
+++state_num;
advance_tok ();
f ();
};
_ => ();
esac;
advance_tok();
f ();
if (*next_tok == SEMI) parse_defs ();
else pr_syn_err "expected ';'";
fi;
};
ID x
=>
{ lex_state := 1;
#
advance_tok ();
if (get_tok() == ASSIGN)
#
sym_tab := enter *sym_tab (x, get_expression());
if (*next_tok == SEMI) parse_defs();
else pr_syn_err "expected ';'";
fi;
else
raise exception SYNTAX_ERROR;
fi;
};
REJECT => { have_reject := TRUE; parse_defs(); };
COUNT => { count_newlines := TRUE; parse_defs(); };
FULLCHARSET => { char_set_size := 256; parse_defs(); };
HEADER => { lex_state := 2; advance_tok();
#
case (get_tok ())
#
ACTION s
=>
if *package_declaration
(pr_err "cannot have both %package and %header \
\declarations");
elif *header_decl
pr_err "duplicate %header declarations";
else
header_code := s;
lex_state := 0;
header_decl := TRUE;
parse_defs();
fi;
_ => raise exception SYNTAX_ERROR;
esac;
};
POSARG => { pos_arg := TRUE;
parse_defs ();
};
ARG => { lex_state := 2;
advance_tok();
case (get_tok ())
#
ACTION s
=>
{ case *arg_code
THE _ => pr_err "duplicate %arg declarations";
NULL => arg_code := THE s;
esac;
lex_state := 0;
parse_defs ();
};
_ => raise exception SYNTAX_ERROR;
esac;
};
STRUCT => { advance_tok();
#
case *next_tok
#
ID i => if *header_decl
#
pr_err "cannot have both %package and %header \
\declarations";
elif *package_declaration
pr_err "duplicate %package declarations";
else
package_name := i;
package_declaration := TRUE;
fi;
_ => (pr_err "expected ID");
esac;
parse_defs ();
};
_ => raise exception SYNTAX_ERROR;
esac;
} # fun parse_defs
also
parse_rules
=
\\ rules
=
{ lex_state := 1;
#
advance_tok ();
case *next_tok
#
EOF => rules;
_ =>
{ s = get_states();
#
e = renum (CAT (get_expression(), END 0));
if (*next_tok == ARROW)
#
lex_state := 2;
advance_tok ();
case (get_tok ())
#
ACTION act
=>
if (*next_tok == SEMI)
#
accept := enter *accept (int::to_string *leaf_num, act);
parse_rules((s, e) ! rules);
else
pr_syn_err "expected ';'";
fi;
_ =>
raise exception SYNTAX_ERROR;
esac;
else
pr_syn_err "expected '=>'";
fi;
};
esac;
};
usercode = parse_rtns NIL;
parse_defs ();
( usercode,
parse_rules NIL,
*accept
);
}
except
SYNTAX_ERROR
=
pr_syn_err "";
fun makebegin () : Void
=
{ fun make ((x, n: Int) ! y)
=>
{ say "my ";
say x;
say " = " ;
say "STARTSTATE ";
say (int::to_string n);
say ";\n";
make y;
};
make NIL
=>
();
end;
say "\n# start state definitions \n\n";
make (listofdict *state_tab);
};
package l
=
package {
nonfix my > ;
Key = (List (Int), String);
fun > ((key, item: String), (key', item'))
=
f key key'
where
fun f ((a: Int) ! a') (b ! b')
=>
if (int::(>) (a, b)) TRUE;
elif (a == b) f a' b';
else FALSE;
fi;
f _ _
=>
FALSE;
end;
end;
};
package rb
=
red_black_g( l );
fun maketable (fins: List( (Int, (List( Int )))),
tcs: List ((Int, (List( Int )))),
tcpairs: List ((Int, Int)),
trans: List ((Int,(List( Int ))))) : Void
=
{ # Fins = List (state #, list of final leaves for the state)
# tcs = List (state #, list of trailing context leaves which begin in this state)
#
# tcpairs = List (trailing context leaf, end leaf)
# trans = List (state #, list of transitions for state)
Element = NN Int
| TT Int | DD Int;
count = REF 0;
char_format := length trans < 256;
if *uses_trailing_context say "\nYyfinstate = NN Int
| TT Int | DD Int;\n";
else say "\nYyfinstate = NN Int;";
fi;
say "\nStatedata = { fin: List( Yyfinstate ), trans: ";
case *char_format
TRUE => say "String };";
FALSE => say "vector::Vector( Int ) };";
esac;
say "\n\
\# transition & final state table \n\
\tab = {\n";
case *char_format
#
TRUE => ();
FALSE =>
{ say "fun decode s k =\n";
say " { k' = k + k;\n";
say " hi = string::get_byte (s, k');\n";
say " lo = string::get_byte (s, k' + 1);\n";
say "\n";
say " hi * 256 + lo;\n";
say " };\n";
};
esac;
newfins
=
{ fun is_end_leaf t
=
f tcpairs
where
fun f ((l, e) ! r) => if (e==t) TRUE;
else f r;
fi;
f NIL => FALSE;
end;
end;
fun get_end_leaf t
=
f tcpairs
where
fun f ((tl, el) ! r)
=>
tl == t ?? el
:: f r;
f _ => raise exception MATCH;
end;
end;
fun get_tr_con_leaves s
=
f tcs
where
fun f ((s', l) ! r)
=>
s == s' ?? l
:: f r;
f NIL => NIL;
end;
end;
fun sort_leaves s
=
{ fun insert (x: Int) (a ! b)
=>
if (x <= a) x ! (a ! b);
else a ! (insert x b);
fi;
insert x NIL
=>
[x];
end;
list::fold_backward
(\\ (x, r) = insert x r)
[] s;
};
fun conv a
=
is_end_leaf a ?? DD a
:: NN a;
fun merge (a ! a', b ! b')
=>
if (a <= b) (conv a) ! merge (a', b ! b');
else (TT b) ! merge (a ! a', b');
fi;
merge (a ! a', NIL) => (conv a) ! (merge (a', NIL));
merge (NIL, b ! b') => (TT b) ! (merge (b', NIL));
merge (NIL, NIL) => NIL;
end;
map
(\\ (x, l)
=
reverse (
merge (
l,
sort_leaves (
map
(\\ x = get_end_leaf x)
(get_tr_con_leaves x)
)
)
)
)
fins;
};
rs = result
where
include package rb;
#
fun make_items x
=
{ fun emit8 (x, pos)
=
{ s = sprintf "x%02x" x; # Was: number_string::pad_left '0' 3 (int::to_string x);
#
case pos
16 => { say "\\\n\\\\"; say s; 1; };
_ => { say "\\"; say s; pos+1; };
esac;
};
fun emit16 (x, pos)
=
{ hi8 = x / 256;
lo8 = x - hi8 * 256; # x rem 256
emit8 (lo8, emit8 (hi8, pos));
};
fun make_string ([], _, _)
=>
();
make_string (x ! xs, emitter, pos)
=>
make_string (xs, emitter, emitter (x, pos));
end;
case *char_format
#
TRUE => { say " \n\"";
make_string (x, emit8, 0);
say "\"\n";
};
FALSE => { say (int::to_string (length x));
say ", \n\"";
make_string (x, emit16, 0);
say "\"\n";
};
esac;
};
fun make_entry (NIL, rs, t)
=>
reverse rs;
make_entry(((l: Int, x) ! y), rs, t)
=>
{ name = (int::to_string l);
#
{ my (r, n)
=
lookup ((x, name), t);
make_entry (y, (n ! rs), t);
}
except
NOT_FOUND _
=
{ count := *count+1;
say " (";
say name;
say ", ";
make_items x;
say "),\n";
make_entry (y, (name ! rs), (insert ((x, name), t)));
};
};
end;
say " s = [ \n";
result = make_entry (trans, NIL, empty);
case *char_format
#
TRUE
=>
{ say " (0, \"\")];\n";
say " fun f x = x;\n";
};
FALSE
=>
{ say " (0, 0, \"\")];\n";
say " fun f (n, i, x) = (n, vector::from_fn (i, decode x));\n";
};
esac;
say " s = map f (reverse (tail (reverse s)));\n";
say " exception LEX_HACKING_ERROR;\n";
say " fun get ((j, x) ! r, i: Int)\n";
say " =>\n";
say " if (i == j) x; else get (r, i); fi;\n\n";
say " get ([], i)\n";
say " =>\n";
say " raise exception LEX_HACKING_ERROR;\n";
say " end;\n";
say "fun g { fin => x, trans => i }\n";
say " =\n";
say " { fin => x, trans => get (s, i) };\n";
end;
fun make_table args
=
maketable args
where
fun make_one (a, b)
=
{ fun item (NN i) => ("NN", i);
item (TT i) => ("TT", i);
item (DD i) => ("DD", i);
end;
fun make_item x
=
{ my (t, n)
=
item x;
apply say ["(", t, " ", int::to_string n, ")"];
};
fun make_items [] => ();
make_items [x] => make_item x;
make_items (hd ! tl)
=>
{ make_item hd;
say ", ";
make_items tl;
};
end;
say "{ fin => [";
make_items b;
apply say ["], trans => ", a, "}"];
};
fun maketable ([], []) => ();
maketable ([a], [b]) => make_one (a, b);
maketable (a ! a', b ! b')
=>
{ make_one (a, b);
say ",\n";
maketable (a', b');
};
maketable _ => raise exception MATCH;
end;
end;
# fun make_table (NIL, NIL) => ();
# make_table (a ! a', b ! b') =>
# { funx make_items NIL = ()
#
| make_items (hd ! tl) =
# { my (t, n) =
# case hd of
# (NN i) => ("(NN ", i)
#
| (TT i) => ("(TT ", i)
#
| (DD i) => ("(DD ", i);
# say t; say (int::to_string n); say ")";
# if (null tl)
# ();
# else (say ", "; make_items tl); fi;
# };
# say "{ fin = ["; make_items b;
# say "], trans = "; say a; say "}";
# if (null a')
# ();
# else (say ",\n"; make_table (a', b')); fi;
# };
# end;
fun msg x
=
fil::say {. x; };
say " vector::from_list (map g \n[";
make_table (rs, newfins);
say "]);\n};\n";
msg ( " lexgen.pkg: Number of states = " + (int::to_string (length trans)));
msg ( " lexgen.pkg: Number of distinct rows = " + (int::to_string *count));
msg ( " lexgen.pkg: Approximate memory size of translation table = "
+ (int::to_string (*count * *char_set_size * (*char_format ?? 1 :: 8)))
+ " bytes\n\n"
);
};
# makeaccept: Takes a (String, String) dictionary, prints case statement for
# accepting leaf actions. The key strings are the leaf #'s, the data strings
# are the actions
fun makeaccept ends
=
make (listofdict ends, TRUE)
where
fun startline f
=
say " ";
fun make (NIL, f)
=>
{ startline f;
say "_ => raise exception internal::LEXER_ERROR;\n";
};
make ((x, a) ! y, f)
=>
{ startline f;
say x;
say " => ";
if (substring::size(#2 (substring::position "yytext" (substring::from_string a))) == 0)
say "{ ";
say a;
say "; };";
else
say "{ yytext=yymktext();\n";
say a;
say "; };";
fi;
say "\n";
make (y, FALSE);
};
end;
end;
fun leafdata (e: List( (List( Int ), Expression)))
=
{ fp = make_rw_vector (*leaf_num + 1, NIL);
leaf = make_rw_vector (*leaf_num + 1, EPS);
tcpairs = REF NIL;
trailmark = REF -1;
recursive my add
=
\\ (NIL, x) => ();
(hd ! tl, x) => { set (fp, hd, union (fp[ hd ], x));
add (tl, x);
};
end
also
moredata
=
\\ CLOSURE e1 => { moredata e1;
add (lastpos e1, firstpos e1);
};
ALT (e1, e2) => { moredata e1;
moredata e2;
};
CAT (e1, e2) => { moredata e1;
moredata e2;
add (lastpos e1, firstpos e2);
};
ILK (x, i) => set (leaf, i, ILK (x, i));
TRAIL i => { set (leaf, i, TRAIL i);
if (*trailmark == -1)
trailmark := i;
fi;
};
END i => { set (leaf, i, END i);
#
if (*trailmark != -1)
trailmark := -1;
tcpairs := (*trailmark, i) ! *tcpairs;
fi;
};
_ => ();
end
also
makedata
=
\\
NIL => ();
(_, x) ! tl
=>
{ moredata x;
makedata tl;
};
end;
trailmark := -1;
makedata e;
(fp, leaf, *tcpairs);
};
fun makedfa rules
=
{ visitstarts( startstates() );
( listofdict *fintab,
listofdict *transtab,
listofdict *tctab,
tcpairs
);
}
where
state_tab = REF (create (string::(<=))): Ref( Dictionary (String, Int ));
fintab = REF (create (int::(<=))): Ref( Dictionary (Int, (List( Int))));
transtab = REF (create (int::(<=))): Ref( Dictionary (Int, List( Int)) );
tctab = REF (create (int::(<=))): Ref( Dictionary (Int, (List( Int))));
my (fp, leaf, tcpairs)
=
leafdata rules;
fun visit (state, statenum)
=
{ transitions = gettrans state;
fintab := enter *fintab (statenum, getfin state);
tctab := enter *tctab (statenum, gettc state);
transtab := enter *transtab (statenum, transitions);
}
also
fun visitstarts states
=
vs states 0
where
fun vs NIL i => ();
vs (hd ! tl) i => { visit (hd, i); vs tl (i+1); };
end;
end
also
fun hashstate (s: List( Int ))
=
hs (s, "")
where
recursive my hs
=
\\ ((x: Int) ! y, z)
=>
hs (y, z + " " + (int::to_string x));
(NIL, z)
=>
z;
end;
end
also
fun find s
=
lookup *state_tab (hashstate s)
also
fun add (s, n)
=
state_tab := enter *state_tab (hashstate s, n)
also
fun getstate state
=
find state
except
LOOKUP
=
{ n = +++state_num;
add (state, n);
visit (state, n);
n;
}
also
fun getfin state
=
f state NIL
where
fun f (hd ! tl) fins
=>
case (leaf[ hd ])
END _ => f tl (hd ! fins);
_ => f tl fins;
esac;
f NIL fins
=>
fins;
end;
end
also
fun gettc state
=
f state NIL
where
fun f (hd ! tl) fins
=>
case (leaf[ hd ])
TRAIL _ => f tl (hd ! fins);
_ => f tl fins;
esac;
f NIL fins
=>
fins;
end;
end
also
fun gettrans state
=
loop (*char_set_size - 1) NIL
where
fun loop c tlist
=
{ fun cktrans NIL r
=>
r;
cktrans (hd ! tl) r
=>
case (leaf[ hd ])
ILK (i, _)
=>
if (i[ c ])
cktrans tl (union (r, fp[ hd ]));
else
cktrans tl r
except
INDEX_OUT_OF_BOUNDS
=
cktrans tl r;
fi;
_ => cktrans tl r;
esac;
end;
if (c >= 0)
v=cktrans state NIL;
loop (c - 1) if (v==NIL ) 0 ! tlist; else (getstate v) ! tlist; fi;
else
tlist;
fi;
};
end
also
fun startstates ()
=
{ makess rules;
listofarray (startarray, *state_num + 1);
}
where
startarray
=
make_rw_vector (*state_num + 1, NIL);
fun listofarray (a, n)
=
f (n - 1) NIL
where
fun f i l
=
i >= 0
?? f (i - 1) (a[i] ! l)
:: l;
end;
recursive my makess
=
\\
NIL => ();
(startlist, e) ! tl
=>
{ fix (startlist, firstpos e);
makess tl;
};
end
also
fix = \\
(NIL, _) => ();
(s ! tl, firsts)
=>
{ set (startarray,
s,
union (firsts, startarray[ s ])
);
fix (tl, firsts);
};
end ;
end;
end; # fun makedfa
skel_hd
=
" \n\
\ package user_declarations {\n\
\ \n\
\";
skel_mid2
=
"
| internal::DD k => action (i, (acts ! l), k ! rs)\n\
\
| internal::TT k =>\n\
\ { fun f (a ! b, r)\n\
\ =>\n\
\ if (a == k)\n\
\ action (i, (((internal::NN a) ! acts) ! l), (b@r));\n\
\ else\n\
\ f (b, a ! r);\n\
\ fi;\n\
\ \n\
\ f (NIL, r)\n\
\ =>\n\
\ action (i, (acts ! l), rs);\n\
\ end;\n\
\ \n\
\ f (rs, NIL);\n\
\ }\n\
\";
fun lex_fn infile
=
{ outfile = infile + ".pkg";
fun print_lexer ends
=
{ sayln
=
\\ x = { say x; say "\n"; };
case *arg_code
#
NULL => { sayln "fun lex () : internal::Result =";
sayln "{ fun continue () = lex(); ";
};
THE s => { say "fun lex ";
say "(yyarg as (";
say s;
sayln ")) =";
sayln " { fun continue () : internal::Result = ";
};
esac;
say " { fun scan (s, accepting_leaves: List( List( internal::Yyfinstate";
sayln " ) ), l, i0) =";
if *uses_trailing_context say "\t { fun action (i, NIL, rs)";
else say "\t { fun action (i, NIL)";
fi;
sayln " => raise exception LEX_ERROR;";
if *uses_trailing_context sayln "\t action (i, NIL ! l, rs) => action (i - 1, l, rs);";
else sayln "\t action (i, NIL ! l) => action (i - 1, l);";
fi;
if *uses_trailing_context sayln "\t action (i, (node ! acts) ! l, rs) => ";
else sayln "\t action (i, (node ! acts) ! l) => ";
fi;
sayln "\t\t case node";
sayln "\t\t ";
sayln "\t\t internal::NN yyk => ";
sayln "\t\t\t ( { fun yymktext () = substring(*yyb, i0, i-i0);\n\
\\t\t\t yypos = i0 + *yygone;";
if *count_newlines
sayln "\t\t\t yylineno := vector_slice_of_chars::keyed_fold_forward";
sayln "\t\t\t\t (\\\\ (_, '\\n', n) => n+1; (_, _, n) => n; end) *yylineno (vector_slice_of_chars::make_slice (*yyb, i0, THE (i-i0)));";
fi;
if *have_reject
say "\t\t\t fun REJECT() = action (i, acts ! l";
if *uses_trailing_context sayln ", rs);";
else sayln ");";
fi;
fi;
sayln "\t\t\t include package user_declarations;";
sayln "\t\t\t include package internal::start_states;";
sayln " { yybufpos := i;";
sayln " case yyk";
sayln " ";
sayln "";
sayln "\t\t\t# Application actions \n";
makeaccept ends;
say "\n\t\t esac; }; } ";
say "); esac; end; # fun action\n\n";
if *uses_trailing_context
say skel_mid2;
fi;
sayln "\t my { fin, trans } = unsafe::vector::get (internal::tab, s);";
sayln "\t new_accepting_leaves = fin ! accepting_leaves;";
sayln "\t if (l == *yybl)";
sayln "\t if (trans == .trans (vector::get (internal::tab, 0)))";
say "\t action (l, new_accepting_leaves";
if *uses_trailing_context
say ", NIL";
fi;
say ");\n\t else";
sayln "\t newchars= if *yydone \"\"; else yyinput 1024; fi;";
sayln "\t if ((size newchars) == 0)";
sayln "\t\t yydone := TRUE;";
say "\t\t if (l == i0) user_declarations::eof ";
sayln
case *arg_code
NULL => "();";
THE _ => "yyarg;";
esac;
say "\t\t else action (l, new_accepting_leaves";
if *uses_trailing_context sayln ", NIL); fi;";
else sayln "); fi;";
fi;
sayln "\t\t else if (l == i0) yyb := newchars;";
sayln "\t\t\t else yyb := substring(*yyb, i0, l-i0) + newchars; fi;";
sayln "\t\t yygone := *yygone+i0;";
sayln "\t\t yybl := size *yyb;";
sayln "\t\t scan (s, accepting_leaves, l-i0, 0);";
sayln "\t fi; # (size newchars) == 0";
sayln "\t fi; # trans == $trans ...";
sayln "\t else new_char = char::to_int (unsafe::vector_of_chars::get(*yyb, l));";
if (*char_set_size == 129)
sayln "\t\t new_char = if (new_char < 128) new_char; else 128; fi;";
fi;
say "\t\t new_state = ";
sayln ( if *char_format
"char::to_int (unsafe::vector_of_chars::get (trans, new_char));";
else
"unsafe::vector::get (trans, new_char);";
fi
);
say "\t\t if (new_state == 0) action (l, new_accepting_leaves";
if *uses_trailing_context sayln ", NIL);";
else sayln ");";
fi;
sayln "\t\t else scan (new_state, new_accepting_leaves, l+1, i0); fi;";
sayln "\t fi;";
sayln " }; # fun scan";
if (not *uses_previous_newline)
sayln "/*";
fi;
say "\t start= if (substring(*yyb,*yybufpos - 1, 1)==\"\\n\")";
sayln " *yybegin_i+1; else *yybegin_i; fi;";
if (not *uses_previous_newline)
sayln "*/";
fi;
say "\t scan(";
if *uses_previous_newline say "start";
else say "*yybegin_i /* start */ ";
fi;
sayln ", NIL, *yybufpos, *yybufpos); # fun continue";
sayln " }; # fun continue";
sayln
case *arg_code
NULL => " }; # fun lex";
THE _ => " continue; }; # fun lex";
esac;
sayln " lex; ";
sayln " }; # fun make_lexer";
sayln "};";
}; # fun print_lexer
uses_previous_newline := FALSE;
reset_flags();
lex_buf := make_ibuf (fil::open_for_read infile);
next_tok := BOF;
inquote := FALSE;
lex_out := fil::open_for_write outfile;
state_num := 2;
line_num := 1;
state_tab := enter (create (string::(<=)))("initial", 1);
leaf_num := -1;
my (user_code, rules, ends)
=
parse()
except
x = { close_ibuf *lex_buf;
fil::close_output *lex_out;
winix__premicrothread::file::remove_file outfile;
raise exception x;
};
my (fins, trans, tctab, tcpairs)
=
makedfa rules;
if *uses_trailing_context
close_ibuf *lex_buf;
fil::close_output *lex_out;
winix__premicrothread::file::remove_file outfile;
pr_err "lookahead is unimplemented";
fi;
if *header_decl say *header_code;
else say ("package " + *package_name);
fi;
say "{\n";
say skel_hd;
say user_code;
say "}; # end of user routines \n";
say "exception LEX_ERROR; # Raised if illegal leaf action tried.\n";
say "package internal {\n\t \n";
maketable (fins, tctab, tcpairs, trans);
say "package start_states {\n\t \n";
say "\t Yystartstate = STARTSTATE Int;\n";
makebegin();
say "\n };\n";
say "Result = user_declarations::Lex_Result;\n";
say "\t exception LEXER_ERROR; # Raised if illegal leaf action tried */\n";
say "};\n\n";
say if *pos_arg "fun make_lexer (yyinput, yygone0: Int) =\n { \n";
else "fun make_lexer yyinput =\n{\t my yygone0=1;\n";
fi;
if *count_newlines
say "\t my yylineno = REF 0;\n\n";
fi;
say "\t yyb = REF \"\\n\"; \t\t# Buffer \n\
\\t yybl = REF 1;\t\t# Buffer length \n\
\\t yybufpos = REF 1;\t\t# location of next character to use \n\
\\t yygone = REF yygone0;\t# position in file of beginning of buffer \n\
\\t yydone = REF FALSE;\t\t# eof found yet? \n\
\\t yybegin_i = REF 1;\t\t# Current 'start state' for lexer \n\
\\n\t yybegin = \\\\ (internal::start_states::STARTSTATE x) =\n\
\\t\t yybegin_i := x;\n\n";
print_lexer ends;
close_ibuf *lex_buf;
fil::close_output *lex_out;
}; # fun lex_fn
};
end;