## raw-syntax-junk.pkg
# Compiled by:
#     src/lib/compiler/front/parser/parser.sublib

###         "He wrapped himself in quotations -- as a beggar
###          would enfold himself in the purple of Emperors."
###
### -- Rudyard Kipling
stipulate
package em  =  error_message;		# error_message	is from   src/lib/compiler/front/basics/errormsg/error-message.pkg
# package fix =  fixity;		# fixity	is from   src/lib/compiler/front/basics/map/fixity.pkg
package hs  =  hash_string;		# hash_string	is from   src/lib/src/hash-string.pkg
# package pj  =  print_junk;		# print_junk	is from   src/lib/compiler/front/basics/print/print-junk.pkg
package raw =  raw_syntax;		# raw_syntax	is from   src/lib/compiler/front/parser/raw-syntax/raw-syntax.pkg
package sy  =  symbol;			# symbol	is from   src/lib/compiler/front/basics/map/symbol.pkg
herein
package raw_syntax_junk
: (weak) Raw_Syntax_Junk		# Raw_Syntax_Junk	is from   src/lib/compiler/front/parser/raw-syntax/raw-syntax-junk.api
{
# Hash values for operator and reserved-word spellings.
#
# Each constant is hs::hash_string applied to the literal shown.
# Every xxx_hash below has an xxx_string twin in this package
# bound to the same spelling.
#
# Naming: in "pre" names the '_' follows the operator ("&_"),
# in "post" names it precedes the operator ("_&").
#
post_dotdot_hash      = hs::hash_string "_..";
dotdot_hash           = hs::hash_string "..";

post_plusplus_hash    = hs::hash_string "_++";
plusplus_hash         = hs::hash_string "++";

post_dashdash_hash    = hs::hash_string "_--";
dashdash_hash         = hs::hash_string "--";

preamper_hash         = hs::hash_string "&_";
amper_hash            = hs::hash_string "&";
postamper_hash        = hs::hash_string "_&";

preatsign_hash        = hs::hash_string "@_";
atsign_hash           = hs::hash_string "@";
postatsign_hash       = hs::hash_string "_@";

preback_hash          = hs::hash_string "\\_";
back_hash             = hs::hash_string "\\";
postback_hash         = hs::hash_string "_\\";

prebang_hash          = hs::hash_string "!_";
bang_hash             = hs::hash_string "!";
postbang_hash         = hs::hash_string "_!";

# The '|' literals must each sit on one line:
# a string constant may not contain a raw line break.
#
prebar_hash           = hs::hash_string "|_";
bar_hash              = hs::hash_string "|";
postbar_hash          = hs::hash_string "_|";

prebuck_hash          = hs::hash_string "$_";
buck_hash             = hs::hash_string "$";
postbuck_hash         = hs::hash_string "_$";

precaret_hash         = hs::hash_string "^_";
caret_hash            = hs::hash_string "^";
postcaret_hash        = hs::hash_string "_^";

predash_hash          = hs::hash_string "-_";
dash_hash             = hs::hash_string "-";
postdash_hash         = hs::hash_string "_-";

preplus_hash          = hs::hash_string "+_";
plus_hash             = hs::hash_string "+";
postplus_hash         = hs::hash_string "_+";

preslash_hash         = hs::hash_string "/_";
slash_hash            = hs::hash_string "/";
postslash_hash        = hs::hash_string "_/";

prestar_hash          = hs::hash_string "*_";		# "The fault, dear Brutus, is not in our stars,
star_hash             = hs::hash_string "*";		#  But in ourselves, that we are underlings."
poststar_hash         = hs::hash_string "_*";		#            -- William Shakespeare, "Julius Caesar"

pretilda_hash         = hs::hash_string "~_";
tilda_hash            = hs::hash_string "~";
posttilda_hash        = hs::hash_string "_~";

preqmark_hash         = hs::hash_string "?_";
qmark_hash            = hs::hash_string "?";
postqmark_hash        = hs::hash_string "_?";

prepercnt_hash        = hs::hash_string "%_";
percnt_hash           = hs::hash_string "%";
postpercnt_hash       = hs::hash_string "_%";

prelangle_hash        = hs::hash_string "<_";
langle_hash           = hs::hash_string "<";

prelbrace_hash        = hs::hash_string "{_";
lbrace_hash           = hs::hash_string "{";

postrangle_hash       = hs::hash_string "_>";
rangle_hash           = hs::hash_string ">";

postrbrace_hash       = hs::hash_string "_}";
rbrace_hash           = hs::hash_string "}";

postlbracket_hash     = hs::hash_string "_[";
lbracket_hash         = hs::hash_string "[";

equal_hash            = hs::hash_string "=";
eqeq_hash             = hs::hash_string "==";

weakdot_hash          = hs::hash_string " . ";

bogus_hash            = hs::hash_string "BOGUS";
dollar_bogus_hash     = hs::hash_string "$BOGUS";

barens_hash           = hs::hash_string "|_|";

# Word-like spellings:
#
field_hash            = hs::hash_string "field";
generic_hash          = hs::hash_string "generic";
get_fields_hash       = hs::hash_string "get__fields";
in_hash               = hs::hash_string "in";
include_hash          = hs::hash_string "include";
infix_hash            = hs::hash_string "infix";
infixr_hash           = hs::hash_string "infixr";
message_hash          = hs::hash_string "message";
method_hash           = hs::hash_string "method";
nonfix_hash           = hs::hash_string "nonfix";
overloaded_hash       = hs::hash_string "overloaded";
raise_hash            = hs::hash_string "raise";
recursive_hash        = hs::hash_string "recursive";
# Spellings of the operators and reserved words that this
# package also provides precomputed hashes for: every
# xxx_string below has an xxx_hash twin in this package
# computed from the same literal.
#
# Naming: in "pre" names the '_' follows the operator ("&_"),
# in "post" names it precedes the operator ("_&").
#
post_dotdot_string    = "_..";
dotdot_string         = "..";

post_plusplus_string  = "_++";
plusplus_string       = "++";

post_dashdash_string  = "_--";
dashdash_string       = "--";

# "pre" forms:
#
preamper_string       = "&_";
preatsign_string      = "@_";
preback_string        = "\\_";
prebang_string        = "!_";
prebar_string         = "|_";		# Must sit on one line: no raw line break inside a string constant.
prebuck_string        = "$_";
precaret_string       = "^_";
predash_string        = "-_";
prepercnt_string      = "%_";
preplus_string        = "+_";
preqmark_string       = "?_";
preslash_string       = "/_";
prestar_string        = "*_";
pretilda_string       = "~_";

# Bracket-like forms:
#
prelangle_string      = "<_";
langle_string         = "<";

prelbrace_string      = "{_";
lbrace_string         = "{";

postrangle_string     = "_>";
rangle_string         = ">";

postrbrace_string     = "_}";
rbrace_string         = "}";

postlbracket_string   = "_[";
lbracket_string       = "[";

# Bare forms:
#
amper_string          = "&";
atsign_string         = "@";
back_string           = "\\";
bang_string           = "!";
bar_string            = "|";
buck_string           = "$";
caret_string          = "^";
dash_string           = "-";
percnt_string         = "%";
plus_string           = "+";
qmark_string          = "?";
slash_string          = "/";
star_string           = "*";
tilda_string          = "~";

# "post" forms:
#
postamper_string      = "_&";
postatsign_string     = "_@";
postback_string       = "_\\";
postbang_string       = "_!";
postbar_string        = "_|";
postbuck_string       = "_$";
postcaret_string      = "_^";
postdash_string       = "_-";
postpercnt_string     = "_%";
postplus_string       = "_+";
postqmark_string      = "_?";
postslash_string      = "_/";
poststar_string       = "_*";
posttilda_string      = "_~";

equal_string          = "=";
eqeq_string           = "==";

weakdot_string        = " . ";

bogus_string          = "BOGUS";
dollar_bogus_string   = "$BOGUS";

barens_string         = "|_|";

# Word-like spellings:
#
field_string          = "field";
generic_string        = "generic";
get_fields_string     = "get__fields";
in_string             = "in";
include_string        = "include";
infix_string          = "infix";
infixr_string         = "infixr";
message_string        = "message";
method_string         = "method";
nonfix_string         = "nonfix";
overloaded_string     = "overloaded";
raise_string          = "raise";
recursive_string      = "recursive";
# Raw-syntax values for the record with no fields,
# in pattern and in expression position respectively:
#
void_pattern     =  raw::RECORD_PATTERN { is_incomplete => FALSE, definition => [] };
void_expression  =  raw::RECORD_IN_EXPRESSION [];

# Symbol paths (lists of symbols) naming particular value constructors:
#
true_valcon      =  [ sy::make_value_symbol "TRUE"  ];
false_valcon     =  [ sy::make_value_symbol "FALSE" ];
#
quote_valcon     =  [ sy::make_package_symbol "Lib7",  sy::make_value_symbol "QUOTE"     ];
antiquote_valcon =  [ sy::make_package_symbol "Lib7",  sy::make_value_symbol "ANTIQUOTE" ];

# Individual symbols, each made in the namespace its constructor names:
#
arrow_type       =  sy::make_type_symbol    "->";
exception_id     =  sy::make_type_symbol    "Exception";
sym_arg          =  sy::make_package_symbol "<Parameter>";
bogus_id         =  sy::make_value_symbol   "BOGUS";

# One-element symbol path for the value "it":
#
it_symbol        =  [ sy::make_value_symbol "it" ];
# Range-check a fixity precedence.
#
# A value in 0..99 is handed back untouched.  Anything
# else is reported through 'err' as an em::ERROR and
# 99 is returned in its place.
#
# History -- 2007-12-31 CrT:  This check used to limit
# precedences to 0-9.  No particular reason for that
# could be found in the code, so it was relaxed.
#
fun check_fixity (fixity, err)
    =
    if (fixity >= 0 and fixity <= 99)
        #
        fixity;
    else
        err em::ERROR "fixity precedence must be between 0 and 99" em::null_error_body;
        99;
    fi;
# Layered (AS) patterns:
#
# lay3 takes the pattern standing to the left of AS, the
# pattern standing to its right, and an error reporter.
# When the left side is a plain variable pattern the result
# is the corresponding raw::AS_PATTERN.  Source-region
# wrappers on the left side are looked through.  Every other
# left side is reported via 'err'; for the last two clauses
# the right-hand pattern alone is returned.
#
fun lay3 ((lhs as raw::VARIABLE_IN_PATTERN _), rhs, _)
        =>
        raw::AS_PATTERN { variable_pattern => lhs, expression_pattern => rhs };

    lay3 (raw::TYPE_CONSTRAINT_PATTERN { pattern => constrained, type_constraint => constraint }, rhs, err)
        =>
        {   # Complain first, then carry on with the constraint
            # moved onto the right-hand side of the AS pattern:
            #
            err em::ERROR "illegal (multiple?) type constraints in AS pattern" em::null_error_body;

            case (lay3 (constrained, rhs, err))
                #
                raw::AS_PATTERN { variable_pattern => lhs, expression_pattern => inner }
                    =>
                    raw::AS_PATTERN
                      {
                        variable_pattern   =>  lhs,
                        expression_pattern =>  raw::TYPE_CONSTRAINT_PATTERN { pattern => inner, type_constraint => constraint }
                      };

                unlayered => unlayered;
            esac;
        };

    lay3 (raw::SOURCE_CODE_REGION_FOR_PATTERN (marked, _), rhs, err)
        =>
        lay3 (marked, rhs, err);

    lay3 (raw::PRE_FIXITY_PATTERN [_], rhs, err)
        =>
        {   err em::ERROR "parentheses illegal around variable in AS pattern" em::null_error_body;
            rhs;
        };

    lay3 (_, rhs, err)
        =>
        {   err em::ERROR "pattern to left of AS must be variable" em::null_error_body;
            rhs;
        };
end;
# Middle layer of AS-pattern construction.
#
# Arguments are (pattern left of AS, pattern right of AS, error reporter).
# A type constraint met here is reported as an error and then re-attached
# to the right-hand side; source-region wrappers are looked through; a
# one-item raw::PRE_FIXITY_PATTERN is unwrapped and its item handed to lay3.
# Everything else goes to lay3 unchanged.
#
fun lay2 (raw::TYPE_CONSTRAINT_PATTERN { pattern => constrained, type_constraint => constraint }, rhs, err)
        =>
        {   err em::ERROR "illegal (multiple?) type constraints in AS pattern" em::null_error_body;

            case (lay2 (constrained, rhs, err))
                #
                raw::AS_PATTERN { variable_pattern => lhs, expression_pattern => inner }
                    =>
                    raw::AS_PATTERN
                      {
                        variable_pattern   =>  lhs,
                        expression_pattern =>  raw::TYPE_CONSTRAINT_PATTERN { pattern => inner, type_constraint => constraint }
                      };

                unlayered => unlayered;
            esac;
        };

    lay2 (raw::SOURCE_CODE_REGION_FOR_PATTERN (marked, _), rhs, err)
        =>
        lay2 (marked, rhs, err);

    lay2 (raw::PRE_FIXITY_PATTERN [ { item => sole_item, ... } ], rhs, err)
        =>
        lay3 (sole_item, rhs, err);

    lay2 whole_triple
        =>
        lay3 whole_triple;
end;
# Outermost layer of AS-pattern construction.
#
# Arguments are (pattern left of AS, pattern right of AS, error reporter).
# A type constraint on the left side is accepted here without complaint:
# the constrained pattern is processed by lay2 and, if that yields an
# AS pattern, the constraint is re-attached to its right-hand side.
# Source-region wrappers are looked through; all else is left to lay2.
#
fun lay (raw::TYPE_CONSTRAINT_PATTERN { pattern => constrained, type_constraint => constraint }, rhs, err)
        =>
        case (lay2 (constrained, rhs, err))
            #
            raw::AS_PATTERN { variable_pattern => lhs, expression_pattern => inner }
                =>
                raw::AS_PATTERN
                  {
                    variable_pattern   =>  lhs,
                    expression_pattern =>  raw::TYPE_CONSTRAINT_PATTERN { pattern => inner, type_constraint => constraint }
                  };

            unlayered => unlayered;
        esac;

    lay (raw::SOURCE_CODE_REGION_FOR_PATTERN (marked, _), rhs, err)
        =>
        lay (marked, rhs, err);

    lay whole_triple
        =>
        lay2 whole_triple;
end;

# The same function under the name 'layered':
#
layered = lay;
# Sequence of declarations
#
# Combine two declarations into one raw::SEQUENTIAL_DECLARATIONS.
# Where an argument is itself a SEQUENTIAL_DECLARATIONS its element
# list is spliced into the result rather than nested inside it.
#
fun make_declaration_sequence (first, second)
    =
    case (first, second)
        #
        (raw::SEQUENTIAL_DECLARATIONS xs, raw::SEQUENTIAL_DECLARATIONS ys)  =>  raw::SEQUENTIAL_DECLARATIONS (xs @ ys);
        (raw::SEQUENTIAL_DECLARATIONS xs, y                              )  =>  raw::SEQUENTIAL_DECLARATIONS (xs @ [y]);
        (x,                               raw::SEQUENTIAL_DECLARATIONS ys)  =>  raw::SEQUENTIAL_DECLARATIONS (x ! ys);
        (x,                               y                              )  =>  raw::SEQUENTIAL_DECLARATIONS [ x, y ];
    esac;
# Convert the statements of a block into one raw-syntax expression.
#
# Surface syntax is block-structured whereas raw-syntax.api is
# LET-oriented; this function bridges that mismatch by producing
# either a LET expression or a simple expression.
#
# NB: The argument list is in REVERSE order, so its head is the
# statement which came last lexically.  That is convenient, since
# what matters most is whether that last statement was an
# expression or a declaration.  An expression statement is
# recognized here as a source-region-wrapped value declaration
# binding a single wildcard pattern.
#
#   o  Head is such an expression and nothing else is in the block:
#      the bare expression is returned.
#
#   o  Head is such an expression and other statements precede it:
#      a LET is returned holding those statements (restored to
#      lexical order) as its declarations and the expression as its value.
#
#   o  Anything else: there is no terminal expression to yield a
#      value, so a LET over all the statements is returned with
#      void_expression as its value.
#
fun block_to_let
        reversed_statements				# THIS IS IN REVERSE ORDER!
    =
    case reversed_statements
        #
        raw::SOURCE_CODE_REGION_FOR_DECLARATION (
            raw::VALUE_DECLARATIONS (
                [ raw::NAMED_VALUE {
                      expression => final_value,
                      pattern    => raw::WILDCARD_PATTERN,
                      ...
                  }
                ],
                NIL
            ),
            _
        ) ! earlier
            =>
            case earlier
                #
                NIL =>  final_value;
                #
                _   =>  raw::LET_EXPRESSION {
                            expression  =>  final_value,
                            declaration =>  raw::SEQUENTIAL_DECLARATIONS (reverse earlier)
                        };
            esac;

        statements
            =>
            raw::LET_EXPRESSION {
                expression  =>  void_expression,
                declaration =>  raw::SEQUENTIAL_DECLARATIONS (reverse statements)
            };
    esac;
# Build the raw-syntax application of the
# quote_valcon constructor to string constant 's':
#
fun quote_expression s
    =
    raw::APPLY_EXPRESSION { function => quote_marker, argument => quoted_text }
    where
        quote_marker =  raw::VARIABLE_IN_EXPRESSION         quote_valcon;
        quoted_text  =  raw::STRING_CONSTANT_IN_EXPRESSION  s;
    end;
# Build the raw-syntax application of the
# antiquote_valcon constructor to expression 'e':
#
fun antiquote_expression e
    =
    raw::APPLY_EXPRESSION { function => antiquote_marker, argument => e }
    where
        antiquote_marker =  raw::VARIABLE_IN_EXPRESSION  antiquote_valcon;
    end;
# mark_expression and mark_declaration are two little
# fns for use in rule actions.  They annotate syntax
# expression and declaration trees with the corresponding
# source file line+column number range(s).
#
# They do nothing if the tree is already so annotated.
#
# Here: wrap expression 'e' with region (a, b) unless its
# outermost node is already a source-region wrapper:
#
fun mark_expression (e, a, b)
    =
    case e
        #
        raw::SOURCE_CODE_REGION_FOR_EXPRESSION _ =>  e;
        _                                        =>  raw::SOURCE_CODE_REGION_FOR_EXPRESSION (e, (a, b));
    esac;
# Wrap declaration 'd' with source region (a, b) unless its
# outermost node is already a source-region wrapper, in which
# case 'd' is returned as-is:
#
fun mark_declaration (d, a, b)
    =
    case d
        #
        raw::SOURCE_CODE_REGION_FOR_DECLARATION _ =>  d;
        _                                         =>  raw::SOURCE_CODE_REGION_FOR_DECLARATION (d, (a, b));
    esac;
# Make an expression look like a declaration
# by faking up, by hand, the equivalent of
#
#     my _ = ...
#
# around it.  The result is passed through mark_declaration
# together with the 'left' and 'right' region bounds.
#
fun expression_to_declaration
        (expression, left, right)
    =
    mark_declaration (wildcard_binding, left, right)
    where
        wildcard_binding
            =
            raw::VALUE_DECLARATIONS
              (
                [ raw::NAMED_VALUE { pattern => raw::WILDCARD_PATTERN,  expression => expression,  is_lazy => FALSE } ],
                NIL
              );
    end;
# This fn is called (only) from:
#
#     src/lib/compiler/toplevel/interact/read-eval-print-loop-g.pkg
#
# Break one parsed declaration into a list of declarations.
#
# While 'dec' has the shape
#
#     REGION( SEQ[ REGION( VALUE_DECLARATIONS([ NAMED_VALUE{ pattern => VARIABLE_IN_PATTERN[_], ... } ], _), r' ), rest ], r )
#
# the first element of the two-element sequence is peeled off and
# the process repeats on 'rest'.  Whatever is left once the shape
# no longer matches becomes the final element.  The returned list
# is in source order.
#
fun extract_toplevel_declarations (dec: raw::Declaration) # "dec" == "declaration".
=
reap_toplevel_statements (dec, []) # See bottom-of-fn comments.
where
# 'results' accumulates the peeled-off declarations, newest
# first; it is reversed once, on the way out.
#
fun reap_toplevel_statements (dec, results)
=
case dec
#
raw::SOURCE_CODE_REGION_FOR_DECLARATION # This pattern is more fragile than one would like,
( # since it depends on just where sourcecode region
raw::SEQUENTIAL_DECLARATIONS # info is/not inserted by the parser, but making it
[ dec1 as raw::SOURCE_CODE_REGION_FOR_DECLARATION # more robust does not seem like a cost-effective use
( # of programming time just now. -- 2012-01-22 CrT
raw::VALUE_DECLARATIONS
# NOTE(review): 'it_symbol' is a lower-case identifier in pattern
# position, so it presumably acts as a fresh variable matching ANY
# one-symbol path rather than being compared with the package-level
# it_symbol value -- confirm whether only "it" was meant to match.
( [ raw::NAMED_VALUE { pattern => raw::VARIABLE_IN_PATTERN [ it_symbol ], ... } ],
_
),
region'
),
dec2
],
region
)
=>
# 'dec1' is bound (via 'as') to the whole inner region-wrapped node,
# so the value pushed here carries the region' wrapper twice.
# The outer 'region' is not used.
#
reap_toplevel_statements (dec2, raw::SOURCE_CODE_REGION_FOR_DECLARATION (dec1, region') ! results);
# Any other shape ends the walk: 'dec' itself is the last result.
#
_ => reverse (dec ! results);
esac;
end;
#
# Given a raw_syntax::Declaration equivalent to
#
# my it = foo ();
# my it = bar ();
# my it = zot ();
# ...
#
# we return a list of the individual declarations.
# The immediate motivation for this is that in
#
# src/lib/compiler/front/parser/yacc/mythryl.grammar
#
# we define
#
#     toplevel_declarations: toplevel_declaration SEMI
#                          | toplevel_declaration SEMI toplevel_declarations
#
# which means that a multi-statement script of statements
# as above will parse as a single toplevel statement, because
# YACC defaults to returning the longest possible syntactically
# valid parse.
#
# This is a problem when processing scripts because we need
# to be able to do something like
#
# #!/usr/bin/mythryl
# load "foo.lib";
# foo::bar();
#
# where the 'load' statement adds package "foo" to the global
# environment for use in the rest of the script; if both
# lines are compiled as a unit, the global environment will
# be updated too late, and foo:: will come up as an undefined
# library.
#
# We deal with this in
#
#     src/lib/compiler/toplevel/interact/read-eval-print-loop-g.pkg
#
# by post-parse breaking up the raw syntax parsetree into
# its logical constituents and compiling them separately;
# the function here is support for that.
#
# In actual raw-syntax format, syntax such as
#
# my it = foo ();
# my it = bar ();
# my it = zot ();
# ...
#
# come out looking like
#
# SEQ[ it=foo();
# SEQ[ it=bar();
# SEQ[ it=zot();
# ...
# ] ] ]
#
# or in more exhaustive detail
#
# SOURCE_CODE_REGION_FOR_DECLARATION <...>
# SEQUENTIAL_DECLARATIONS[
# SOURCE_CODE_REGION_FOR_DECLARATION <...> VALUE_DECLARATIONS [ NAMED_VALUE[ VARIABLE_IN_PATTERN it = ... ]NAMED_VALUE ]VALUE_DECLARATIONS
# ;SEQUENTIAL_DECLARATIONS
# SOURCE_CODE_REGION_FOR_DECLARATION <...>
# SEQUENTIAL_DECLARATIONS[
# SOURCE_CODE_REGION_FOR_DECLARATION <...> VALUE_DECLARATIONS [ NAMED_VALUE[ VARIABLE_IN_PATTERN it = ... ]NAMED_VALUE ]VALUE_DECLARATIONS
# ...
#
# so basically we need to drill down through the sourcecode region
# info and take the first element of each decl-sequence and return
# a list of the results.
}; # package raw_syntax_junk
end;