## mythryl-token-table-g.pkg
# Compiled by:
#     src/lib/compiler/front/parser/parser.sublib

### "The range of our projectiles---even ... the artillery -- however great,
### will never exceed four of those miles of which as many thousand separate
### us from the center of the earth."
###
### --- Galileo,
### Dialogues Concerning Two New Sciences
/***************************************************************************
hashtable for token recognition
***************************************************************************/
# Used in ROOT/src/lib/compiler/front/parser/lex/mythryl.lex
stipulate
    #
    # Short local aliases for the packages used throughout this file.
    #
    package fs  =  fast_symbol;                 # fast_symbol              is from   src/lib/compiler/front/basics/map/fast-symbol.pkg
    package hs  =  hash_string;                 # hash_string              is from   src/lib/src/hash-string.pkg
    package wht =  word_string_hashtable;       # word_string_hashtable    is from   src/lib/compiler/front/basics/hash/wordstr-hashtable.pkg
herein
generic package mythryl_token_table_g (tokens: Mythryl_Tokens)          # Mythryl_Tokens is from   src/lib/compiler/front/parser/yacc/mythryl.grammar.api
: (weak)
api {
    #
    # Every entry point takes (lexeme, yypos), where yypos is the
    # source position supplied by the caller, and returns a token
    # whose semantic value and position span are built from them.

    # Return the token registered for the lexeme in the identifier
    # table, else a tokens::lowercase_id token:
    #
    check_id:                           (String, Int) -> tokens::Token( tokens::Semantic_Value, Int );

    # As check_id, but first drops the first and last character of
    # the lexeme; the fallback is a tokens::passiveop_id token:
    #
    check_passive_id:                   (String, Int) -> tokens::Token( tokens::Semantic_Value, Int );

    # Return the token registered for the lexeme in the symbolic
    # operator table, else a tokens::operators_id token:
    #
    check_symbol_id:                    (String, Int) -> tokens::Token( tokens::Semantic_Value, Int );

    # Drop the first and last character of the lexeme and return
    # a tokens::passiveop_id token for what remains (no table lookup):
    #
    check_passive_symbol_id:            (String, Int) -> tokens::Token( tokens::Semantic_Value, Int );

    # Both of these return a tokens::tyvar token for the lexeme:
    #
    check_type_var:                     (String, Int) -> tokens::Token( tokens::Semantic_Value, Int );
    new_check_type_var:                 (String, Int) -> tokens::Token( tokens::Semantic_Value, Int );

    # Return a tokens::implicit_thunk_parameter token for the lexeme:
    #
    check_implicit_thunk_parameter:     (String, Int) -> tokens::Token( tokens::Semantic_Value, Int );
}
{ exception NO_TOKEN;           # Given to wht::make_hashtable as its not_found_exception; the check_* functions handle it to fall back to a generic identifier token.
#
# Local shorthand for the string hash used for every table key and symbol below:
#
hash_string = hs::hash_string;
# Build a hashtable from a list of (lexeme, token_maker) pairs.
#
# Each pair is stored under the key (hash_string lexeme, lexeme).
# The table is created with NO_TOKEN as its not-found exception.
#
fun make_table (size_hint, l)
    =
    {   table = wht::make_hashtable { size_hint, not_found_exception => NO_TOKEN };

        fun insert (lexeme, token_maker)
            =
            wht::set table ((hash_string lexeme, lexeme), token_maker);

        list::apply insert l;

        table;
    };
# Table of symbolic (operator-character) lexemes which have their own token.
#
# Each entry pairs the lexeme with a function which, given the start
# position yypos, builds the token spanning (yypos, yypos + lexeme length).
# Lexemes absent from this table are handled by the fallback in check_symbol_id.
#
symbol_id_table
    =
    make_table (16, [
#       ("-",   \\ yypos = tokens::dash         (yypos, yypos+1)),
#       ("/",   \\ yypos = tokens::slash        (yypos, yypos+1)),
#       ("*",   \\ yypos = tokens::star         (yypos, yypos+1)),
#       ("~",   \\ yypos = tokens::tilda        (yypos, yypos+1)),
#       ("|",   \\ yypos = tokens::bar          (yypos, yypos+1)),
        (":",   \\ yypos = tokens::colon        (yypos, yypos+1)),
        ("=",   \\ yypos = tokens::equal_op     (yypos, yypos+1)),
        ("#",   \\ yypos = tokens::hash         (yypos, yypos+1)),
        ("==",  \\ yypos = tokens::eqeq_op      (yypos, yypos+2)),
        ("&=",  \\ yypos = tokens::amper_eq     (yypos, yypos+2)),
        ("@=",  \\ yypos = tokens::atsign_eq    (yypos, yypos+2)),
        ("\\=", \\ yypos = tokens::back_eq      (yypos, yypos+2)),
#       ("!=",  \\ yypos = tokens::bang_eq      (yypos, yypos+2)),      # Don't do this!
        ("$=",  \\ yypos = tokens::buck_eq      (yypos, yypos+2)),
        ("^=",  \\ yypos = tokens::caret_eq     (yypos, yypos+2)),
        ("-=",  \\ yypos = tokens::dash_eq      (yypos, yypos+2)),
        (".=",  \\ yypos = tokens::dot_eq       (yypos, yypos+2)),
        ("%=",  \\ yypos = tokens::percnt_eq    (yypos, yypos+2)),
        ("+=",  \\ yypos = tokens::plus_eq      (yypos, yypos+2)),
        ("?=",  \\ yypos = tokens::qmark_eq     (yypos, yypos+2)),
        ("/=",  \\ yypos = tokens::slash_eq     (yypos, yypos+2)),
        ("*=",  \\ yypos = tokens::star_eq      (yypos, yypos+2)),
        ("~=",  \\ yypos = tokens::tilda_eq     (yypos, yypos+2)),
        ("++=", \\ yypos = tokens::plusplus_eq  (yypos, yypos+3)),
        ("--=", \\ yypos = tokens::dashdash_eq  (yypos, yypos+3)),
        ("->",  \\ yypos = tokens::arrow        (yypos, yypos+2)),
        ("=>",  \\ yypos = tokens::darrow       (yypos, yypos+2)),
        ("~~",  \\ yypos = tokens::tilda_tilda  (yypos, yypos+2)),
        ("::",  \\ yypos = tokens::colon_colon  (yypos, yypos+2)),
        ("??",  \\ yypos = tokens::what_what    (yypos, yypos+2)),
        ("?:",  \\ yypos = tokens::what_colon   (yypos, yypos+2)),      # XXX BUGGO FIXME should kill this
        (":?",  \\ yypos = tokens::colon_what   (yypos, yypos+2)),      # XXX BUGGO FIXME should kill this
        ("++",  \\ yypos = tokens::plus_plus    (yypos, yypos+2)),
        ("--",  \\ yypos = tokens::dash_dash    (yypos, yypos+2))
    ]);
# Table of alphabetic lexemes which have their own token.
#
# Each entry pairs the lexeme with a function which, given the start
# position yypos, builds the token spanning (yypos, yypos + lexeme length).
# The "lazy" entry raises NO_TOKEN when *mythryl_parser::lazy_is_a_keyword
# is false, so that callers which handle NO_TOKEN fall back exactly as
# they do for a lexeme which is not in the table at all.
#
id_table
    =
    make_table (64, [
        ("also",        \\ yypos = tokens::also_t       (yypos, yypos+4)),
        ("and",         \\ yypos = tokens::and_t        (yypos, yypos+3)),
        ("api",         \\ yypos = tokens::api_t        (yypos, yypos+3)),
        ("as",          \\ yypos = tokens::as_t         (yypos, yypos+2)),
        ("case",        \\ yypos = tokens::case_t       (yypos, yypos+4)),
        ("class__",     \\ yypos = tokens::class_t      (yypos, yypos+7)),      # This should be moved to the non-reserved section by switching from "class foo { ... }" to "class package foo { ... }".
        ("class2__",    \\ yypos = tokens::class2_t     (yypos, yypos+8)),      # This should die in due course.
        ("elif",        \\ yypos = tokens::elif_t       (yypos, yypos+4)),
        ("else",        \\ yypos = tokens::else_t       (yypos, yypos+4)),
        ("end",         \\ yypos = tokens::end_t        (yypos, yypos+3)),
        ("eqtype",      \\ yypos = tokens::eqtype_t     (yypos, yypos+6)),
        ("esac",        \\ yypos = tokens::esac_t       (yypos, yypos+4)),
        ("except",      \\ yypos = tokens::except_t     (yypos, yypos+6)),
        ("exception",   \\ yypos = tokens::exception_t  (yypos, yypos+9)),
        ("fi",          \\ yypos = tokens::fi_t         (yypos, yypos+2)),
        ("for",         \\ yypos = tokens::for_t        (yypos, yypos+3)),
        ("fprintf",     \\ yypos = tokens::fprintf_t    (yypos, yypos+7)),
        ("fun",         \\ yypos = tokens::fun_t        (yypos, yypos+3)),
        ("herein",      \\ yypos = tokens::herein_t     (yypos, yypos+6)),
        ("if",          \\ yypos = tokens::if_t         (yypos, yypos+2)),
        ("lazy",        \\ yypos = *mythryl_parser::lazy_is_a_keyword ?? tokens::lazy_t (yypos, yypos+4) :: (raise exception NO_TOKEN)),        # We should find a way to make this non-reserved even when in use, assuming we decide to support it at all.
        ("my",          \\ yypos = tokens::my_t         (yypos, yypos+2)),
        ("or",          \\ yypos = tokens::or_t         (yypos, yypos+2)),
        ("package",     \\ yypos = tokens::package_t    (yypos, yypos+7)),
        ("printf",      \\ yypos = tokens::printf_t     (yypos, yypos+6)),
        ("sharing",     \\ yypos = tokens::sharing_t    (yypos, yypos+7)),
        ("sprintf",     \\ yypos = tokens::sprintf_t    (yypos, yypos+7)),
        ("stipulate",   \\ yypos = tokens::stipulate_t  (yypos, yypos+9)),
        ("where",       \\ yypos = tokens::where_t      (yypos, yypos+5)),
        ("withtype",    \\ yypos = tokens::withtype_t   (yypos, yypos+8)),
        #
        # The above totals 30 reserved words.  Of those
        #
        #    "lazy"     is not currently supported and probably unneeded anyhow;
        #    "class__"  can be phased out.
        #    "class2__" can be phased out.
        #    "fprintf"  can be phased out if we introduce a way to allow user-defined compiletime functions.
        #    "printf"   can be phased out if we introduce a way to allow user-defined compiletime functions.
        #    "sprintf"  can be phased out if we introduce a way to allow user-defined compiletime functions.
        #
        # That leaves 24 reserved words.  Of those "eqtype" could be phased out if we rewrote
        # to use (e.g.) Harper-Stone semantics/typechecking (instead of the Definition).
        #
        # Obviously, we could trivially replace "fi" and "esac" with "end"
        # if we wanted to, but I don't -- I think they pull their weight
        # by improving readability and esthetics.
        #
        # Bottom line:  We have 22-23 hard-core alphabetic reserved words which
        # I expect us to support basically forever -- not bad!
        # For comparison, last I checked:
        #     C          had 33 reserved words
        #     MIT Scheme had 37 reserved words
        #     Scala      had 38 reserved words
        #     Java       had 50 reserved words
        #     Javascript had 63 reserved words
        #     Ada        had 71 reserved words
        #     C++        had 84 reserved words
        #
        # The following are NOT reserved words, just identifiers which have
        # special meaning to the compiler in certain contexts.  They can all
        # still be used as regular identifiers by the user, without conflict:
        #
        ("field",       \\ yypos = tokens::field_t      (yypos, yypos+5)),      # Used in:  field my ...
        ("generic",     \\ yypos = tokens::generic_t    (yypos, yypos+7)),      # Used in:  generic package ...
        ("in",          \\ yypos = tokens::in_t         (yypos, yypos+2)),      #
        ("include",     \\ yypos = tokens::include_t    (yypos, yypos+7)),      # Used in:  include package ... ;   include api ... ;
        ("infix",       \\ yypos = tokens::infix_t      (yypos, yypos+5)),      # Used in:  infix my ...
        ("infixr",      \\ yypos = tokens::infixr_t     (yypos, yypos+6)),      # Used in:  infixr my ...
        ("message",     \\ yypos = tokens::message_t    (yypos, yypos+7)),      # Used in:  message fun ...
        ("method",      \\ yypos = tokens::method_t     (yypos, yypos+6)),      # Used in:  method fun ...
        ("nonfix",      \\ yypos = tokens::nonfix_t     (yypos, yypos+6)),      # Used in:  nonfix my ...
        ("overloaded",  \\ yypos = tokens::overloaded_t (yypos, yypos+10)),     # Used in:  overloaded my ...
        ("raise",       \\ yypos = tokens::raise_t      (yypos, yypos+5)),      # Used in:  raise exception ...
        ("recursive",   \\ yypos = tokens::recursive_t  (yypos, yypos+9))       # Used in:  recursive my ...
    ]);
# overload_hash = hash_string "overload";
# lazy_hash = hash_string "lazy";
# Classify an alphabetic identifier.
#
# If id_table holds an entry for the lexeme, the token is
# whatever that entry's function builds at yypos.  If the
# lookup (or the entry's function) raises NO_TOKEN, the
# lexeme becomes an ordinary lowercase identifier token
# spanning (yypos, yypos + size str).
#
fun check_id (str, yypos)
    =
    {   hash = hash_string str;

        wht::get id_table (hash, str) yypos
        except
            NO_TOKEN
                =
                tokens::lowercase_id (fs::raw_symbol (hash, str), yypos, yypos + (size str));
    };
# Classify an identifier written with one enclosing character on
# each side (parentheses, per the note on the extract call below).
#
# The enclosed name is looked up in id_table exactly as check_id
# does; on NO_TOKEN the result is a passiveop_id token for the
# enclosed name.
#
# NOTE(review): the fallback span starts at yypos, whereas
# check_passive_symbol_id starts its span at yypos+1 -- confirm
# which of the two is intended.
#
fun check_passive_id (string, yypos)
    =
    {   # Drop leading and trailing parentheses from string:
        #
        inner = string::extract (string, 1, THE (string::length_in_bytes string - 2));
        hash  = hash_string inner;

        wht::get id_table (hash, inner) yypos
        except
            NO_TOKEN
                =
                tokens::passiveop_id (fs::raw_symbol (hash, inner), yypos, yypos + (size inner));
    };
# Build the token for an identifier with a leading '#' on its name.
# These serve as implicit thunk parameters, letting us write
#
#     {. #a < #b; }
#
# as shorthand for
#
#     \\ (a, b) = a < b;
#
# No table is consulted at present; every lexeme becomes an
# implicit_thunk_parameter token spanning (yypos, yypos + size str).
# (Eventually a table lookup guarded by NO_TOKEN will be wanted
# here, for #if #else #elif #end.)
#
fun check_implicit_thunk_parameter (str, yypos)
    =
    {   symbol = fs::raw_symbol (hash_string str, str);

        tokens::implicit_thunk_parameter (symbol, yypos, yypos + (size str));
    };
# Classify a symbolic (operator-character) lexeme.
#
# If symbol_id_table holds an entry for the lexeme, the token is
# whatever that entry's function builds at yypos; on NO_TOKEN the
# lexeme becomes a generic operators_id token spanning
# (yypos, yypos + size str).
#
fun check_symbol_id (str, yypos)
    =
    {   hash = hash_string str;

        fun make_operator_id ()
            =
            tokens::operators_id (fs::raw_symbol (hash, str), yypos, yypos + (size str));

        wht::get symbol_id_table (hash, str) yypos
        except
            NO_TOKEN = make_operator_id ();
    };
# Build a passiveop_id token for a symbolic lexeme written with
# one enclosing character on each side.
#
# No table is consulted.  The span begins at yypos+1, i.e. just
# past the leading enclosing character, and runs for the length
# of the enclosed text.
#
fun check_passive_symbol_id (string, yypos)
    =
    {   # Drop leading and trailing parentheses from string:
        #
        inner = string::extract (string, 1, THE (string::length_in_bytes string - 2));
        hash  = hash_string inner;
        first = yypos + 1;

        tokens::passiveop_id (fs::raw_symbol (hash, inner), first, first + (size inner));
    };
# Build a tyvar token for the lexeme, spanning (yypos, yypos + size str).
# The lexeme is used unchanged as the symbol name.
#
fun check_type_var (str, yypos)
    =
    {   symbol = fs::raw_symbol (hash_string str, str);

        tokens::tyvar (symbol, yypos, yypos + (size str));
    };
# Build a tyvar token for the lexeme, spanning (yypos, yypos + size str).
#
# The symbol name is currently just str: the "$" prefix which
# could be prepended here is commented out.
#
fun new_check_type_var (str, yypos)
    =
    {   name   = /* "$" + */ str;
        symbol = fs::raw_symbol (hash_string name, name);

        tokens::tyvar (symbol, yypos, yypos + (size str));
    };
};
end;
## COPYRIGHT (c) 1996 Bell Laboratories.
## Subsequent changes by Jeff Prothero Copyright (c) 2010-2015,
## released per terms of SMLNJ-COPYRIGHT.