## symbol.pkg
#
# In the early phases of the compiler we track
# variables, functions, types etc by assigning
# them symbols which we store in symbolmapstacks.
# These 'symbols' correspond directly to user
# identifiers appearing in the source code. See:
#
#     src/lib/compiler/front/typer-stuff/symbolmapstack/symbolmapstack.pkg
#
# In the later phases of the compiler, as we simplify
# and abstract away from the sourcecode, we in essence
# switch from *naming* things to *numbering* them.
# Instead of looking up symbols in symbolmapstacks
# we look up stamps in stampmapstacks, where 'stamps'
# are in essence small integers sequentially assigned
# starting at zero whose only property of interest is
# uniqueness -- being unequal to all other stamps of
# interest. See:
#
#     src/lib/compiler/front/typer-stuff/basics/stamp.pkg
#     src/lib/compiler/front/typer-stuff/modules/stampmapstack.pkg
#
# Compiled by:
#
#     src/lib/compiler/front/basics/basics.sublib
#
# A 'symbol' is our internal representation of a
# source-code identifier like 'my_value' or '&'.
#
# We distinguish nine separate namespaces:
#
# values
# types
# APIs
# packages
# generics
# fixity declarations
# record labels
# type variables
# generic APIs
#
# In principle (ignoring capitalization conventions)
# a given identifier may have different namings in
# each of these namespaces: it may name a value,
# and also a type, and also an API, and so forth.
#
# Before we create a symbol, we figure out which namespace
# it is in, and include that information within it.
#
# Conceptually, thus, our symbol representation is a pair like
#
# (VALUE_NAMESPACE, "my_value")
#
# where the first entry identifies the namespace and the
# second is the programmer-specified surface name for
# the symbol.
#
# As a speed hack, we make the first element of the
# pair an integer which combines a small tag identifying
# the namespace with a hash of the string (the tag is
# added to the hash).
# This lets us most of the time establish that
# two symbols are different, or compute an ordering
# for them, using a single fast integer comparison,
# instead of having to do a slow loop over the
# individual bytes of the string representation.
stipulate
package hs = hash_string; # hash_string is from src/lib/src/hash-string.pkg
package err = error_message; # error_message is from src/lib/compiler/front/basics/errormsg/error-message.pkg
herein
package symbol {
#
# Numeric tags, one per namespace.
#
# Each make_*_symbol function adds the tag for its
# namespace to the hash of the identifier's name, and
# name_space / symbol_to_string recover the tag by
# subtracting that hash again -- so these values must
# stay in sync with the 0u0 .. 0u8 patterns matched there.
#
value_namespace_tag       = 0u0;
api_namespace_tag         = 0u1;
package_namespace_tag     = 0u2;
generic_api_namespace_tag = 0u3;
generic_namespace_tag     = 0u4;
type_namespace_tag        = 0u5;
label_namespace_tag       = 0u6;
typevar_namespace_tag     = 0u7;
fixity_namespace_tag      = 0u8;
# Representation of a symbol: an Unt paired with the
# printable identifier text (e.g. "my_value").
#
# The Unt is built by the make_*_symbol functions as
# (hash of the text) + (namespace tag).
#
Symbol = SYMBOL (Unt, String);
# One constructor per namespace a symbol can live in.
# name_space maps a symbol to one of these, and
# name_space_to_string maps each to a printable name.
#
Namespace
    = VALUE_NAMESPACE
    | TYPE_NAMESPACE
    | API_NAMESPACE
    | PACKAGE_NAMESPACE
    | GENERIC_NAMESPACE
    | FIXITY_NAMESPACE
    | LABEL_NAMESPACE
    | TYPEVAR_NAMESPACE
    | GENERIC_API_NAMESPACE
    ;
# Symbol equality: two symbols are equal when both their
# Unt parts and their name strings are equal.
#
# The cheap integer test is written first, ahead of the
# slower character-by-character string comparison.
#
fun eq (SYMBOL (number1, name1), SYMBOL (number2, name2))
    =
    number1 == number2 and
    name1 == name2;
# Alphabetic "greater than" on symbols.
#
# Only the name strings are compared; the Unt half
# (hash plus namespace tag) is ignored.
#
fun symbol_gt (SYMBOL (_, left), SYMBOL (_, right))
    =
    left > right;
# Three-way alphabetic comparison of two symbols by name only;
# the result is whatever string::compare yields for the two names.
#
fun symbol_compare (SYMBOL (_, left), SYMBOL (_, right))
    =
    string::compare (left, right);
# Fast "less than" for symbols, intended for ordering
# binary trees used in internal symbol lookup.
#
# Symbols are ordered by their Unt half first; the name
# strings are consulted only to break ties between symbols
# whose Unt halves are equal.  The resulting order looks
# arbitrary to humans, which does not matter for that use.
#
fun symbol_fast_lt (SYMBOL (number1, name1), SYMBOL (number2, name2))
    =
    (number1 < number2)
    or
    (number1 == number2 and name1 < name2);
# Symbol constructors, one per namespace.
#
# Each hashes the given name with hs::hash_string, adds
# the tag of its namespace to that hash, and pairs the
# resulting Unt with the name itself.
#
fun make_value_symbol (name: String)
    =
    SYMBOL (hs::hash_string name + value_namespace_tag, name);

fun make_type_symbol (name: String)
    =
    SYMBOL (hs::hash_string name + type_namespace_tag, name);

fun make_fixity_symbol (name: String)
    =
    SYMBOL (hs::hash_string name + fixity_namespace_tag, name);

fun make_label_symbol (name: String)
    =
    SYMBOL (hs::hash_string name + label_namespace_tag, name);

fun make_typevar_symbol (name: String)
    =
    SYMBOL (hs::hash_string name + typevar_namespace_tag, name);

fun make_api_symbol (name: String)
    =
    SYMBOL (hs::hash_string name + api_namespace_tag, name);

fun make_package_symbol (name: String)
    =
    SYMBOL (hs::hash_string name + package_namespace_tag, name);

fun make_generic_symbol (name: String)
    =
    SYMBOL (hs::hash_string name + generic_namespace_tag, name);

fun make_generic_api_symbol (name: String)
    =
    SYMBOL (hs::hash_string name + generic_api_namespace_tag, name);
# Build the value-namespace and fixity-namespace symbols
# for one name in a single call, returned as a pair
# (value symbol, fixity symbol).
#
# The name is hashed once and the hash is shared by both symbols.
#
fun make_value_and_fixity_symbols name
    =
    {   name_hash = hs::hash_string name;
        #
        value_symbol = SYMBOL (name_hash + value_namespace_tag, name);
        fixity_symbol = SYMBOL (name_hash + fixity_namespace_tag, name);
        #
        (value_symbol, fixity_symbol);
    };
# Accessor for the String half of a symbol:
# returns the printable identifier text.
#
fun name (SYMBOL (_, text)) = text;
# Accessor for the Unt half of a symbol
# (the name hash plus the namespace tag).
#
fun number (SYMBOL (unt, _)) = unt;
# Recover the namespace which a given symbol inhabits.
#
# The make_*_symbol functions store (hash of name) + (namespace tag)
# in the Unt half, so subtracting a freshly computed hash of the
# name leaves just the tag, which is then mapped to its constructor.
# The literals below mirror the *_namespace_tag constants.
#
# XXX BUGGO FIXME shouldn't we be doing a fast bitfield
# extract instead of a rehash...?!
#
fun name_space (SYMBOL (number, name)) : Namespace
    =
    {   namespace_tag = number - hs::hash_string name;
        #
        case namespace_tag
            #
            0u0 => VALUE_NAMESPACE;
            0u1 => API_NAMESPACE;
            0u2 => PACKAGE_NAMESPACE;
            0u3 => GENERIC_API_NAMESPACE;
            0u4 => GENERIC_NAMESPACE;
            0u5 => TYPE_NAMESPACE;
            0u6 => LABEL_NAMESPACE;
            0u7 => TYPEVAR_NAMESPACE;
            0u8 => FIXITY_NAMESPACE;
            #
            # Any other difference means the Unt half was not
            # produced by one of the make_*_symbol functions.
            #
            _ => err::impossible "symbol::name_space";
        esac;
    };
# Render a Namespace constructor as a short
# human-readable word for use in printed output.
#
fun name_space_to_string (ns: Namespace) : String
    =
    case ns
        #
        VALUE_NAMESPACE => "val";
        TYPE_NAMESPACE => "type";
        API_NAMESPACE => "api";
        PACKAGE_NAMESPACE => "pkg";
        GENERIC_NAMESPACE => "generic";
        FIXITY_NAMESPACE => "fixity";
        LABEL_NAMESPACE => "label";
        TYPEVAR_NAMESPACE => "typevar";
        GENERIC_API_NAMESPACE => "generic_api";
    esac;
# Produce a printable description of a symbol of the form
# "<namespace> <name>", e.g. "val my_value".
#
fun describe s
    =
    {   namespace_text = name_space_to_string (name_space s);
        symbol_text = name s;
        #
        cat [namespace_text, " ", symbol_text];
    };
# Render a symbol as "<PREFIX>$<name>", where the prefix
# identifies the namespace the symbol lives in.
#
# The namespace tag is recovered the same way name_space
# does it: by subtracting a fresh hash of the name from
# the Unt half of the symbol.  The literals matched below
# mirror the *_namespace_tag constants.
#
# The catch-all arm reports an internal error via
# err::impossible; its message names this function so the
# report can be traced back here.
#
fun symbol_to_string (SYMBOL (number, name)): String
    =
    {   namespace_tag = number - hs::hash_string name;
        #
        case namespace_tag
            #
            0u0 => "VAL$" + name;        # value_namespace_tag
            0u1 => "SIG$" + name;        # api_namespace_tag
            0u2 => "STR$" + name;        # package_namespace_tag
            0u3 => "FSIG$" + name;       # generic_api_namespace_tag
            0u4 => "FCT$" + name;        # generic_namespace_tag
            0u5 => "TYC$" + name;        # type_namespace_tag
            0u6 => "LAB$" + name;        # label_namespace_tag
            0u7 => "TYV$" + name;        # typevar_namespace_tag
            0u8 => "FIX$" + name;        # fixity_namespace_tag
            #
            _ => err::impossible "symbol::symbol_to_string";
        esac;
    };
}; # package symbol
end;