## html-elements-g.pkg
# Compiled by:
#
src/lib/html/html.lib# This module builds element tags for the lexer.
generic package html_elements_g (
package tokens: Html_Tokens; # Html_Tokens is from
src/lib/html/html.grammar.api package err: Html_Error; # Html_Error is from
src/lib/html/html-error.api package htmlattrs: Html_Attributes; # Html_Attributes is from
src/lib/html/html-attributes.api )
: (weak)
api {
package t: Html_Tokens; # Html_Tokens is from
src/lib/html/html.grammar.api Source_Position = Int;
start_tag: Null_Or( String ) -> ((String, Source_Position, Source_Position)) -> Null_Or( t::Token( t::Semantic_Value, Source_Position ) );
end_tag: Null_Or( String ) -> ((String, Source_Position, Source_Position)) -> Null_Or( t::Token( t::Semantic_Value, Source_Position ) );
}
{
package t = tokens;
package a = htmlattrs;
Source_Position = Int;
Start_Tag
= WATTRS (((a::Attributes, Source_Position, Source_Position)) -> t::Token (t::Semantic_Value, Source_Position))
| WOATTRS (((Source_Position, Source_Position)) -> t::Token (t::Semantic_Value, Source_Position));
End_Tag
= END (((Source_Position, Source_Position)) -> t::Token (t::Semantic_Value, Source_Position))
| EMPTY;
token_data = [
("A", WATTRS t::start_a, END t::end_a),
("ADDRESS", WOATTRS t::start_address, END t::end_address),
("APPLET", WATTRS t::start_applet, END t::end_applet),
("AREA", WATTRS t::tag_area, EMPTY),
("B", WOATTRS t::start_b, END t::end_b),
("BASE", WATTRS t::tag_base, EMPTY),
("BASEFONT", WATTRS t::start_basefont, END t::end_basefont),
("BIG", WOATTRS t::start_big, END t::end_big),
("BLOCKQUOTE", WOATTRS t::start_blockquote, END t::end_blockquote),
("BODY", WATTRS t::start_body, END t::end_body),
("BR", WATTRS t::tag_br, EMPTY),
("CAPTION", WATTRS t::start_caption, END t::end_caption),
("CENTER", WOATTRS t::start_center, END t::end_center),
("CITE", WOATTRS t::start_cite, END t::end_cite),
("CODE", WOATTRS t::start_code, END t::end_code),
("DD", WOATTRS t::start_dd, END t::end_dd),
("DFN", WOATTRS t::start_dfn, END t::end_dfn),
("DIR", WATTRS t::start_dir, END t::end_dir),
("DIV", WATTRS t::start_div, END t::end_div),
("DL", WATTRS t::start_dl, END t::end_dl),
("DT", WOATTRS t::start_dt, END t::end_dt),
("EM", WOATTRS t::start_em, END t::end_em),
("FONT", WATTRS t::start_font, END t::end_font),
("FORM", WATTRS t::start_form, END t::end_form),
("H1", WATTRS t::start_h1, END t::end_h1),
("H2", WATTRS t::start_h2, END t::end_h2),
("H3", WATTRS t::start_h3, END t::end_h3),
("H4", WATTRS t::start_h4, END t::end_h4),
("H5", WATTRS t::start_h5, END t::end_h5),
("H6", WATTRS t::start_h6, END t::end_h6),
("HEAD", WOATTRS t::start_head, END t::end_head),
("HR", WATTRS t::tag_hr, EMPTY),
("HTML", WOATTRS t::start_html, END t::end_html),
("I", WOATTRS t::start_i, END t::end_i),
("IMG", WATTRS t::tag_img, EMPTY),
("INPUT", WATTRS t::tag_input, EMPTY),
("ISINDEX", WATTRS t::tag_isindex, EMPTY),
("KBD", WOATTRS t::start_kbd, END t::end_kbd),
("LI", WATTRS t::start_li, END t::end_li),
("LINK", WATTRS t::tag_link, EMPTY),
("MAP", WATTRS t::start_map, END t::end_map),
("MENU", WATTRS t::start_menu, END t::end_menu),
("META", WATTRS t::tag_meta, EMPTY),
("OL", WATTRS t::start_ol, END t::end_ol),
("OPTION", WATTRS t::start_option, END t::end_option),
("P", WATTRS t::start_p, END t::end_p),
("PARAM", WATTRS t::tag_param, EMPTY),
("PRE", WATTRS t::start_pre, END t::end_pre),
("SAMP", WOATTRS t::start_samp, END t::end_samp),
("SCRIPT", WOATTRS t::start_script, END t::end_script),
("SELECT", WATTRS t::start_select, END t::end_select),
("SMALL", WOATTRS t::start_small, END t::end_small),
("STRIKE", WOATTRS t::start_strike, END t::end_strike),
("STRONG", WOATTRS t::start_strong, END t::end_strong),
("STYLE", WOATTRS t::start_style, END t::end_style),
("SUB", WOATTRS t::start_sub, END t::end_sub),
("SUP", WOATTRS t::start_sup, END t::end_sup),
("TABLE", WATTRS t::start_table, END t::end_table),
("TD", WATTRS t::start_td, END t::end_td),
("TEXTAREA", WATTRS t::start_textarea, END t::end_textarea),
("TH", WATTRS t::start_th, END t::end_th),
("TITLE", WOATTRS t::start_title, END t::end_title),
("TR", WATTRS t::start_tr, END t::end_tr),
("TT", WOATTRS t::start_tt, END t::end_tt),
("U", WOATTRS t::start_u, END t::end_u),
("UL", WATTRS t::start_ul, END t::end_ul),
("VAR", WOATTRS t::start_var, END t::end_var)
];
package sht
=
typelocked_hashtable_g (
#
Hash_Key = String;
hash_value = hash_string::hash_string;
same_key = ((==) : ((String, String)) -> Bool);
);
elem_table
=
table
where
table = sht::make_hashtable { size_hint => length token_data, not_found_exception => DIE "HTMLElements" };
fun ins (tag, start_tok, end_tok)
=
sht::set
table
(tag, { start_t=>start_tok, end_t=>end_tok } );
list::apply ins token_data;
end;
package ss = substring; # substring is from
src/lib/std/substring.pkg fun canonical_name name
=
ss::translate
(string::from_char o char::to_upper)
name;
fun find name
=
(sht::find elem_table (canonical_name name));
skip_ws = ss::drop_prefix char::is_space;
fun scan_string (ctx, quote_char, ss)
=
{ my (str, rest)
=
ss::split_off_prefix
{. #c != quote_char; }
ss;
if (ss::is_empty rest)
err::lex_error ctx "missing close quote for string";
(a::STRING (ss::to_string str), rest);
else
(a::STRING (ss::to_string str), ss::drop_first 1 rest);
fi;
};
# Scan an attribute value from a substring, returning the value, and
# the rest of the substring. Attribute values have one of the following
# forms:
# 1) a name token (a sequence of letters, digits, periods, or hyphens).
# 2) a string literal enclosed in ""
# 3) a string literal enclosed in ''
#
fun scan_attribute_val (ctx, attribute_name, ss)
=
{ fun is_name_char ('.'
| '-') => TRUE;
is_name_char c => char::is_alphanumeric c;
end;
case (ss::getc ss)
NULL => (a::IMPLICIT, ss);
THE('"', rest) => scan_string (ctx, '"', rest);
THE('\'', rest) => scan_string (ctx, '\'', rest);
THE (c, _)
=>
{ # Unquoted attributes should be Names, but this is often not
# the case, so we terminate them on whitespace or ">".
#
not_name_char = REF FALSE;
fun is_attribute_char c
=
if (char::is_space c or c == '>')
FALSE;
else
if (not (is_name_char c)) not_name_char := TRUE; fi;
TRUE;
fi;
my (value, rest)
=
ss::split_off_prefix is_attribute_char ss;
if (ss::is_empty value)
err::bad_attribute_val ctx (ss::to_string attribute_name, "");
(a::IMPLICIT, ss);
else
if *not_name_char
err::unquoted_attribute_val ctx (ss::to_string attribute_name);
(a::STRING (ss::to_string value), rest);
else (a::NAME (ss::to_string value), rest); fi;
fi;
};
esac;
};
fun scan_start_tag (ctx, ss)
=
scan_attributes (rest, [])
where
my (name, rest)
=
ss::split_off_prefix (not o char::is_space) ss;
fun scan_attributes (rest, attributes)
=
{
rest = skip_ws rest;
case (ss::getc rest)
NULL => (name, list::reverse attributes);
THE ('"', rest)
=>
{ err::lex_error ctx "bogus text in element";
scan_attributes (#2 (scan_string (ctx, '"', rest)), attributes);
};
THE ('\'', rest)
=>
{ err::lex_error ctx "bogus text in element";
scan_attributes (#2 (scan_string (ctx, '\'', rest)), attributes);
};
THE (c, rest')
=>
if (char::is_alpha c)
my (a_name, rest)
=
ss::split_off_prefix
char::is_alphanumeric
rest;
rest = skip_ws rest;
case (ss::getc rest)
THE ('=', rest)
=>
{ # Get the attribute value:
#
my (a_val, rest)
=
scan_attribute_val (ctx, a_name, skip_ws rest);
scan_attributes (rest, (canonical_name a_name, a_val) ! attributes);
};
_ =>
scan_attributes (rest,
(canonical_name a_name, a::IMPLICIT) ! attributes);
esac;
else
err::lex_error ctx "bogus character in element";
scan_attributes (rest', attributes);
fi;
esac;
};
end;
fun start_tag file (tag, p1, p2)
=
{
ctx = { file, line=>p1 };
tag' = ss::drop_first 1 (ss::drop_last 1 (ss::from_string tag));
my (name, attributes)
=
scan_start_tag (ctx, tag');
case (find name, attributes)
(NULL, _) => { err::bad_start_tag ctx (ss::to_string name); NULL; };
(THE { start_t=>WOATTRS _, ... }, _ ! _) => {
list::apply (err::unknown_attribute ctx o #1) attributes; NULL;};
(THE { start_t=>WOATTRS tag, ... }, []) =>
THE (tag (p1, p2));
(THE { start_t=>WATTRS tag, ... }, attributes) =>
THE (tag (attributes, p1, p2));
esac;
};
fun end_tag file (tag, p1, p2)
=
{
ctx = { file, line=>p1 };
name = ss::drop_first 2 (ss::drop_last 1 (ss::from_string tag));
case (find name)
NULL => { err::bad_end_tag ctx (ss::to_string name); NULL;};
THE { end_t=>EMPTY, ... } => { err::bad_end_tag ctx (ss::to_string name); NULL;};
THE { end_t=>END end_tok, ... } => THE (end_tok (p1, p2));
esac;
};
};
## COPYRIGHT (c) 1996 AT&T REsearch.
## Subsequent changes by Jeff Prothero Copyright (c) 2010-2015,
## released per terms of SMLNJ-COPYRIGHT.