## tk-markup-g.pkg
## (C) 1996, 1998, Bremen Institute for Safe Systems, Universitaet Bremen
## Author: cxl (Last modification by $Author: 2cxl $)
# Compiled by:
#
src/lib/tk/src/toolkit/sources.sublib# ***************************************************************************
#
# tk Generic Markup Language: writing down annotated texts.
#
# This module allows one to write down texts with embedded text_items in an
# SGML-like format.
#
# See standard-markup-tags-g.pkg for a full-fledged instantiation of this generic
# markup language, and tests+examples/markup_ex.pkg for a wee example.
#
# $Date: 2001/03/30 13:39:46 $
# $Revision: 3.0 $
#
#
# **************************************************************************
### "The theoretical broadening which comes
### from having many humanities subjects
### on the campus is offset by the general dopiness
### of the people who study these things."
###
### -- Richard P. Feynman
generic package tk_markup_g (tags: Tags) # Tags is from
src/lib/tk/src/toolkit/markup.api: (weak) Tk_Markup # Tk_Markup is from
src/lib/tk/src/toolkit/markup.api# where type Widget_Info = tags::Widget_Info
{
include package tk;
include package basic_utilities;
#
# This defines the abstract syntax of a text with text_items in it.
# I won't bore you with a BNF, but roughly the syntax is as follows:
#
# elemStart (nm, a1, ... an) is <nm a1 ... an>
# -- start of an "element" in SGML-speak.
# Note there must be _no space_ betweem
# the opening < and the name nm, and nm has to start with _letter_;
# a1 to an are the arguments of the element
#
# elemEnd nm is <\nm>
# -- the end of an element
#
# escape e is &str;
# -- the escape e denoted by str
#
# quote str is just the string str
An_Text_El = ELEM_START (String, List( String ))
| QUOTE Substring
| ESCAPE String
| ELEM_END String;
Annotated_Text = List( An_Text_El );
# *******************************************************************
#
# The parser
#
# The parser is extremely tolerant; it doesn't generate any errors, and
# if it can't decipher something it will just leave it as a verbal
# quote.
package parser
=
package {
include package substring;
# The lexical elements
Lexem = OPEN_EL
| OPEN_END_EL | OPEN_ESC;
# corresponding to <, <\ and &; plus > and ;
# which only become a lexem after one of these
# handy slices
fun slice_from_to (t, i, n)= slice (t, i, THE (n-i+1));
fun slice_to_end (t, i) = slice (t, i, NULL);
# convert string to lowercase:
fun to_lower_str substr
=
string::implode (map char::to_lower (substring::explode substr));
fun next_is_alpha t i
=
char::is_alpha (sub (t, i+1))
except
INDEX_OUT_OF_BOUNDS = FALSE;
fun error_context (t, i)
=
{ t= slice_to_end (t, i);
"'" $ (if (size t < 25 ) string t;
else (string (slice (t, 0, THE (25))) $ "...");fi) $ "'";
};
# find first valid occurence of a lexem in t, starting from index i
# Return either the index into the string right after the lexem, and
# the lexem, if there is one; or NULL and the index to the end of the
# string:
#
fun scan_next_lex t i
=
case (sub (t, i))
# & and < are only valid lexemes
# if followed by a letter:
'&' => if (next_is_alpha t i) (i - 1, i+1, THE open_esc);
else scan_next_lex t (i+1);
fi;
'<' => if (next_is_alpha t i)
(i - 1, i+1, THE open_el);
else
if ((sub (t, i+1)) == '\\')
if (next_is_alpha t (i+1))
(i - 1, i+2, THE open_end_el);
else
scan_next_lex t (i+2);
fi;
else
scan_next_lex t (i+1);
fi;
fi;
_ => scan_next_lex t (i+1);
esac
except
INDEX_OUT_OF_BOUNDS
=
(i - 1, i - 1, NULL);
# Have passed end of string
# Find next occurence of >
#
fun scan_close_el t i
=
case (sub (t, i))
'>' => THE (i+1);
_ => scan_close_el t (i+1)
except
INDEX_OUT_OF_BOUNDS = NULL;
esac;
fun scan_close_esc t i
=
case (sub (t, i))
';' => THE (i+1);
_ => scan_close_esc t (i+1)
except
INDEX_OUT_OF_BOUNDS = NULL;
esac;
# parse an "element", i.e. a thingy enclosed in '<' ... '>'
# i is supposed to be the index into t right after the opening bracket
# parseEl returns the representation of the rest of t
#
fun parse_el t i
=
{ unto = scan_close_el t i;
case unto
THE n
=>
{ el_text = to_lower_str (slice_from_to (t, i, n - 2));
els = string::tokens (char::is_space) el_text;
# We can rely on els being non-empty since
# nextIsAlpha was TRUE when calling parseEl
(elem_start (hd els, tl els)) . (parse_main t n);
};
NULL
=>
raise exception tags::error
("Can't find closing '>' after " $ (error_context (t, i - 1)));
esac;
}
also
fun parse_end_el t i
=
{ unto = scan_close_el t i;
case unto
THE n
=>
{ el_text = to_lower_str (slice_from_to (t, i, n - 2));
els = string::tokens char::is_space el_text;
# Again, els has to be non-empty.
# We could check here if there is more than
# one element and generate a warning.
# Or we could even keep a list of arguments.
(elem_end (hd els)) . (parse_main t n);
};
NULL
=>
raise exception tags::error ("Can't find closing '>' after " $ (error_context (t, i - 2)));
esac;
}
# Parse an escape sequence, starting with '&' ... ';'
# i is supposed the index into t right after the ampersand
#
also
fun parse_esc t i
=
{ unto = scan_close_esc t i;
case unto
THE n
=>
escape (substring::string (slice_from_to (t, i, n - 2)))
. (parse_main t n);
NULL # Can't find closing ;
=>
raise exception tags::error
("Can't find closing ';' after " $ (error_context (t, i - 1)));
esac;
}
also
fun parse_main t i
=
{ my (j, n, lex) = scan_next_lex t i;
rest = case lex
NULL => [];
THE open_el => parse_el t n;
THE open_esc => parse_esc t n;
THE open_end_el => parse_end_el t n; esac;
if (i <= j) (quote (slice_from_to (t, i, j))) . rest;
else rest;
fi;
};
fun parse t
=
parse_main (full t) 0;
};
# Count position within a string:
#
addpos
=
{ fun cntone (thischar, (line, char))
=
if (string_util::is_linefeed thischar)
(line+1, 0);
else (line, char+1);
fi;
substring::fold_forward cntone;
};
# Like split, but stop after the first element satisfying p:
#
fun splitfirst p []
=>
(NULL, []);
splitfirst p (x . xs)
=>
if (p x)
(THE x, xs);
else
my (f, r) = splitfirst p xs;
(f, x . r);
fi;
end;
# The four components of the consEl's second argument are the following:
# - the first is the stack of unprocessed open elements, along with their
# position within the text;
# - the second is current position within the text;
# - the third is the text content up to here;
# - and the last is the list of text_items built up to here.
#
# As it stands, opening elements with no matching close are discarded.
# This can be changed easily.
fun cons_el wid (quote q, (oe, c, s, al))
=>
(oe, addpos c q, s$(substring::string q), al);
cons_el wid (escape e, (oe, c, s, al))
=>
( case (tags::escape e)
THE esc
=>
{ estr= tags::text_for_esc esc;
nuc = addpos c (substring::from_string estr);
ean = tags::annotation_for_esc esc (MARK c,
MARK nuc);
(oe, nuc, s$estr,
case ean THE t=> t . al; NULL=> al; esac);
};
NULL
=>
{ estr=
case e # the three predefined escape seqs
"amp" => "&";
"lt" => "<";
"gt" => ">";
_ =>
{ tags::warning ("Unknown escape sequence '" + e +
"' (left untouched).");
"&" + e + ";";}; esac;
(oe, addpos c (substring::from_string estr), s + estr, al);
}; esac);
cons_el wid (elem_start els, (oe, c, s, al))
=>
((els, c) . oe, c, s, al);
cons_el wid (elem_end el, (oe, c, s, al))
=>
{ my (m, rest)
=
splitfirst (\\ ((nm, args), _) => nm == el; end ) oe;
case m
NULL =>
{ tags::warning ("Closing tag '<" + el + ">' doesn't match any opening tag");
(oe, c, s, al);
};
THE ((tgnm, args), pos)
=>
case (tags::matching_tag tgnm)
THE tg =>
({ nuan= tags::text_item_for_tag
tg args wid
(MARK pos, MARK c);
(rest, c, s, nuan . al);
}
except (tags::TEXT_ITEM_ERROR str) =>
{ tags::warning str;
(rest, c, s, al);}; end );
NULL =>
{ tags::warning ("Unknown tag <" + tgnm + "> ignored.");
(rest, c, s, al);
};
esac;
esac;
};
end;
Widget_Info
=
tags::Widget_Info;
fun get_livetext wid str
=
{ my (open_els, (cols, rows), text, anns)
=
fold_forward
(cons_el wid)
([], (1, 0), "", [])
(parser::parse str);
if (length open_els > 0)
tags::warning "Unclosed open elements found.";
fi;
LIVE_TEXT { len => THE (cols, rows),
str => text,
text_items => anns
};
};
};