src/lib/tk/src/toolkit/tk-markup-g.pkg

15.4.1336 src/lib/tk/src/toolkit/tk-markup-g.pkg

## tk-markup-g.pkg
## (C) 1996, 1998, Bremen Institute for Safe Systems, Universitaet Bremen
## Author: cxl (Last modification by $Author: 2cxl $)

# Compiled by:
#     src/lib/tk/src/toolkit/sources.sublib

# ***************************************************************************
#
# tk Generic Markup Language: writing down annotated texts.
#
# This module allows one to write down texts with embedded text_items in an
# SGML-like format.
#
# See standard-markup-tags-g.pkg for a full-fledged instantiation of this generic
# markup language, and tests+examples/markup_ex.pkg for a wee example.
#
# $Date: 2001/03/30 13:39:46 $
# $Revision: 3.0 $
#
#
# **************************************************************************

###         "The theoretical broadening which comes
###          from having many humanities subjects
###          on the campus is offset by the general dopiness
###          of the people who study these things."
###
###                          -- Richard P. Feynman

generic package tk_markup_g (tags: Tags)                # Tags  is from   src/lib/tk/src/toolkit/markup.api

: (weak) Tk_Markup                                              # Tk_Markup     is from   src/lib/tk/src/toolkit/markup.api
#  where type Widget_Info = tags::Widget_Info

{

    include package   tk;
    include package   basic_utilities;

    #
    # This defines the abstract syntax of a text with text_items in it.
    # I won't bore you with a BNF, but roughly the syntax is as follows:
    #
    # elemStart (nm, a1, ... an) is  <nm a1 ... an>
    #    -- start of an "element" in SGML-speak.
    #       Note there must be  _no space_ betweem
    #       the opening < and the name nm, and nm has to start with _letter_;
    #       a1 to an are the arguments of the element
    #
    # elemEnd nm is <\nm>
    #    -- the end of an element
    #
    # escape e is  &str;
    #    -- the escape e denoted by str
    #
    # quote str is just the string str

     An_Text_El =  ELEM_START      (String, List( String ))
                       | QUOTE      Substring
                       | ESCAPE     String
                       | ELEM_END    String;

     Annotated_Text = List( An_Text_El );

   # *******************************************************************
   #
   # The parser
   #
   # The parser is extremely tolerant; it doesn't generate any errors, and
   # if it can't decipher something it will just leave it as a verbal
   # quote.

   package parser
       =
       package {

           include package   substring;

            # The lexical elements

            Lexem = OPEN_EL | OPEN_END_EL | OPEN_ESC;
                            # corresponding to <, <\ and &;  plus > and ;
                            # which only become a lexem after one of these

           #  handy slices
           fun slice_from_to (t, i, n)= slice (t, i, THE (n-i+1));
           fun slice_to_end (t, i)    = slice (t, i, NULL);

           # convert string to lowercase:

           fun to_lower_str substr
               =
               string::implode (map char::to_lower (substring::explode substr));

           fun next_is_alpha t i
               =
               char::is_alpha (sub (t, i+1))
               except
                   INDEX_OUT_OF_BOUNDS = FALSE;

           fun error_context (t, i)
               =
               {   t= slice_to_end (t, i);

                   "'" $ (if (size t  < 25  )  string t;
                          else                (string (slice (t, 0, THE (25))) $ "...");fi) $ "'";
               };

           #  find first valid occurence of a lexem in t, starting from index i

           # Return either the index into the string right after the lexem, and
           # the lexem, if there is one; or NULL and the index to the end of the
           # string:
           #
           fun scan_next_lex t i
               =
               case (sub (t, i))

                     # & and < are only valid lexemes
                     # if followed by a letter:

                    '&' =>  if (next_is_alpha t i)   (i - 1, i+1, THE open_esc);
                            else                     scan_next_lex t (i+1);
                            fi;

                   '<'  =>  if (next_is_alpha t i)

                                 (i - 1, i+1, THE open_el);
                            else
                                 if ((sub (t, i+1)) == '\\')

                                      if (next_is_alpha t (i+1))

                                           (i - 1, i+2, THE open_end_el);
                                      else
                                           scan_next_lex t (i+2);
                                      fi;
                                 else
                                      scan_next_lex t (i+1);
                                 fi;
                            fi;

                    _   => scan_next_lex t (i+1);
                esac
                except
                    INDEX_OUT_OF_BOUNDS
                        =
                        (i - 1, i - 1, NULL);

           # Have passed end of string

           # Find next occurence of >
           #
           fun scan_close_el t i
               =
               case (sub (t, i))

                    '>' => THE (i+1);

                     _  => scan_close_el t (i+1)
                           except
                               INDEX_OUT_OF_BOUNDS =  NULL;
               esac;

           fun scan_close_esc t i
               =
               case (sub (t, i))

                    ';' => THE (i+1);

                     _  => scan_close_esc t (i+1)
                           except
                               INDEX_OUT_OF_BOUNDS =  NULL;
               esac;

           # parse an "element", i.e. a thingy enclosed in '<' ... '>'
           # i is supposed to be the index into t right after the opening bracket
           # parseEl returns the representation of the rest of t
           #
           fun parse_el t i
               =
               {   unto = scan_close_el t i;

                   case unto

                        THE n
                            =>
                           {   el_text = to_lower_str (slice_from_to (t, i, n - 2));

                               els     = string::tokens (char::is_space) el_text;

                                   # We can rely on els being non-empty since
                                   # nextIsAlpha was TRUE when calling parseEl

                               (elem_start (hd els, tl els)) . (parse_main t n);
                           };

                        NULL
                            =>
                            raise exception tags::error
                               ("Can't find closing '>' after " $ (error_context (t, i - 1)));
                   esac;
               }

           also
           fun parse_end_el t i
               =
               {   unto = scan_close_el t i;

                   case unto

                        THE n
                            =>
                            {   el_text = to_lower_str (slice_from_to (t, i, n - 2));

                                els     = string::tokens char::is_space el_text;

                                    # Again, els has to be non-empty.
                                    # We  could check here if there is more than
                                    # one element and generate a warning.
                                    # Or we could even keep a list of arguments.

                                (elem_end (hd els))  . (parse_main t n);
                            };

                        NULL
                            =>
                            raise exception tags::error ("Can't find closing '>' after " $ (error_context (t, i - 2)));
                   esac;
               }

           # Parse an escape sequence, starting with '&' ... ';'
           # i is supposed the index into t right after the ampersand
           #
           also
           fun parse_esc t i
               =
               {   unto = scan_close_esc t i;

                   case unto

                        THE n
                            =>
                            escape (substring::string (slice_from_to (t, i, n - 2)))
                            . (parse_main t n);

                        NULL #  Can't find closing ;
                            =>
                            raise exception tags::error
                                ("Can't find closing ';' after " $ (error_context (t, i - 1)));
                   esac;
               }

           also
           fun parse_main t i
               =
               {   my (j, n, lex) = scan_next_lex t i;

                   rest = case lex

                               NULL           => [];
                              THE open_el    => parse_el t n;
                              THE open_esc   => parse_esc t n;
                              THE open_end_el => parse_end_el t n; esac;

                   if (i <= j)   (quote (slice_from_to (t, i, j))) . rest;
                   else          rest;
                   fi;
               };

           fun parse t
               =
               parse_main (full t) 0;
       };

    # Count position within a string:
    #
    addpos
        =
        {   fun cntone (thischar, (line, char))
                =
                if   (string_util::is_linefeed thischar)

                     (line+1, 0);
                else (line, char+1);
                fi;

            substring::fold_forward cntone;
        };

    # Like split, but stop after the first element satisfying p:
    #
    fun splitfirst p []
            =>
            (NULL, []);

        splitfirst p (x . xs)
            =>
            if  (p x)

                (THE x, xs);
            else
                my (f, r) =   splitfirst p xs;

                (f, x . r);
            fi;
    end;

    # The four components of the consEl's second argument are the following:
    # - the first is the stack of unprocessed open elements, along with their
    #   position within the text;
    # - the second is current position within the text;
    # - the third is the text content up to here;
    # - and the last is the list of text_items built up to here.
    #
    # As it stands, opening elements with no matching close are discarded.
    # This can be changed easily.

    fun cons_el wid (quote q, (oe, c, s, al))
        =>
        (oe, addpos c q, s$(substring::string q), al);

       cons_el wid (escape e, (oe, c, s, al))
        =>
        (   case (tags::escape e)

                 THE esc
                 =>
                 { estr= tags::text_for_esc esc;
                     nuc = addpos c (substring::from_string estr);
                     ean = tags::annotation_for_esc esc (MARK c,
                                                          MARK nuc);
                   (oe, nuc, s$estr,
                      case ean    THE t=> t . al;  NULL=> al; esac);
                 };

                NULL
                 =>
                 { estr=
                     case e    #  the three predefined escape seqs
                         "amp" => "&";
                        "lt"  => "<";
                        "gt"  => ">";
                         _    =>
                         { tags::warning ("Unknown escape sequence '" + e +
                                      "' (left untouched).");
                          "&" + e + ";";}; esac;
                  (oe, addpos c (substring::from_string estr), s + estr, al);
                 }; esac);

       cons_el wid (elem_start els, (oe, c, s, al))
            =>
            ((els, c) . oe, c, s, al);

       cons_el wid (elem_end el, (oe, c, s, al))
            =>
            {   my (m, rest)
                    =
                    splitfirst (\\ ((nm, args), _) => nm == el; end ) oe;

                case m
                    NULL =>
                        { tags::warning ("Closing tag '<" + el + ">' doesn't match any opening tag");
                          (oe, c, s, al);
                        };

                    THE ((tgnm, args), pos)
                        =>
                        case (tags::matching_tag tgnm)

                            THE tg =>
                                ({ nuan= tags::text_item_for_tag
                                                    tg args wid
                                                      (MARK pos, MARK c);
                                   (rest, c, s, nuan . al);
                                 }
                                 except (tags::TEXT_ITEM_ERROR str) =>
                                       { tags::warning str;
                                        (rest, c, s, al);}; end );
                           NULL =>
                                {   tags::warning ("Unknown tag <" + tgnm + "> ignored.");
                                    (rest, c, s, al);
                                };
                        esac;
                esac;
            };
    end;

    Widget_Info
        =
        tags::Widget_Info;

    fun get_livetext wid str
        =
        {   my (open_els, (cols, rows), text, anns)
                =
                fold_forward
                    (cons_el wid)
                    ([], (1, 0), "", [])
                    (parser::parse str);

            if (length open_els  >  0)
                 tags::warning "Unclosed open elements found.";
            fi;

            LIVE_TEXT { len        =>  THE (cols, rows),
                        str        =>  text,
                        text_items =>  anns
                      };
        };
};

Comments and suggestions to: bugs@mythryl.org