PreviousUpNext

15.4.573  src/lib/compiler/front/parser/raw-syntax/raw-syntax-junk.pkg

## raw-syntax-junk.pkg

# Compiled by:
#     src/lib/compiler/front/parser/parser.sublib



###            "He wrapped himself in quotations -- as a beggar
###             would enfold himself in the purple of Emperors."
###
###                                    -- Rudyard Kipling



stipulate
    package em  =  error_message;                                       # error_message         is from   src/lib/compiler/front/basics/errormsg/error-message.pkg
#   package fix =  fixity;                                              # fixity                is from   src/lib/compiler/front/basics/map/fixity.pkg
    package hs  =  hash_string;                                         # hash_string           is from   src/lib/src/hash-string.pkg
#   package pj  =  print_junk;                                          # print_junk            is from   src/lib/compiler/front/basics/print/print-junk.pkg
    package raw =  raw_syntax;                                          # raw_syntax            is from   src/lib/compiler/front/parser/raw-syntax/raw-syntax.pkg
    package sy  =  symbol;                                              # symbol                is from   src/lib/compiler/front/basics/map/symbol.pkg
herein

    package   raw_syntax_junk
    : (weak)  Raw_Syntax_Junk                                           # Raw_Syntax_Junk       is from   src/lib/compiler/front/parser/raw-syntax/raw-syntax-junk.api
    {
        # Precomputed hs::hash_string values for operator lexemes.
        #
        # Naming scheme, as visible in the string literals below:
        #     <op>_hash      hashes the operator text alone,   e.g. "&"
        #     pre<op>_hash   hashes the operator followed by _, e.g. "&_"
        #     post<op>_hash  hashes the operator preceded by _, e.g. "_&"
        #
        # The multi-character operators "..", "++" and "--" have only
        # the bare and the post_ (leading-underscore) forms.
        #
        post_dotdot_hash  = hs::hash_string "_..";
        dotdot_hash       = hs::hash_string  "..";

        post_plusplus_hash= hs::hash_string "_++";
        plusplus_hash     = hs::hash_string  "++";

        post_dashdash_hash= hs::hash_string "_--";
        dashdash_hash     = hs::hash_string  "--";

        preamper_hash     = hs::hash_string  "&_";
        amper_hash        = hs::hash_string  "&";
        postamper_hash    = hs::hash_string "_&";

        preatsign_hash    = hs::hash_string  "@_";
        atsign_hash       = hs::hash_string  "@";
        postatsign_hash   = hs::hash_string "_@";

        preback_hash      = hs::hash_string  "\\_";
        back_hash         = hs::hash_string  "\\";
        postback_hash     = hs::hash_string "_\\";

        prebang_hash      = hs::hash_string  "!_";
        bang_hash         = hs::hash_string  "!";
        postbang_hash     = hs::hash_string "_!";

        prebar_hash       = hs::hash_string  "|_";
        bar_hash          = hs::hash_string  "|";
        postbar_hash      = hs::hash_string "_|";

        prebuck_hash      = hs::hash_string  "$_";
        buck_hash         = hs::hash_string  "$";
        postbuck_hash     = hs::hash_string "_$";

        precaret_hash     = hs::hash_string  "^_";
        caret_hash        = hs::hash_string  "^";
        postcaret_hash    = hs::hash_string "_^";

        predash_hash      = hs::hash_string  "-_";
        dash_hash         = hs::hash_string  "-";
        postdash_hash     = hs::hash_string "_-";

        preplus_hash      = hs::hash_string  "+_";
        plus_hash         = hs::hash_string  "+";
        postplus_hash     = hs::hash_string "_+";

        preslash_hash     = hs::hash_string  "/_";
        slash_hash        = hs::hash_string  "/";
        postslash_hash    = hs::hash_string "_/";

        prestar_hash      = hs::hash_string  "*_";              #    "The fault, dear Brutus, is not in our stars,
        star_hash         = hs::hash_string  "*";               #     But in ourselves, that we are underlings."
        poststar_hash     = hs::hash_string "_*";               #            -- William Shakespeare, "Julius Caesar" 

        pretilda_hash     = hs::hash_string  "~_";
        tilda_hash        = hs::hash_string  "~";
        posttilda_hash    = hs::hash_string "_~";

        preqmark_hash     = hs::hash_string  "?_";
        qmark_hash        = hs::hash_string  "?";
        postqmark_hash    = hs::hash_string "_?";

        prepercnt_hash    = hs::hash_string  "%_";
        percnt_hash       = hs::hash_string  "%";
        postpercnt_hash   = hs::hash_string "_%";

        # Bracket-like lexemes get a bare form plus exactly one
        # underscore form each: pre for "<" and "{", post for ">", "}" and "[".
        #
        prelangle_hash    = hs::hash_string "<_";
        langle_hash       = hs::hash_string  "<";

        prelbrace_hash    = hs::hash_string "{_";
        lbrace_hash       = hs::hash_string  "{";

        postrangle_hash   = hs::hash_string "_>";
        rangle_hash       = hs::hash_string  ">";

        postrbrace_hash   = hs::hash_string "_}";
        rbrace_hash       = hs::hash_string  "}";

        postlbracket_hash = hs::hash_string "_[";
        lbracket_hash     = hs::hash_string "[";

        equal_hash        = hs::hash_string "=";
        eqeq_hash         = hs::hash_string "==";
        bar_hash          = hs::hash_string "|";
        weakdot_hash      = hs::hash_string " . ";
        bogus_hash        = hs::hash_string "BOGUS";
        dollar_bogus_hash = hs::hash_string "$BOGUS";
        barens_hash       = hs::hash_string "|_|";

        # Hashes of word-like lexemes.  Each name matches the hashed
        # text, except get_fields_hash, whose text is "get__fields"
        # (two underscores).
        #
        field_hash        = hs::hash_string "field";
        generic_hash      = hs::hash_string "generic";
        get_fields_hash   = hs::hash_string "get__fields";
        in_hash           = hs::hash_string "in";
        include_hash      = hs::hash_string "include";
        infix_hash        = hs::hash_string "infix";
        infixr_hash       = hs::hash_string "infixr";
        message_hash      = hs::hash_string "message";
        method_hash       = hs::hash_string "method";
        nonfix_hash       = hs::hash_string "nonfix";
        overloaded_hash   = hs::hash_string "overloaded";
        raise_hash        = hs::hash_string "raise";
        recursive_hash    = hs::hash_string "recursive";

        # Operator lexemes as plain strings.
        #
        # Naming scheme, as visible in the literals below:
        #     <op>_string      the operator text alone,        e.g. "&"
        #     pre<op>_string   the operator followed by _,      e.g. "&_"
        #     post<op>_string  the operator preceded by _,      e.g. "_&"
        #
        post_dotdot_string  = "_..";
        dotdot_string       = "..";

        post_plusplus_string= "_++";
        plusplus_string     = "++";

        post_dashdash_string= "_--";
        dashdash_string     = "--";

        # Operator-then-underscore forms:
        #
        preamper_string   = "&_";
        preatsign_string  = "@_";
        preback_string    = "\\_";
        prebang_string    = "!_";
        prebar_string     = "|_";
        prebuck_string    = "$_";
        precaret_string   = "^_";
        predash_string    = "-_";
        prepercnt_string  = "%_";
        preplus_string    = "+_";
        preqmark_string   = "?_";
        preslash_string   = "/_";
        prestar_string    = "*_";
        pretilda_string   = "~_";

        # Bracket-like lexemes: a bare form plus one underscore form each.
        #
        prelangle_string  = "<_";
        langle_string     = "<";

        prelbrace_string  = "{_";
        lbrace_string     = "{";

        postrangle_string = "_>";
        rangle_string     =  ">";

        postrbrace_string = "_}";
        rbrace_string     =  "}";

        postlbracket_string = "_[";
        lbracket_string     =  "[";

        # Bare operator forms:
        #
        amper_string      = "&";
        atsign_string     = "@";
        back_string       = "\\";
        bang_string       = "!";
        bar_string        = "|";
        buck_string       = "$";
        caret_string      = "^";
        dash_string       = "-";
        percnt_string     = "%";
        plus_string       = "+";
        qmark_string      = "?";
        slash_string      = "/";
        star_string       = "*";
        tilda_string      = "~";

        # Underscore-then-operator forms:
        #
        postamper_string  = "_&";
        postatsign_string = "_@";
        postback_string   = "_\\";
        postbang_string   = "_!";
        postbar_string    = "_|";
        postbuck_string   = "_$";
        postcaret_string  = "_^";
        postdash_string   = "_-";
        postpercnt_string = "_%";
        postplus_string   = "_+";
        postqmark_string  = "_?";
        postslash_string  = "_/";
        poststar_string   = "_*";
        posttilda_string  = "_~";

        equal_string        = "=";
        eqeq_string         = "==";
        bar_string          = "|";
        weakdot_string      = " . ";
        bogus_string        = "BOGUS";
        dollar_bogus_string = "$BOGUS";
        barens_string       = "|_|";

        postbang_string   = "_!";

        # Word-like lexemes as plain strings.  Each name matches its
        # text, except get_fields_string, whose text is "get__fields"
        # (two underscores).
        #
        field_string      = "field";
        generic_string    = "generic";
        get_fields_string = "get__fields";
        in_string         = "in";
        include_string    = "include";
        infix_string      = "infix";
        infixr_string     = "infixr";
        message_string    = "message";
        method_string     = "method";
        nonfix_string     = "nonfix";
        overloaded_string = "overloaded";
        raise_string      = "raise";
        recursive_string  = "recursive";

        # Record pattern and record expression built from NIL (empty)
        # field lists; the pattern is marked as not incomplete.
        #
        void_pattern    = raw::RECORD_PATTERN { definition => NIL,   is_incomplete => FALSE };
        void_expression = raw::RECORD_IN_EXPRESSION NIL;

        # Symbol lists naming value constructors.  TRUE and FALSE are
        # one-element lists; QUOTE and ANTIQUOTE are each prefixed by
        # the package symbol "Lib7".
        #
        true_valcon  = [sy::make_value_symbol "TRUE"];
        false_valcon = [sy::make_value_symbol "FALSE"];
        quote_valcon = [sy::make_package_symbol "Lib7", sy::make_value_symbol "QUOTE"];

        antiquote_valcon = [sy::make_package_symbol "Lib7", sy::make_value_symbol "ANTIQUOTE"];
        arrow_type      = sy::make_type_symbol "->";                    # Type symbol for "->".

        # Assorted individual symbols.  Note that it_symbol, unlike its
        # neighbours, is a one-element LIST holding the value symbol "it".
        #
        exception_id   =   sy::make_type_symbol "Exception";
        sym_arg        =   sy::make_package_symbol "<Parameter>";
        bogus_id       =   sy::make_value_symbol "BOGUS";
        it_symbol      = [ sy::make_value_symbol "it" ];

        # 2007-12-31 CrT: This check used to limit fixity
        #                 precedences to the range 0-9;
        #                 I cannot find any particular
        #                 reason in the code for this, and
        #                 have relaxed it.

        fun check_fixity (fixity, err)
            =
            if (fixity < 0  or  fixity > 99)
                #
                err  em::ERROR "fixity precedence must be between 0 and 99"  em::null_error_body;
                99;
            else
                fixity;
            fi;

        # Layered patterns:
        #
        fun lay3 ((x as raw::VARIABLE_IN_PATTERN _), y, _)
                 =>
                 raw::AS_PATTERN { variable_pattern => x,   expression_pattern => y };

            lay3 (raw::TYPE_CONSTRAINT_PATTERN { pattern, type_constraint }, y, err)
                => 
                {   err  em::ERROR "illegal (multiple?) type constraints in AS pattern"  em::null_error_body;

                    case (lay3 (pattern, y, err))
                        #                       
                        raw::AS_PATTERN { variable_pattern, expression_pattern }
                            =>
                            raw::AS_PATTERN
                              {
                                variable_pattern,

                                expression_pattern
                                    =>
                                    raw::TYPE_CONSTRAINT_PATTERN
                                      {
                                        pattern => expression_pattern,
                                        type_constraint
                                      }
                              };

                        other => other;
                    esac;
                };

            lay3 (raw::SOURCE_CODE_REGION_FOR_PATTERN (x, _), y, err)
                =>
                lay3 (x, y, err);

            lay3 (raw::PRE_FIXITY_PATTERN [x], y, err)
                =>
                {   err  em::ERROR "parentheses illegal around variable in AS pattern"  em::null_error_body;
                    y;
                };

            lay3 (x, y, err)
                =>
                {    err  em::ERROR "pattern to left of AS must be variable"  em::null_error_body;
                     y;
                };
        end;

        fun lay2 (raw::TYPE_CONSTRAINT_PATTERN { pattern, type_constraint }, y, err)
                => 
                {   err  em::ERROR "illegal (multiple?) type constraints in AS pattern"  em::null_error_body;

                    case (lay2 (pattern, y, err))
                        #
                        raw::AS_PATTERN { variable_pattern, expression_pattern }
                            =>
                            raw::AS_PATTERN
                              { variable_pattern,
                                expression_pattern
                                    =>
                                    raw::TYPE_CONSTRAINT_PATTERN
                                      { pattern        => expression_pattern,
                                        type_constraint
                                      }
                              };

                        pattern => pattern;
                    esac;
                };

            lay2 (raw::SOURCE_CODE_REGION_FOR_PATTERN (x, _), y, err)
                =>
                lay2 (x, y, err);

            lay2 (raw::PRE_FIXITY_PATTERN [ { item, ... } ], y, err)
                =>
                lay3 (item, y, err);

            lay2 p
                =>
                lay3 p;
        end;

        fun lay (raw::TYPE_CONSTRAINT_PATTERN { pattern, type_constraint }, y, err)
                => 
                case (lay2 (pattern, y, err))
                    #
                    raw::AS_PATTERN { variable_pattern, expression_pattern }
                        =>
                        raw::AS_PATTERN
                          { variable_pattern,
                            expression_pattern
                                =>
                                raw::TYPE_CONSTRAINT_PATTERN
                                  { pattern => expression_pattern,
                                    type_constraint
                                  }
                          };

                    pattern => pattern;
                esac;


            lay (raw::SOURCE_CODE_REGION_FOR_PATTERN (x, _), y, err)
                =>
                lay (x, y, err);

            lay p =>   lay2 p;
        end;

        layered = lay;

        #  Sequence of declarations 
        #
        fun make_declaration_sequence (raw::SEQUENTIAL_DECLARATIONS a, raw::SEQUENTIAL_DECLARATIONS b) =>  raw::SEQUENTIAL_DECLARATIONS (  a  @  b  );
            make_declaration_sequence (raw::SEQUENTIAL_DECLARATIONS a,                              b) =>  raw::SEQUENTIAL_DECLARATIONS (  a  @ [b] );
            make_declaration_sequence (                             a, raw::SEQUENTIAL_DECLARATIONS b) =>  raw::SEQUENTIAL_DECLARATIONS (  a  !  b  );
            make_declaration_sequence (                             a,                              b) =>  raw::SEQUENTIAL_DECLARATIONS  [ a,    b ];
        end;


        fun block_to_let
                block_declarations_and_expressions2             # THIS IS IN REVERSE ORDER!
            =
            # This is where we deal with the mismatch between
            # our block-structured surface syntax and the LET-oriented
            # raw-syntax.api view of the world.  Depending on what is in
            #     block_declarations_and_expressions
            # we synthesize either LET statement or a simple expression.
            #
            # Note that
            #     block_declarations_and_expressions
            # is in reverse order, which is convenient given that
            # what matters most is whether the last statement lexically
            # was an expression or a declaration.
            #
            # In the first case below, the block consists of a single
            # expression.  We strip the expression of its wrapping and
            # return it.
            #
            # In the second case below, the block contains more
            # than one statement, and the last is an expression.
            # We construct and return a LET holding it all.
            #
            # In all other cases we don't have a terminal expression
            # to yield a value for the block, so we create and return
            # a LET with void_expression as its value.
            #
            case block_declarations_and_expressions2
                #
                [   raw::SOURCE_CODE_REGION_FOR_DECLARATION (
                         raw::VALUE_DECLARATIONS (
                             [   raw::NAMED_VALUE {
                                     expression,
                                     pattern    =>   raw::WILDCARD_PATTERN,
                                     ...
                                 }
                             ],
                             NIL
                         ),
                         _
                    )
                ]
                    =>
                    expression;

               raw::SOURCE_CODE_REGION_FOR_DECLARATION (
                    raw::VALUE_DECLARATIONS (
                        [   raw::NAMED_VALUE {
                                expression,
                                pattern    =>  raw::WILDCARD_PATTERN,
                                ...
                            }
                        ],
                        NIL
                    ),
                    _
                ) ! rest
                    =>
                    raw::LET_EXPRESSION {
                        expression,
                        declaration =>  raw::SEQUENTIAL_DECLARATIONS (reverse rest)
                    };

               rest
                    =>
                    raw::LET_EXPRESSION {
                        expression  =>  void_expression,
                        declaration =>  raw::SEQUENTIAL_DECLARATIONS (reverse rest)
                    };
            esac;

        fun quote_expression s
            =
            raw::APPLY_EXPRESSION
              {
                function =>  raw::VARIABLE_IN_EXPRESSION  quote_valcon,
                argument =>  raw::STRING_CONSTANT_IN_EXPRESSION  s
              };

        fun antiquote_expression e
            =
            raw::APPLY_EXPRESSION {
                function =>  raw::VARIABLE_IN_EXPRESSION antiquote_valcon,
                argument =>  e
            };


        # Two little fns for use in rule actions, which
        # annotate syntax expression and declaration trees
        # with the corresponding source file line+column
        # number range (s).
        #
        # They do nothing if the tree is already so annotated:
        #
        fun mark_expression (e as raw::SOURCE_CODE_REGION_FOR_EXPRESSION    _, _, _) =>  e;
            mark_expression (e,                                                a, b) =>  raw::SOURCE_CODE_REGION_FOR_EXPRESSION (e, (a, b));
        end;

        fun mark_declaration (d as raw::SOURCE_CODE_REGION_FOR_DECLARATION _, _, _) =>  d;
            mark_declaration (d,                                              a, b) =>  raw::SOURCE_CODE_REGION_FOR_DECLARATION (d, (a, b));
        end;


        # Fake up a
        #     my _ = ...
        # by hand to make an expression
        # look like a declaration:
        #
        fun expression_to_declaration
                (expression, left, right)
            =
            mark_declaration (
                raw::VALUE_DECLARATIONS (
                    [   raw::NAMED_VALUE {
                            expression,
                            pattern    =>  raw::WILDCARD_PATTERN,
                            is_lazy    =>  FALSE
                        }
                    ],
                    NIL
                ),
                left,
                right
            );


        # This fn is called (only) from:
        #
        #     src/lib/compiler/toplevel/interact/read-eval-print-loop-g.pkg
        #
        # Flatten a right-nested chain of two-element SEQUENTIAL_DECLARATIONS
        # into a list of individual declarations, in source order.  Whatever
        # declaration does not match the chain shape ends the walk and becomes
        # the last element of the result.
        #
        # NOTE(review): 'it_symbol' inside the pattern below appears to be a
        # fresh pattern variable, not a comparison against the package-level
        # it_symbol value (which is itself a list).  If so, the arm matches
        # ANY one-element VARIABLE_IN_PATTERN, not just "it" -- confirm that
        # this is the intent.
        #
        fun extract_toplevel_declarations  (dec: raw::Declaration)                                      # "dec" == "declaration".
            =
            reap_toplevel_statements (dec, [])                                                          # See bottom-of-fn comments.
            where
                # Walk down the chain, accumulating peeled-off
                # declarations in 'results', newest first.
                #
                fun reap_toplevel_statements (dec,  results)
                    =
                    case dec
                        #
                        raw::SOURCE_CODE_REGION_FOR_DECLARATION                                         # This pattern is more fragile than one would like,
                          (                                                                             # since it depends on just where sourcecode region
                            raw::SEQUENTIAL_DECLARATIONS                                                # info is/not inserted by the parser, but making it
                              [ dec1 as raw::SOURCE_CODE_REGION_FOR_DECLARATION                         # more robust does not seem like a cost-effective use
                                          (                                                             # of programming time just now.  -- 2012-01-22 CrT
                                            raw::VALUE_DECLARATIONS
                                              ( [ raw::NAMED_VALUE { pattern => raw::VARIABLE_IN_PATTERN [ it_symbol ], ... } ],
                                                _
                                              ),
                                            region'
                                          ),
                                dec2
                              ],
                            region
                          )
                            =>
                            # The outer 'region' is not used.
                            #
                            # NOTE(review): dec1 is bound (via 'as') to the whole
                            # region-wrapped declaration, so the line below wraps it
                            # in region' a second time -- confirm this is intended.
                            #
                            reap_toplevel_statements (dec2,  raw::SOURCE_CODE_REGION_FOR_DECLARATION (dec1, region') ! results);

                        # End of chain: keep the remaining declaration as the
                        # final element and restore source order.
                        #
                        _   =>  reverse  (dec ! results);
                    esac;
            end;
            #
            # Given a raw_syntax::Declaration equivalent to
            #
            #     my it = foo ();
            #     my it = bar ();
            #     my it = zot ();
            #     ...
            #
            # we return a list of the individual declarations.
            # The immediate motivation for this is that in
            #
            #     src/lib/compiler/front/parser/yacc/mythryl.grammar
            #
            # we define
            #
            #     toplevel_declarations:    toplevel_declaration SEMI
            #                          |    toplevel_declaration SEMI toplevel_declarations
            #
            # which means that a multi-statement script of statements
            # as above will parse as a single toplevel statement, because
            # YACC defaults to returning the longest possible syntactically
            # valid parse.
            #
            # This is a problem when processing scripts because we need
            # to be able to do something like
            #
            #     #!/usr/bin/mythryl
            #     load "foo.lib";
            #     foo::bar();
            #
            # where the 'load' statement adds package "foo" to the global
            # environment for use in the rest of the script;  if both
            # lines are compiled as a unit, the global environment will
            # be updated too late, and foo:: will come up as an undefined
            # library.
            #
            # We deal with this in
            #
            #     src/lib/compiler/toplevel/interact/read-eval-print-loop-g.pkg
            #
            # by post-parse breaking up the raw syntax parsetree into
            # its logical constituents and compiling them separately;
            # the function here is support for that.
            #
            # In actual raw-syntax format, syntax such as
            #
            #     my it = foo ();
            #     my it = bar ();
            #     my it = zot ();
            #     ...
            #
            # comes out looking like
            #
            #     SEQ[ it=foo();
            #          SEQ[ it=bar();
            #               SEQ[ it=zot();
            #                    ...
            #        ]    ]    ]
            #
            # or in more exhaustive detail
            #
            #     SOURCE_CODE_REGION_FOR_DECLARATION <...> 
            #          SEQUENTIAL_DECLARATIONS[
            #           SOURCE_CODE_REGION_FOR_DECLARATION <...>    VALUE_DECLARATIONS [ NAMED_VALUE[ VARIABLE_IN_PATTERN it = ... ]NAMED_VALUE ]VALUE_DECLARATIONS 
            #              ;SEQUENTIAL_DECLARATIONS
            #                        SOURCE_CODE_REGION_FOR_DECLARATION <...> 
            #                        SEQUENTIAL_DECLARATIONS[
            #                            SOURCE_CODE_REGION_FOR_DECLARATION <...>  VALUE_DECLARATIONS [ NAMED_VALUE[ VARIABLE_IN_PATTERN it = ... ]NAMED_VALUE ]VALUE_DECLARATIONS 
            #                             ...                                                          
            #
            # so basically we need to drill down through the sourcecode region
            # info and take the first element of each decl-sequence and return
            # a list of the results.
    };                  # package raw_syntax_junk
end;



Comments and suggestions to: bugs@mythryl.org

PreviousUpNext