PreviousUpNext

15.4.640  src/lib/compiler/front/typer/main/resolve-operator-precedence.pkg

## resolve-operator-precedence.pkg 

# Compiled by:
#     src/lib/compiler/front/typer/typer.sublib

####################################################
#               Motivation
#
# We allow the user to specify the precedence
# and associativity of infix operators.
#
# Rather than try to hack support for that into
# the parser proper, we have the parser pass
# expressions through in an undigested flat-list
# form, from which we can later reconstruct the
# actual appropriate syntax tree representation
# -- here.


####################################################
#               Overview
#
# Ths is the schematic problem
# we're trying to solve here:
#
# We have a set of LEAF symbols:    a b c  ...
# We have a set of BINOP symbols:   * + << ...
#
# We recognize two kinds of expressions which may
# be formed from these components:
#     f a           # Functional application of one leaf to another
#     a + b         # Application of a BINOP to two adjacent leafs.
#
# Our input is a flat list of LEAF and BINOP
# symbols, where the BINOP symbols are tagged
# with arbitrary precedences, and also marked
# as to whether they are right or left associative.
#
# Our task is to resolve that input list into
# a syntax tree which makes the evaluation
# relationships explicit.
#
# For example,
#
#     f a b c + g d * h e
#
# should yield approximately
#
#             +
#            / \
#           /   \
#          /     \
#         .       *
#        / \     / \
#       .   c   /   \
#      / \     .     .
#     .   b   / \   / \
#    / \     g   d h   e
#   f   a
#
# where we are using '.' to represent functional
# application of the first argument to the second.
#
# (Note in particular that '*' has higher precedence
# than '+', which is reflected in the parse tree shown.)


####################################################
#               Algorithm
#
# We use a conventional linear left-to-right scan
# over the input with a stack to hold our
# intermediate partial-syntax-tree results.
#
# Thus, at any given time we have a state consisting
# of the current stack of partial syntax trees together
# with the remaining input tokens, and we are looking
# at the next input token trying to figure out what
# to do next.
#
# Our algorithm consists of three processing rules:
#
# 0) Our default action when we don't know what else
#    to do is just to push the input token on our
#    stack and go on.
#
# 1) If there's a LEAF F on top of the stack and
#    the next input token is a LEAF A, then
#    we can combine them into a (F . A) node
#    (representing functional application of F to A)
#    and push the result back on the stack as a new
#    LEAF node.
#
# 2) If the top three items on the stack are:
#
#                  A * B
#
#    (i.e., two LEAFs separated by a BINOP)
#    and the next input token is
#
#                   +
#
#    (i.e., a lower-precedence BINOP) then we
#    can combine those top three entries into
#    a new LEAF node (A*B), push it back on
#    the stack, and continue.
#
# And that's basically all we have to do!
#
# There are a few corner cases like issuing
# error messges for syntactically illegal
# situations like a BINOP at the start or
# end of the input list, and we have to
# flush any remaining stuff out of the stack
# when we reach end of input, but basically
# the above three cases get the job done.


####################################################
#               Code context
#
# We get called (only) from two places in
#     src/lib/compiler/front/typer/main/type-core-language.pkg
# once to resolve regular executable expressions,
# once to resolve pattern-match expressions.
#
# Our 'parse' entry point is applied in two stages:
#
# (1) Called initially with a pair of functions
#     'apply' and 'pair'  with which to build up
#     the resulting parsetree, by doing
#     
#        apply (f, a)
#
#     to construct a syntax tree node
#     encoding the functional application
#     of 'f' to 'a', and doing
#
#        apply( o, pair (a, b) )
#     
#     to encode the application of BINOP 'o' to
#     arguments 'a' and 'b'.
#
#     (Passing in the syntax-tree construction
#     functions in this way makes us independent
#     of the details of syntax tree construction,
#     in particular whether we're building an
#     expression or pattern-matching syntax tree.) 
#
#     At this point we simply latch the given
#     'apply' and 'pair' functions in a closure
#     and return the closure.
#
#     Essentially, this stage produces a version
#     of our 'parse' function specialized to the
#     particular type of syntax tree construction
#     problem at hand.
#
# (2) The result of the above is then called repeatedly
#     to convert particular expressions from flat-list
#     form to fully-resolved syntax-tree form.
#
#     This call takes as input a tuple of three elements:
#
#     1.  The input expression as a flat list.
#
#         For our purposes, each entry is list just a blob
#         with precedence and associativity information.
#         (The full definition is in
#              src/lib/compiler/front/parser/raw-syntax/raw-syntax.pkg
#         )
#
#     2.  The current syntax table.
#
#     3.  A sink for error messages.
#
#     We then return the resulting syntax tree. 


api Resolve_Operator_Precedence {

     parse
         :
         {   apply: (X, X) -> X,
             pair:  (X, X) -> X
         }
         ->
         ( List( raw_syntax::Fixity_Item(X) ),                             #  Input list.                   
           symbolmapstack::Symbolmapstack,                                       #  To look up binop precedences. 
           (raw_syntax::Source_Code_Region -> error_message::Plaint_Sink)    #  Error message sink.
         )
         ->
         X;
};


stipulate
    package err=  error_message; 
    package f  =  fixity;                                                       # fixity                        is from   src/lib/compiler/front/basics/map/fixity.pkg
herein 

    package   resolve_operator_precedence
    : (weak)  Resolve_Operator_Precedence                                               # Resolve_Operator_Precedence   is from   src/lib/compiler/front/typer/main/resolve-operator-precedence.pkg
    {

        # Define our evaluation stack.
        #
        # For a LEAF we just store its syntax tree.
        #
        # For a BINOP we store the syntax tree plus
        # a precedence plus the binop symbol itself,
        # for error message purposes.
        #
        # Both of the above end with a pointer
        # to the next node in the stack:
        #
        Precedence_Stack X
          = BINOP (symbol::Symbol, Int, X, Precedence_Stack(X))
          | LEAF  (X, Precedence_Stack(X))
          | BOTTOM_OF_STACK;


        ##################################################################
        # 'parse':  Our main external entry point (stage 1).
        #
        #           All we do at this point is to latch
        #           'apply' and 'pair' in a closure,
        #           which we then return:
        #
        fun parse { apply, pair }
            =
            {
                ##################################################################
                # 'ensure_leaf': We call this function when we're in
                #                a situation, such as processing of
                #                the first input token, in which a
                #                binary infix operator is not
                #                syntactically valid.
                #
                #                If the input token -is- in fact
                #                infix, we issue an error message.
                #
                #                Either way, we push the input token
                #                on the stack as a LEAF node, and
                #                return the new stack.  
                #
                fun ensure_leaf (stack, (expression, f::NONFIX, _, err))
                        =>
                        LEAF (expression, stack);

                    ensure_leaf (stack, (expression, f::INFIX _, THE symbol, err))
                        => 
                        {   err
                                err::ERROR
                                (   "expression or pattern begins with infix identifier \"" 
                                +   symbol::name symbol
                                +   "\""
                                )
                                err::null_error_body;

                            LEAF (expression, stack);
                        };

                    ensure_leaf _
                        =>
                        err::impossible "precedence: ensureLEAF";
                end;



                ##################################################################
                # 'parse_item':  Process one input item.
                #
                # First argument is our current result-expression stack.
                # Second argument is current input item to process. 
                # We return the updated result-expression stack.
                #
                # Basically, we just compare the LEAF/BINOP type
                # (and in the latter case, precedence) of the input
                # token with those of the top one or two entries
                # on the expression stack and then do the obvious.
                # (See algorithm discussion at top of file.)



                    ######################
                    #  [...LEAF]-LEAF case: Two consecutive non-infix expressions
                    #                           represent function application of
                    #                           first to second:
                    #
                fun parse_item (

                        LEAF (expression1, rest_of_stack),

                        (expression2, f::NONFIX, _, err)
                    )
                        =>
                        LEAF (apply (expression1, expression2), rest_of_stack);



                    ######################
                    #  [...BINOP]-* case:  We don't allow two binary
                    #                      operators in a row, so when
                    #                      we have an BINOP at top of
                    #                      stack, issue an error message
                    #                      if the input token is also BINOP.
                    #
                    #                      Either way, push the input token
                    #                      on the stack as a LEAF node
                    #                      and return the new stack:
                    #
                    parse_item (stack as BINOP _, token)
                        =>
                        ensure_leaf (stack, token);



                    ######################
                    #  [...LEAF, BINOP, LEAF]-BINOP case:  In this situation, the two BINOPs
                    #                                    are competing for the right
                    #                                    to eat the LEAF between them.
                    #
                    #                                    If the first BINOP has higher precedence,
                    #                                    it can go ahead and eat the two LEAFs
                    #                                    adjacent to it.
                    #
                    #                                    If the second BINOP has higher precedence,
                    #                                    then we need to push it on the stack and
                    #                                    wait for its second argument to arrive.
                    #
                    parse_item (

                        stack as LEAF (expression1, BINOP (_, precedence2, expression2, LEAF (expression3, rest_of_stack))
                        ), 

                        (   expression4,
                            f as f::INFIX (precedence4left, precedence4right),    #  Latter two are identical except for associativity-encoding low bit. 
                            THE symbol,
                            err
                        )
                    )
                        =>
                        if   (precedence4left > precedence2)
                            
                             # Second BINOP has higher precedence, so
                             # push it on the stack to await its second
                             # argument and return the new stack:
                             #
                             BINOP (symbol, precedence4right, expression4, stack);
                        else
                             if   (precedence4left == precedence2)
                                 
                                  err
                                      err::WARNING
                                      "mixed left- and right-associative \
                                          \operators of same precedence"
                                      err::null_error_body;
                             fi;

                             #  First BINOP operator has higher precedence,
                             #  so go ahead and pop the top three stack
                             #  entries (the BINOP plus its two LEAF args),
                             #  create a syntax tree node applying the BINOP
                             #  to its two LEAF arguments, push the newly
                             #  created expression back on the stack as a new
                             #  LEAF node, and call ourselves recursively,
                             #  since  we still haven't eaten the input token:
                             #
                             parse_item (
                                 LEAF (
                                     apply (
                                         expression2,
                                         pair (expression3, expression1)
                                     ),
                                     rest_of_stack
                                 ),
                                 (   expression4,
                                     f,
                                     THE symbol,
                                     err
                                 )
                             );
                        fi;

                    ######################
                    #  [...LEAF]-BINOP case:  We can't do anything with the BINOP
                    #                         until we arrive at its second argument,
                    #                         so we just push it on the stack.
                    #
                    parse_item (

                        stack as LEAF _,

                        (   expression,
                            f::INFIX (precedence_left, precedence_right),
                            THE symbol,
                            _
                        )
                    )
                        => 
                        BINOP (symbol, precedence_right, expression, stack);

                    parse_item _
                        =>
                        err::impossible "Precedence::parse";
                end;



                ##################################################################
                # 'finish':  Clean up the stack
                #
                fun finish (
                        LEAF (
                            e1,
                            BINOP (_, _, e2, LEAF (e3, r))
                        ),
                        err
                    )
                        =>
                        finish (
                            LEAF (
                                apply (e2, pair (e3, e1)),
                                r
                            ),
                            err
                        );

                    finish (LEAF (e1, BOTTOM_OF_STACK), _)
                        =>
                        e1;

                    finish (BINOP (symbol, _, e1, LEAF (e2, p)),   err)
                        => 
                        {   err
                                err::ERROR 
                                (   "expression or pattern ends with infix identifier \"" 
                                +   symbol::name symbol
                                +   "\""
                                )
                                err::null_error_body;

                            finish (
                                LEAF (
                                    apply (e2, e1),
                                    p
                                ),
                                err
                            );
                        };

                    finish (BOTTOM_OF_STACK, err)
                        =>
                        err::impossible "Corelang::finish BOTTOM_OF_STACK";

                    finish _
                        =>
                        err::impossible "Corelang::finish";
                end;


                # The following anonymous function is our return
                # value from stage-one application (where all
                # we do is note our 'apply' and 'pair' functions).
                #
                \\  (   all_input_items as first_item ! nonfirst_items,   # The input expression as a flat list. 
                        symbolmapstack,                                     # The current symbol table.
                        error                                             # The sink for our error messages.    
                    )
                        =>
                        {   #########################################
                            #  Extract the fixity information we need
                            #  from one input item by way of lookup
                            #  in the symbol table.
                            #
                            #  Our return value is essentially the same
                            #  input item in a more convenient representation:
                            #
                            fun unpack_input_item { item, source_code_region, fixity }
                                =
                                (   item,

                                    case fixity   
                                        NULL       => f::NONFIX; 
                                        THE symbol => find_in_symbolmapstack::find_fixity_by_symbol (symbolmapstack, symbol);
                                    esac,

                                    fixity,

                                    error  source_code_region
                                );



                            #########################################
                            #  For error message purposes, figure out
                            #  our end-of-input location in the source code.
                            #
                            #  Our input is the complete flat-form input-item list:
                            #
                            fun endloc [ { source_code_region => (_, x), item, fixity } ]
                                    =>
                                    error (x, x);

                                endloc (_ ! a)  =>   endloc a;
                                endloc _        =>   err::impossible "precedence: endloc";
                            end;



                            #########################################
                            # 'loop' is our central loop-over-the-list-of-input-items function.
                            # The first item is our result-expression stack.
                            # The second argument is our list of input items remaining to be processed.
                            # Our return result is the fully resolved syntax tree.
                            #
                            fun loop (stack, input_item ! remaining_items)          #  Process one input item.     
                                    =>
                                    loop (parse_item (stack, unpack_input_item input_item), remaining_items);

                                loop (stack, NIL)                                #  No-more-input case -- done. 
                                    =>
                                    finish (stack, endloc all_input_items);
                            end;



                            #########################################
                            # 'make_initial_stack' is our start-of-the-main-loop
                            # initialization function for stage-two
                            # processing.
                            #
                            # Our 'token' input argument is the fixity
                            # info for the first token in our flat-form
                            # input-syntax list.
                            #
                            # Our result is the initial stack:
                            #
                            fun make_initial_stack token
                                =
                                ensure_leaf (BOTTOM_OF_STACK, token);


                            # Process all input and return final result: 
                            #
                            loop (make_initial_stack (unpack_input_item first_item), nonfirst_items);
                        };

                        _ => err::impossible "precedence: parse";
                end;
            };
    };                                                                          # package resolve_operator_precedence
end;                                                                            # stipulate


Comments and suggestions to: bugs@mythryl.org

PreviousUpNext