PreviousUpNext

15.4.148  src/app/yacc/src/make-lalr-g.pkg

## make-larl-g.pkg
#  Mythryl-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi 

# Compiled by:
#     src/app/yacc/src/mythryl-yacc.lib


###            "If I had staid for other people to
###             make my tools & things for me,
###             I had never made anything of it..."
###
###                             -- Isaac Newton 



generic package make_lalr_g (

    package internal_grammar:  Internal_Grammar;        # Internal_Grammar      is from   src/app/yacc/src/internal-grammar.api
    package core:         Core;                         # Core  is from   src/app/yacc/src/core.api
    package graph:        Lr_Graph;                     # Lr_Graph      is from   src/app/yacc/src/lr-graph.api
    package look:         Look;                         # Look  is from   src/app/yacc/src/look.api

    sharing graph::core == core;
    sharing graph::internal_grammar == core::internal_grammar
                               == look::internal_grammar
                               == internal_grammar;

)
: (weak) La_Lr_Graph            # La_Lr_Graph   is from   src/app/yacc/src/la-lr-graph.api
{
    include package   rw_vector;
    include package   list;

    infix my 9 sub;

    include package   internal_grammar::grammar;
    include package   internal_grammar;
    include package   core;
    include package   graph;
    include package   look;

    package graph = graph;
    package core = core;
    package grammar= internal_grammar::grammar; # internal_grammar      is from   src/app/yacc/src/grammar.pkg
    package internal_grammar = internal_grammar;

     Tmpcore = TMPCORE   (List( (Item, Ref( List( Terminal )))), Int);
     Lcore   = LCORE     (List( (Item,      List( Terminal )) ), Int);

    fun pr_lcore (a as (symbol_to_string, nonterm_to_string, term_to_string, print))
        =
        {   print_item =   print_item (symbol_to_string, nonterm_to_string, print);

            print_lookahead =   pr_look (term_to_string, print);

            \\ (LCORE (items, state))
                =>
                {   print "\n";
                    print "state ";
                    print (int::to_string state);
                    print " :\n\n";

                    list::apply
                        (   \\ (item, lookahead)
                                =>
                                {   print "{ ";
                                    print_item item;
                                    print ", ";
                                    print_lookahead lookahead;
                                    print "}\n";
                                }; end 
                        )
                        items;
                }; end ;
        };

    exception LALR  Int;

    package item_list
        =
        list_ord_set_g (
            package {
                 Element = (Item, Ref( List( Terminal ) ));

                fun eq ((a, _), (b, _)) =   eq_item (a, b);
                fun gt ((a, _), (b, _)) =   gt_item (a, b);
            }
        );

    package nonterm_set
        =
        list_ord_set_g (
            package {
                 Element = Nonterminal;

                gt = gt_nonterm;
                eq = eq_nonterm;
            }
        );

    #  NTL: nonterms with lookahead 

    package ntl
        =
        redblack_ord_set_g (
            package {
                 Element = (Nonterminal, List( Terminal ));

                fun gt ((i, _), (j, _)) =   gt_nonterm (i, j);
                fun eq ((i, _), (j, _)) =   eq_nonterm (i, j);
            }
        );

    debug = FALSE;

    fun add_lookahead { graph, nullable, first, eop,
                            rules, produces, nonterms, eps_prods,
                            print, term_to_string, nonterm_to_string }
        =
        {   eop =   look::make_set eop;

            fun symbol_to_string (   TERMINAL t) =>      term_to_string t;
                symbol_to_string (NONTERMINAL t) =>   nonterm_to_string t;
            end;

            print = if debug  print;
                    else      \\ _ = ();
                    fi;

            pr_look = if debug  pr_look (term_to_string, print);
                      else      \\ _ = ();
                      fi;

            pr_nonterm =   print o nonterm_to_string;

            pr_rule = if debug   pr_rule (symbol_to_string, nonterm_to_string, print);
                      else       \\ _ = ();
                      fi;

            print_int =   print o (int::to_string:  Int -> String);

            print_item =   print_item (symbol_to_string, nonterm_to_string, print);

            # look_pos: position in the rhs of a rule at which we should start placing
            # lookahead ref cells, i.e. the minimum place at which A -> x .B y, where
            # B is a nonterminal and y =*=> epsilon, or A -> x. is TRUE.  Positions are
            # given by the number of symbols before the place.  The place before the first
            # symbol is 0, etc.

            stipulate
                positions = make_rw_vector (length rules, 0);

                # rule_pos: calculate place in the rhs of a rule at which we should start
                # placing lookahead ref cells
                #
                fun rule_pos (RULE { rhs, ... } )
                    =
                    case (reverse rhs)
                        #
                        NIL             =>   0;
                        (   TERMINAL t) ! r  =>   length rhs;

                        (NONTERMINAL n ! r)
                             =>
                             {   # f assumes that everything after n in the
                                 # rule has proven to be nullable so far.
                                 # Remember that the rhs has been reversed,
                                 # implying that this is TRUE initially

                                 fun f (b, (r as (TERMINAL _ ! _)))
                                         =>                                     #  A -> .z t B y, where y is nullable 
                                         length r;

                                     f (c, (NONTERMINAL b ! r))
                                         =>                                     #  A -> .z B C y 
                                         if (nullable c ) f (b, r);
                                         else length r + 1;fi;

                                     f (_, [])
                                         =>                                     #  A -> .B y, where y is nullable 
                                         0;
                                 end;

                                 f (n, r);
                             };
                    esac;

                fun check_rule (rule as RULE { num, ... } )
                    =
                    {   pos =   rule_pos rule;
                        #
                        print "look_pos: ";
                        pr_rule rule;
                        print " = ";
                        print_int pos;
                        print "\n";
                        rw_vector::set (positions, num, rule_pos rule);
                    };

                my _ = apply check_rule rules;
            herein
                fun look_pos (RULE { num, ... })
                    =
                    positions[ num ];
            end;


            #  rest_is_null: TRUE for items of the form A -> x .B y, where y is nullable 

            fun rest_is_null (ITEM { rule, dot, rhs_after=>NONTERMINAL _ ! _} )
                    =>
                    dot >= (look_pos rule);

                rest_is_null _
                    =>
                    FALSE;
            end;

            # map core to a new core including only items of the form A -> x. or
            # A -> x. B y, where y =*=> epsilon.  It also adds epsilon productions to the
            # core. Each item is given a ref cell to hold the lookahead nonterminals for
            # it.

            stipulate
                fun f (item as ITEM { rhs_after=>NIL, ... }, r)
                        =>
                        (item, REF NIL) ! r;

                    f (item, r)
                        =>
                        if   (rest_is_null item)
                            
                             (item, REF NIL) ! r;
                        else
                             r;
                        fi;
                end;
            herein
                fun map_core (c as CORE (items, state))
                    =
                    {   eps_items
                            =
                            map  (\\ rule=>(ITEM { rule, dot=>0, rhs_after=>NIL },
                                            REF (NIL:  List( Terminal ))); end 
                                 ) (eps_prods c);

                        TMPCORE (item_list::union (list::fold_backward f [] items, eps_items), state);
                    };
            end;

            new_nodes
                =
                map map_core (nodes graph);

            exception FIND;

            #  findRef: state * item -> lookahead ref cell for item 

            stipulate
                states =   rw_vector::from_list new_nodes;
                dummy  =   REF NIL;
            herein
                fun find_ref (state, item)
                    =
                    {   my TMPCORE (l, _) = states[ state ];
                        #       
                        case (item_list::find((item, dummy), l))
                            #
                            THE (_, look_ref)
                                 =>
                                 look_ref;

                            NULL =>
                                {   print "find failed: state ";
                                    print_int state;
                                    print "\nitem =\n";
                                    print_item item;
                                    print "\nactual items =\n";

                                    apply (\\ (i, _) =  {   print_item i;
                                                            print "\n";
                                                        }
                                          )
                                          l;

                                    raise exception FIND;
                                };
                        esac;
                    };
            end; 


            #  findRuleRefs: state -> rule -> lookahead refs for rule. 
            stipulate
                shift = shift graph;
            herein  
                fun find_rule_refs state (rule as RULE { rhs=>NIL, ... } )
                        =>                    #  handle epsilon productions 
                        [find_ref (state, ITEM { rule, dot=>0, rhs_after=>NIL } )];

                    find_rule_refs state (rule as RULE { rhs=>symbol ! rest, ... } )
                        =>
                        scan (shift (state, symbol), rest, pos - 1)
                        where 
                            pos =   int::max (look_pos rule, 1);
                            #
                            fun scan' (state, NIL, pos, result)
                                    =>
                                    find_ref (state, ITEM { rule, dot=>pos, rhs_after=>NIL } ) ! result;

                                scan'(state, rhs as symbol ! rest, pos, result)
                                    =>
                                    scan' (shift (state, symbol), rest, pos+1,
                                            find_ref (state, ITEM { rule, dot=>pos, rhs_after=>rhs } ) ! result);
                            end;

                            # find first item of the form A -> x .B y, where y =*=> epsilon and
                            # x is not epsilon, or A -> x.  use scan' to pick up all refs after this
                            # point

                            fun scan (state, NIL, _)
                                    =>
                                    [   find_ref (state, ITEM { rule, dot=>pos, rhs_after=>NIL } )];

                                scan (state, rhs, 0)
                                    =>
                                    scan'(state, rhs, pos, NIL);

                                scan (state, symbol ! rest, place)
                                    =>
                                    scan (shift (state, symbol), rest, place - 1);
                            end;

                        end;
                end;

            end;

            # function to compute for some nonterminal n the set of nonterminals A added
            # through the closure of nonterminal n such that n =c*=> .A x, where x is
            # nullable

            fun nonterms_w_null nt
                =
                dfs (nt, nonterm_set::empty)
                where 
                    #
                    fun collect_nonterms n
                        =
                        list::fold_backward
                            (   \\ (rule as RULE { rhs as NONTERMINAL n ! _, ... }, r)
                                        =>
                                        case (rest_is_null (ITEM { dot=>0, rhs_after=>rhs, rule } ))
                                            #
                                            TRUE  =>   n ! r;
                                            FALSE =>   r;
                                        esac;

                                  (_, r) => r;
                                end 
                            )
                            []
                            (produces n);

                    fun dfs (a as (n, r))
                        =
                        if (nonterm_set::exists a)
                            # 
                            r; 
                        else
                            list::fold_backward
                                dfs
                                (nonterm_set::set (n, r))
                                (collect_nonterms n);
                        fi;

                end;

                stipulate
                    data = make_rw_vector (nonterms, nonterm_set::empty);
                    #
                    fun f n
                        =
                        if (n != nonterms)
                            #
                            rw_vector::set (data, n, nonterms_w_null (NONTERM n));
                            f (n+1);
                        fi;

                    my _ = f 0;
                herein
                    fun nonterms_w_null (NONTERM nt)
                        =
                        data[ nt ];
                end;

                # look_info: for some nonterminal n the set of nonterms A added
                # through the closure of the nonterminal such that n =c+=> .Ax and the
                # lookahead accumlated for each nonterm A 

                fun look_info nt
                    =
                    dfs ((nt, NIL), ntl::empty)
                    where  
                        #
                        fun collect_nonterms n
                            =
                            list::fold_backward
                                (   \\ (RULE { rhs=>NONTERMINAL n ! t, ... }, r)
                                            =>
                                            case (ntl::find ((n, NIL), r))
                                                #
                                                THE (key, data) =>   ntl::set ((n, look::union (data, first t)), r);
                                                NULL            =>   ntl::set ((n, first t), r);
                                            esac;

                                        (_, r) =>   r;
                                    end 
                                )
                                ntl::empty
                                (produces n);

                        fun dfs (a as ((key1, data1), r))
                            =
                            case (ntl::find a)
                                #
                                THE (_, data2) =>   ntl::set((key1, look::union (data1, data2)), r);
                                NULL           =>   ntl::fold dfs (collect_nonterms key1) (ntl::set a);
                            esac;
                     end;

                look_info
                    = 
                    if   (not debug)
                        
                         look_info;
                    else
                         \\ nt =>
                             info
                             where 
                                 print "look_info of ";
                                 pr_nonterm nt;
                                 print "=\n";

                                 info = look_info nt;

                                 ntl::apply
                                     (   \\ (nt, lookahead)
                                             =>
                                             {   pr_nonterm nt;
                                                 print ": ";
                                                 pr_look lookahead;
                                                 print "\n\n";
                                             };
                                         end 
                                     )
                                     info;
                             end; end ;
                    fi;

                # prop_look: propagate lookaheads for nonterms added in the closure of a
                # nonterm.  Lookaheads must be propagated from each nonterminal m to
                # all nonterminals { n | m =c+=> nx, where x=*=>epsilon }

                fun prop_look ntl
                    =
                    ntl::fold  upd_nonterm  ntl  ntl
                    where 

                        fun upd_lookhd new_look (nt, r)
                            =
                            case (ntl::find ((nt, new_look), r))
                                #
                                THE (_, old_look)
                                    =>
                                    ntl::set((nt, look::union (new_look, old_look)), r);

                                NULL => raise exception (LALR 241);
                            esac;

                        fun upd_nonterm ((nt, get), r)
                            =
                            nonterm_set::fold
                                (upd_lookhd get)
                                (nonterms_w_null nt)
                                r;
                     end;

                prop_look
                    = 
                    if (not debug)
                        # 
                        prop_look;
                    else
                        \\ ntl
                            =>
                            info
                            where 

                                print "prop_look =\n";

                                info = prop_look ntl;

                                ntl::apply
                                    (   \\ (nt, lookahead)
                                            =>
                                            {   pr_nonterm nt;
                                                print ": ";
                                                pr_look lookahead;
                                                print "\n\n";
                                            };
                                        end 
                                    )
                                    info;
                            end; end ;
                    fi;

                # Now put the information from these functions together.
                # Create a function which takes a nonterminal n
                # and returns a list of triplets of
                # (a nonterm added through closure,
                # the lookahead for the nonterm,
                # whether the nonterm should include the lookahead for the nonterminal
                # whose closure is being taken (i.e. first (y) for an item j of the
                # form A -> x .n y and lookahead (j) if y =*=> epsilon)
                #
                stipulate
                    data =   make_rw_vector (nonterms, NIL:  List( (Nonterminal, List( Terminal ), Bool)));
                    #
                    fun do_nonterm i
                        =
                        result
                        where 
                            #
                            nonterms_followed_by_null
                                =
                                nonterms_w_null i;

                            nonterms_added_through_closure
                                = 
                                ntl::make_list (prop_look (look_info i));

                            result
                                =
                                map 
                                    (   \\ (nt, l)
                                            =
                                            (nt, l, nonterm_set::exists (nt, nonterms_followed_by_null))
                                    )
                                    nonterms_added_through_closure;

                            if debug
                                #
                                print "closure_nonterms = ";
                                pr_nonterm i;
                                print "\n";

                                apply
                                    (   \\ (nt, get, nullable)
                                            =
                                            {   pr_nonterm nt;
                                                print ":";
                                                pr_look get;

                                                case nullable
                                                    #
                                                    FALSE =>   print "(FALSE)\n";
                                                    TRUE  =>   print "(TRUE)\n";
                                                esac;
                                            }
                                    )
                                    result;

                                print "\n";
                            fi;
                        end;

                    fun f i
                        =
                        if (i != nonterms)
                            # 
                            rw_vector::set (data, i, do_nonterm (NONTERM i));
                            f (i+1);
                        fi;

                    my _ = f 0;
                herein
                    fun closure_nonterms (NONTERM i)
                        =
                        data[ i ];
                end;

                # add_nonterm_lookahead: Add lookahead to all completion items for rules added
                # when the closure of a given nonterm in some state is taken.  It returns
                # a list of lookahead refs to which the given nonterm's lookahead should
                # be propagated.   For each rule, it must trace the shift/gotos in the LR (0)
                # graph to find all items of the form A-> x .B y where y =*=> epsilon or
                # A -> x.

                fun add_nonterm_lookahead (nt, state)
                    =
                    list::fold_backward f [] (closure_nonterms nt)
                    where 
                        #
                        fun f ((nt, lookahead, nullable), r)
                            =
                            if   nullable    refs @ r;
                            else                    r;
                            fi
                            where 

                                refs =   map  (find_rule_refs state)  (produces nt);
                                refs =   list::cat  refs;

                                apply (\\ r =   r := (look::union (*r, lookahead)))
                                      refs;
                            end;
                    end;

                # scan_core: Scan a core for all items of the form A -> x .B y.
                #
                # Applies add_nonterm_lookahead to each such B, and then merges first (y)
                # into the list of refs returned by add_nonterm_lookahead.
                #
                # It returns a list of ref * ref List for all the items where y =*=> epsilon

                fun scan_core (CORE (l, state))
                    =
                    f (l, NIL)
                    where 

                        fun f ((item as ITEM { rhs_after=> NONTERMINAL b ! y, dot, rule } ) ! t, r)
                                =>
                                case (add_nonterm_lookahead (b, state))
                                    #
                                    NIL => r;

                                    l=> {   first_y = first y;
                                            #
                                            new_r =  if   (dot >= (look_pos rule))
                                                          (find_ref (state, item), l) ! r;
                                                     else r;
                                                     fi;

                                            apply (\\ r =   r := look::union(*r, first_y))
                                                  l;

                                            f (t, new_r);
                                        };
                               esac;

                            f (_ ! t, r) =>   f (t, r);
                            f (NIL,     r) =>   r;
                        end;
                    end;

                # Add end-of-parse symbols to set of items
                # consisting of all items immediately
                # derived from the start symbol

                fun add_eop (c as CORE (l, state), eop)
                    =
                    apply f l
                    where 
                        #
                        fun f (item as ITEM { rule, dot, ... } )
                            =
                            {   refs = find_rule_refs state rule;

                                # First take care of kernel items.
                                #
                                # Add the end-of-parse symbols to
                                # the lookahead sets for these items.
                                #
                                # Epsilon productions of the start symbol
                                # do not need to be handled specially
                                # because they will be in the kernel also.

                                apply   (\\ r =  r := look::union(*r, eop))   refs;

                                # Now take care of closure items.
                                # These are all nonterminals 'c' which
                                # have a derivation 's' =+=> .'c' x, where x is nullable

                                if (dot >= (look_pos rule))
                                    # 
                                    case item
                                        #
                                        ITEM { rhs_after=>NONTERMINAL b ! _, ... }
                                            =>
                                            case (add_nonterm_lookahead (b, state))
                                                #
                                                NIL =>   ();
                                                l   =>   apply   (\\ r =  r := look::union(*r, eop))   l;
                                            esac;

                                        _ => ();
                                    esac;
                                fi;
                            };
                    end;

                fun iterate l
                    =
                    loop FALSE
                    where 

                        fun f lookahead (NIL, done)
                                =>
                                done;

                            f lookahead (h ! t, done)
                                =>
                                {   old = *h;
                                    #
                                    h := look::union (old, lookahead);

                                    if (length *h  !=  length old)   f lookahead (t, FALSE);
                                    else                             f lookahead (t, done);
                                    fi;
                               };
                        end;

                        fun g ((from, to) ! rest, done)
                                =>
                                {   new_done = f  *from  (to, done);
                                    g (rest, new_done);
                                };

                            g (NIL, done) => done;
                        end;

                        fun loop TRUE  =>   ();
                            loop FALSE =>   loop (g (l, TRUE));
                        end;
                    end;

                lookahead =   list::cat (map scan_core (nodes graph));

                # used to scan the item list of a TMPCORE and remove the items not
                # being reduced

                fun create_lcore_list ((item as ITEM { rhs_after=>NIL, ... }, REF l), r)
                        =>
                        (item, l) ! r;

                    create_lcore_list (_, r)
                        =>
                        r;
                end;

            add_eop (graph::core graph 0, eop);
            iterate lookahead;

            map (   \\ (TMPCORE (l, state))
                        =
                        LCORE (list::fold_backward create_lcore_list [] l, state)
                )
                new_nodes;
        };
};


Comments and suggestions to: bugs@mythryl.org

PreviousUpNext