src/lib/compiler/back/low/intel32/treecode/floating-point-code-intel32-g.pkg

15.4.266 src/lib/compiler/back/low/intel32/treecode/floating-point-code-intel32-g.pkg

## floating-point-code-intel32-g.pkg

# Compiled by:
#     src/lib/compiler/back/low/intel32/backend-intel32.lib

# This phase takes a cluster with pseudo intel32
# fp instructions, performs liveness analysis
# to determine their live ranges, and rewrites
# the program into the correct stack based code.
#
# The Basics
# ----------
# o We assume there are 7 pseudo fp registers, %fp (0), ..., %fp (6),
#   which are mapped onto the %st stack.  One stack location is reserved
#   for holding temporaries.
# o Important: for floating point comparisons, we actually need
#   two extra stack locations in the worst case.  We handle this by
#   specifying that the instruction define an extra temporary fp register
#   when necessary.
# o The mapping between %fp <-> %st may change from program point to
#   program point.  We keep track of this lazy renaming and try to minimize
#   the number of FXCH that we insert.
# o At split and merge points, we may get inconsistent %fp <-> %st mappings.
#   We handle this by inserting the appropriate renaming code.
# o Parallel copies (renaming) are rewritten into a sequence of FXCHs!
#
# Pseudo fp instructions     Semantics
# --------------------------------------
# FMOVE   src, dst           dst := src
# FILOAD  ea, dst            dst := cvti2f (mem[ea])
# FBINOP  lsrc, rsrc, dst    dst := lsrc * rsrc
# FIBINOP lsrc, rsrc, dst    dst := lsrc * cvti2f (rsrc)
# FUNOP   src, dst           dst := unaryOp src
# FCMP    lsrc, rsrc         fp condition code := fcmp (lsrc, rsrc)
#
# An instruction may use its source operand (s) destructively.
# We find this info using a global liveness analysis.
#
# The Translation
# ---------------
# o We keep track of the namings between %fp registers and the
#   %st(..) staack locations.
# o FXCH and FLDL are inserted at the appropriate places to move operands
#   to %st (0).  FLDL is used if the operand is not dead.  FXCH is used
#   if the operand is the last use.
# o FCOPY's between pseudo %fp registers are done by software renaming
#   and generate no code by itself!
# o FSTL %st (1) are also generated to pop the stack after the last use
#   of an operand.
#
# Note
# ----
# 1. This module should be run after floating point register allocation.
#
# -- Allen Leung Leung (leunga@cs.nyu.edu)
#
# See also:
#
#     Some notes on the new MLRISC Intel32 floating point code generator (Draft)
#     Allen Leung, Lal George
#     circa 2000, 17p
#     http://www.smlnj.org//compiler-notes/intel32-fp.ps

###               "You can't really focus yourself for years
###                unless you have undivided concentration,
###                which too many spectators would have destroyed."
###
###                                       -- Andrew Wiles

stipulate
    package an  =  note;                                                # note                                  is from   src/lib/src/note.pkg
    package ast =  asm_stream;                                          # asm_stream                            is from   src/lib/compiler/back/low/emit/asm-stream.pkg
    package cos =  registerkinds_junk::cos;                             # "cos" == "colorset".
    package fil =  file__premicrothread;                                # file__premicrothread                  is from   src/lib/std/src/posix/file--premicrothread.pkg
    package iht =  int_hashtable;                                       # int_hashtable                         is from   src/lib/src/int-hashtable.pkg
    package im  =  int_red_black_map;                                   # int_red_black_map                     is from   src/lib/src/int-red-black-map.pkg
    package lbl =  codelabel;                                           # codelabel                             is from   src/lib/compiler/back/low/code/codelabel.pkg
    package lem =  lowhalf_error_message;                               # lowhalf_error_message                 is from   src/lib/compiler/back/low/control/lowhalf-error-message.pkg
    package lms =  list_mergesort;                                      # list_mergesort                        is from   src/lib/src/list-mergesort.pkg
    package odg =  oop_digraph;                                         # oop_digraph                           is from   src/lib/graph/oop-digraph.pkg
    package pp  =  standard_prettyprinter;                              # standard_prettyprinter                is from   src/lib/prettyprint/big/src/standard-prettyprinter.pkg
    package rkj =  registerkinds_junk;                                  # registerkinds_junk                    is from   src/lib/compiler/back/low/code/registerkinds-junk.pkg
    package rwv =  rw_vector;                                           # rw_vector                             is from   src/lib/std/src/rw-vector.pkg
    package sos =  string_outstream;                                    # string_outstream                      is from   src/lib/compiler/back/low/library/string-out-stream.pkg

    Pp = pp::Pp;

    debug = FALSE;        # Set this to TRUE to debug this module
                          # set this to FALSE for production use.
                          #
    debug_liveness = TRUE; # Debug liveness analysis
    debug_dead = FALSE;    # Debug dead code removal
    sanity_check = TRUE;

herein

    # We are invoked from:
    #
    #     src/lib/compiler/back/low/intel32/regor/regor-intel32-g.pkg

    generic package   floating_point_code_intel32_g   (
        #             =============================
        #
        package mcf: Machcode_Intel32;                                          # Machcode_Intel32                      is from   src/lib/compiler/back/low/intel32/code/machcode-intel32.codemade.api

        package mu:  Machcode_Universals                                        # Machcode_Universals                   is from   src/lib/compiler/back/low/code/machcode-universals.api
                     where
                         mcf == mcf;                                            # "mcf" == "machcode_form" (abstract machine code).

        package mcg: Machcode_Controlflow_Graph                                 # Machcode_Controlflow_Graph            is from   src/lib/compiler/back/low/mcg/machcode-controlflow-graph.api
                     where
                         mcf == mcf;                                            # "mcf" == "machcode_form" (abstract machine code).

        package liv: Liveness                                                   # Liveness                              is from   src/lib/compiler/back/low/regor/liveness.api
                where
                    mcg == mcg;

        package ae: Machcode_Codebuffer_Pp                                      # Machcode_Codebuffer_Pp                is from   src/lib/compiler/back/low/emit/machcode-codebuffer-pp.api
                where
                     mcf == mcf                                                 # "mcf" == "machcode_form" (abstract machine code).
                also cst::pop == mcg::pop;                                      # "pop" == "pseudo_op".
    )
    : (weak) Machcode_Controlflow_Graph_Improver                                # Machcode_Controlflow_Graph_Improver   is from   src/lib/compiler/back/low/mcg/machcode-controlflow-graph-improver.api
    {
        # Export to client packages:
        #
        package mcg = mcg;

        stipulate
            package tcf =  mcf::tcf;                                                    # "tcf" == "treecode_form".
            package rgk =  mcf::rgk;                                                    # "rgk" == "registerkinds".
        herein

            Flowgraph = mcg::Machcode_Controlflow_Graph;
            An = an::Notes;

            improvement_name = "Intel32 (x86) floating point rewrite";

            fp_debug_mode_intel32 =  lowhalf_control::make_bool ("fp_debug_mode_intel32", "intel32 fp debug mode");
            fp_trace_mode_intel32 =  lowhalf_control::make_bool ("fp_trace_mode_intel32", "intel32 fp trace mode");

            fun error msg
                =
                lem::error("floating_point_code_intel32_g", msg);

            fun pr msg
                =
                fil::write (*lowhalf_control::debug_stream, msg);

            i2s = int::to_string;

            #####################################
            # No overflow checking is needed for
            # integer arithmetic in this module
            #####################################

            fun registerlist_to_registerset l
                =
                list::fold_backward
                    #
                    rkj::cls::add_codetemp_to_appropriate_kindlist
                    #
                    rkj::cls::empty_codetemplists
                    #
                    l;

            fun registerlist_to_string l
                =
                rkj::cls::codetemplists_to_string
                    #
                    (registerlist_to_registerset l);

            exception TARGET_MOVED_TO  odg::Node_Id;        # Annotation to mark split edges.

            #########################################################################
            # Base instruction-handling routines
            #########################################################################

            # Annotate an instruction:
            #
            fun mark (op,           []) =>  op;
                mark (op, note ! notes) =>  mark (mcf::NOTE { op, note }, notes);
            end;

            # Add pop suffix to a binary operator:
            #
            fun pop mcf::FADDL  => mcf::FADDP;   pop mcf::FADDS  => mcf::FADDP;
                pop mcf::FSUBL  => mcf::FSUBP;   pop mcf::FSUBS  => mcf::FSUBP;
                pop mcf::FSUBRL => mcf::FSUBRP;  pop mcf::FSUBRS => mcf::FSUBRP;
                pop mcf::FMULL  => mcf::FMULP;   pop mcf::FMULS  => mcf::FMULP;
                pop mcf::FDIVL  => mcf::FDIVP;   pop mcf::FDIVS  => mcf::FDIVP;
                pop mcf::FDIVRL => mcf::FDIVRP;  pop mcf::FDIVRS => mcf::FDIVRP;
                pop _ => error "fbinop::pop";
            end;

            # Invert the operator:
            #
            fun invert mcf::FADDL  => mcf::FADDL;   invert mcf::FADDS  => mcf::FADDS;
                invert mcf::FSUBL  => mcf::FSUBRL;  invert mcf::FSUBS  => mcf::FSUBRS;
                invert mcf::FSUBRL => mcf::FSUBL;   invert mcf::FSUBRS => mcf::FSUBS;
                invert mcf::FMULL  => mcf::FMULL;   invert mcf::FMULS  => mcf::FMULS;
                invert mcf::FDIVL  => mcf::FDIVRL;  invert mcf::FDIVS  => mcf::FDIVRS;
                invert mcf::FDIVRL => mcf::FDIVL;   invert mcf::FDIVRS => mcf::FDIVS;
                invert mcf::FADDP  => mcf::FADDP;   invert mcf::FMULP  => mcf::FMULP;
                invert mcf::FSUBP  => mcf::FSUBRP;  invert mcf::FSUBRP => mcf::FSUBP;
                invert mcf::FDIVP  => mcf::FDIVRP;  invert mcf::FDIVRP => mcf::FDIVP;
                invert _ => error "invert";
            end;

            # Pseudo instructions:
            #
            fun fld_fn (mcf::FP32, ea) => mcf::flds ea;
                fld_fn (mcf::FP64, ea) => mcf::fldl ea;
                fld_fn (mcf::FP80, ea) => mcf::fldt ea;
            end;

            fun fild_fn (mcf::INT8, ea) => error "FILD";
                fild_fn (mcf::INT16, ea) => mcf::fild ea;
                fild_fn (mcf::INT1, ea) => mcf::fildl ea;
                fild_fn (mcf::INT2, ea) => mcf::fildll ea;
            end;

            fun fstp_fn (mcf::FP32, ea) => mcf::fstps ea;
                fstp_fn (mcf::FP64, ea) => mcf::fstpl ea;
                fstp_fn (mcf::FP80, ea) => mcf::fstpt ea;
            end;

            fun fst_fn (mcf::FP32, ea) => mcf::fsts ea;
                fst_fn (mcf::FP64, ea) => mcf::fstl ea;
                fst_fn (mcf::FP80, ea) => error "FSTT";
            end;

            # -----------------------------------------------------------------------
            # Prettyprint routines
            # -----------------------------------------------------------------------
            fun freg_to_string f
                =
                "%f" + i2s (rkj::intrakind_register_id_of f);

            fun fregs_to_string s
                =
                list::fold_backward

                    \\ (r, "") =>  freg_to_string  r;
                       (r,  s) =>  freg_to_string  r   + " " + s;
                    end

                    ""

                    s;

            fun blknum_of (mcg::BBLOCK { id, ... } )
                =
                id;

            # -----------------------------------------------------------------------
            # A stack enum that mimics the intel32 floating point stack
            # and keeps track of namings between %st (n) and %fp (n).
            # -----------------------------------------------------------------------
            package st
                :
                api {
                    Stack;
                    Stnum = Int; #  0 -- 7
                    create:  Void -> Stack;
                    stack0:  Stack;
                    copy:    Stack -> Stack;
                    clear:   Stack -> Void;
                    fp:      (Stack, rkj::Interkind_Register_Id) -> Stnum;
                    st:      (Stack, Stnum) -> rkj::Interkind_Register_Id;
                    set:     (Stack, Stnum, rkj::Interkind_Register_Id) -> Void;
                    push:    (Stack, rkj::Interkind_Register_Id) -> Void;
                    xch:     (Stack, Stnum, Stnum) -> Void;
                    pop:     Stack -> Void;
                    depth:   Stack -> Int;
                    non_full:  Stack -> Void;
                    kill:    (Stack, rkj::Codetemp_Info) -> Void;
                    stack_to_string:  Stack -> String;
                    equal:  (Stack, Stack) -> Bool;
                }
                =
                package {

                    Stnum = Int;

                    Stack = STACK { st:   rwv::Rw_Vector( rkj::Interkind_Register_Id ), # Mapping %st -> %fp registers
                                    fp:   rwv::Rw_Vector( Stnum ),                      # Mapping %fp -> %st registers
                                    sp:   Ref( Int )                                    # Stack pointer.
                                  };

                    # Create a new stack:
                    #
                    fun create ()
                        =
                        STACK { st => rwv::make_rw_vector (8,-1),
                                fp => rwv::make_rw_vector (7, 16),
                                sp => REF -1
                              };

                    stack0 = create();

                    # Copy a stack:
                    #
                    fun copy (STACK { st, fp, sp } )
                        =
                        {   st' = rwv::make_rw_vector (8, -1);
                            fp' = rwv::make_rw_vector (7, 16);

                            rwv::copy  { from => st,  into => st',  at => 0 };
                            rwv::copy  { from => fp,  into => fp',  at => 0 };

                            STACK { st=>st', fp=>fp', sp=>REF *sp };
                        };

                    # Depth of stack:
                    #
                    fun depth (STACK { sp, ... } )
                        =
                        *sp + 1;

                    fun non_full (STACK { sp, ... } )
                        =
                        if   (*sp >= 7)   error "stack overflow";   fi;

                     #  Given %st (n), lookup the corresponding %fp (n)
                     #
                     fun st (STACK { st, sp, ... }, n)
                         =
                         rwv::get (st, *sp - n);

                    # Given %fp (n), lookup the corresponding %st (n)
                    #
                    fun fp (STACK { fp, sp, ... }, n)
                        =
                        *sp - rwv::get (fp, n);

                    fun stack_to_string stack
                        =
                        {   depth = depth stack;
                            #
                            fun f i
                                =
                                if   (i >= depth   )   " ]";
                                else                   "%st(" + i2s i + ")=%f" + i2s (st (stack, i)) + " " + f (i+1);
                                fi;

                            "[ " + f 0;
                        };

                    fun clear (STACK { st, fp, sp, ... } )
                        =
                        {   sp := -1;
                            #
                            rwv::map_in_place  (\\ _ = -1)  st;
                            rwv::map_in_place  (\\ _ = 16)  fp;
                        };

                    # Set %st (n) := %f
                    #
                    fun set (STACK { st, fp, sp, ... }, n, f)
                        =
                        {   rwv::set (st, *sp - n, f);
                            #
                            if (f >= 0)   rwv::set (fp, f, *sp - n);   fi;
                        };

                    # Pop one entry:
                    #
                    fun pop (STACK { sp, st, fp, ... } )
                        =
                        sp := *sp - 1;

                    # Push %fp (f) onto %st (0)
                    #
                    fun push (stack as STACK { sp, ... }, f)
                        =
                        {   sp :=  *sp + 1;
                            #
                            set (stack, 0, f);
                        };

                    # Exchange the contents of %st (m) and %st (n):
                    #
                    fun xch (stack, m, n)
                        =
                        {   f_m = st (stack, m);
                            f_n = st (stack, n);

                            set (stack, m, f_n);
                            set (stack, n, f_m);
                        };

                    fun kill (STACK { fp, ... }, f)
                        =
                        rwv::set (fp, rkj::intrakind_register_id_of f, 16);

                    fun equal (st1, st2)
                        =
                        {   m = depth st1;
                            n = depth st2;

                            fun loop i
                                =
                                i >= m
                                or
                                (    st (st1, i) == st (st2, i)
                                     and
                                     loop (i+1)
                                );

                            m == n
                            and
                            loop 0;
                        };

                };                              # pkg st

            # -----------------------------------------------------------------------
            # Module to handle forward propagation.
            # Forward propagation does the following:
            # Given an instruction
            #   fmove mem, %fp (n)
            # We delay the generation of the load until the first use of %fp (n),
            # which we can further improve by folding the load into the operand
            # of the instruction, if it is the last use of this operand.
            # If %fp (n) is dead then no load is necessary.
            # Of course, we have to be careful whenever we encounter other
            # instruction with a write.
            # -----------------------------------------------------------------------*)
            /*
            package ForwardPropagation :>
            api
               type readbuffer
               my create:  st::stack -> readbuffer
               my load:    readbuffer * rgk::register * mcf::fsize * mcf::ea -> Void
               my getreg:  readbuffer * Bool * rgk::register * List( mcf::instruction ) ->
                                 mcf::operand * List( mcf::instruction )
               my flush:   readbuffer * List( mcf::instruction ) -> List( mcf::instruction )
            end =
            pkg

               enum readbuffer =
                  READ of { stack:     st::stack,
                            loads:      rwv::Rw_Vector( Null_Or( mcf::fsize * mcf::ea ) ),
                            pending:   Ref( Int )
                          }

               fun create stack =
                   READ { stack   =stack,
                        loads   =rwv::make_rw_vector (8, NULL),
                        pending =REF 0
                       }

               fun load (READ { pending, loads, ... }, fd, fsize, mem) =
                   (rwv::set (loads, fd, THE (fsize, mem));
                    pending := *pending + 1
                   )

               /* Extract the operand for a register
                * If it has a delayed load associated with it then
                * we perform the load at this time.
                */
               fun getreg (READ { pending, loads, stack, ... }, isLastUse, fs, code) =
                   case rwv::get (loads, fs) of
                     NULL =>
                     let n = st::st (stack, fs)
                     in  if isLastUse
                         then (ST n, code)
                         else let code = mcf::FLDL (ST n) ! code
                              in  st::push (stack, fs); (ST0, code)
                              end
                     end
                   | THE (fsize, mem) =>
                     let code = fld_fn (fsize, mem) ! code
                     in  rwv::set (loads, fs, NULL); #  Delete load
                         pending := *pending - 1;
                         st::push (stack, fs);        #  fs is now in place
                         (ST0, code)
                     end

               /* Extract a binary operand.
                * We'll try to fold this into the operand
                */
               fun getopnd (READ { pending, loads, stack, ... }, isLastUse, mcf::FPR fs, code) =
                   (case rwv::get (loads, fs) of
                     NULL =>
                     let n = st::st (stack, fs)
                     in  if isLastUse fs #  regmap XXX
                         then (ST n, code)
                         else let code = mcf::FLDL (ST n) ! code
                              in  st::push (stack, fs); (ST0, code)
                              end
                     end
                   | THE (fsize, mem) =>
                      (rwv::set (loads, fs, NULL); #  Delete load
                       pending := *pending - 1;
                       if isLastUse fs then (mem, code)
                       else let code = fld_fn (fsize, mem) ! code
                            in  st::push (stack, fs);
                                (ST0, code)
                            end
                      )
                   )
                 | getopnd(_, _, ea, code) = (ea, code)

               fun flush (READ { pending=REF 0, ... }, code) = code

            end #  pkg
             */

            # -----------------------------------------------------------------------
            # Module to handle delayed stores.
            # Delayed store does the following:
            # Given an instruction
            #   fstore %fp (n), %mem
            # We delay the generation of the store until necessary.
            # This gives us an opportunity to rearrange the order of the stores
            # to eliminate unnecessary fxch.
            # -----------------------------------------------------------------------
            /*
            package DelayStore :>
            api
               type writebuffer
               my create:  st::stack -> writebuffer
               my flush:  writebuffer *  List( mcf::instruction ) -> List( mcf::instruction )
            end =
            pkg
               enum writebuffer =
                  WRITE of { front:    Ref( List (mcf::ea * rgk::register) ),
                             back:     Ref( List (mcf::ea * rgk::register) ),
                             stack:    st::stack,
                             pending:  Ref( Int )
                           }
               fun create stack = WRITE { front=REF [], back=REF [],
                                        stack=stack, pending=REF 0 }
               fun flush (WRITE { pending=REF 0, ... }, code) = code
            end #  pkg
            */

            # -----------------------------------------------------------------------
            # Main routine.
            #
            # Algorithm:
            #  1. Perform liveness analysis.
            #  2. For each fp register, mark all its last use point (s).
            #     Registers are popped at their last uses.
            #  3. Rewrite the instructions basic block by basic block.
            #  4. Insert shuffle code at basic block boundaries.
            #     When necessary, split critical edges.
            #  5. Sacrifice a goat to make sure things don't go wrong.
            # -----------------------------------------------------------------------
            fun run (mcg' as odg::DIGRAPH mcg)
                =
                {
                    number_of_blks = mcg.capacity ();

                    entry_i        = list::head (mcg.entries ());
                    exit_i         = list::head (mcg.exits   ());

                    get_float_codetemp_infos = rgk::get_codetemp_infos_for_kind  rkj::FLOAT_REGISTER;                     # extract the fp component of registerset

                    st_table = rwv::from_fn (8, \\ n = mcf::ST (rgk::st n));

                    fun st_fn n
                        =
                        {   if (sanity_check and (n < 0 or n >= 8))
                                   pr("WARNING BAD %st(" + i2s n + ")\n");
                            fi;

                            rwv::get (st_table, n);
                        };

                    fun fxch_fn n
                        =
                        mcf::fxch { operand=>rgk::st n };

                    st0 = st_fn 0;
                    st1 = st_fn 1;
                    pop_st = mcf::fstpl st0; #  Instruction to pop an entry

                    # Dump instructions:
                    #
                    fun dump instrs
                        =
                        {   # buf =  ast::with_stream   *lowhalf_control::debug_stream   ae::make_codebuffer   [];

                            text =  pp::prettyprint_to_string [] {.
                                        pp = #pp;
                                        buf = ae::make_codebuffer pp [];
                                        apply   buf.put_op   (reverse instrs);
                                    };

                            print text;
                        };

                    # Create assembly-code for one machine instruction:
                    #
                    fun assemble op
                        =
                        {
#                           stream_buf    =  sos::make_stream_buf ();
#                           stream =  sos::open_string_out  stream_buf;
#                           buf =  ast::with_stream  stream  ae::make_codebuffer  [];
#                           buf.put_op  op;
#                           s = sos::get_string stream_buf;

                            s =     pp::prettyprint_to_string [] {.
                                        pp = #pp;
                                        buf = ae::make_codebuffer pp [];
                                        buf.put_op  op;
                                    };
                            n = string::length_in_bytes s;

                            if (n == 0)    s;
                            else           string::substring (s, 0, n - 1);                     # Drop terminal newline?
                            fi;
                        };

                    # ------------------------------------------------------------------
                    # Perform liveness analysis on the floating point variables
                    # p::S. I'm glad I didn't throw away the code liveness code.
                    # ------------------------------------------------------------------

                    def_use = mu::def_use rkj::FLOAT_REGISTER;   #  Def/use properties

                    my { live_in=>live_in_table, live_out=>live_out_table }
                        =
                        liv::liveness {
                             def_use,
                             #  updateRegister=rgk::updateRegistersByKind rkj::FLOAT_REGISTER,
                             get_codetemps_of_our_kind => get_float_codetemp_infos
                           } mcg';

                    # ------------------------------------------------------------------
                    # Scan the instructions compute the last uses and dead definitions
                    # at each program point.  Ideally we can do this during the code
                    # rewriting phase. But that's probably too error prone for now.
                    # ------------------------------------------------------------------
                    fun compute_last_use (blknum, ops, live_out)
                        =
                        {   fun scan ([], _, last_use)
                                    =>
                                    last_use;

                                scan (i ! instrs, live, last_use)
                                    =>
                                    {   (def_use  i) ->   (d, u);
                                        #
                                        d       = cos::make_colorset d; # Definitions
                                        u       = cos::make_colorset u; # uses
                                        #
                                        dead    = cos::get_codetemps_in_colorset (cos::difference_of_colorsets (d, live));
                                        live    = cos::difference_of_colorsets (live, d);
                                        last    = cos::get_codetemps_in_colorset (cos::difference_of_colorsets (u, live));
                                        live    = cos::union_of_colorsets (live, u);

                                        if (debug and debug_liveness)
                                            #
                                            case last
                                                #
                                                [] => ();
                                                _  => print (assemble i + "\tlast use=" + fregs_to_string last + "\n");
                                            esac;
                                        fi;

                                        scan (instrs, live, (last, dead) ! last_use);
                                    };
                            end;

                            live_out_set = cos::make_colorset live_out;

                            if (debug and debug_liveness)
                                #
                                print("LiveOut(" + i2s blknum + ") = " +
                                fregs_to_string (cos::get_codetemps_in_colorset live_out_set) + "\n");
                            fi;

                            scan (*ops, live_out_set, []);
                        };

                    ####################################################################
                    # Temporary work space

                    stipulate
                        (rgk::get_id_range_for_physical_register_kind  rkj::FLOAT_REGISTER)
                            ->
                            { max_register_id, ... };
                    herein
                        n = max_register_id + 1;
                    end;

                    last_use_table  = rwv::make_rw_vector (n,-1);       # Table for marking last uses.
                    use_table       = rwv::make_rw_vector (n,-1);       # Table for marking uses.

                    #  %fp register namings before and after a basic block
                    #
                    namings_in  = rwv::make_rw_vector (number_of_blks, NULL);
                    namings_out = rwv::make_rw_vector (number_of_blks, NULL);

                    stamp_counter = REF -4096;

                    # Edges that need splitting:
                    #
                    exception NO_EDGES_TO_SPLIT;

                    edges_to_split    = iht::make_hashtable  { size_hint => 32,  not_found_exception => NO_EDGES_TO_SPLIT };

                    add_edges_to_split = iht::set edges_to_split;

                    fun lookup_edges_to_split b
                        =
                        the_else (iht::find edges_to_split b, []);

                    # ------------------------------------------------------------------
                    # Code for handling namings between basic block
                    # ------------------------------------------------------------------

                    fun split_edge (title, source, target, e)
                        =
                        {   if (debug and *fp_trace_mode_intel32)
                                pr (title + " SPLITTING " + i2s source + "->" +  i2s target + "\n");
                            fi;

                            add_edges_to_split (target, (source, target, e) ! lookup_edges_to_split target);
                       };

#                   fun compute_freq (_, _, mcg::EDGE { execution_frequency, ... } )            # Is this ever used?
#                       =
#                       *execution_frequency;

                    # Given a registerset, return a sorted and unique
                    # list of elements with all non-physical registers removed
                    #
                    fun remove_non_physical registerlist
                        =
                        loop (registerlist, [])
                        where
                            fun loop ([], sss)
                                    =>
                                    cos::get_codetemps_in_colorset (cos::make_colorset sss);

                                loop (f ! fs, sss)
                                    =>
                                    {   fx = rkj::intrakind_register_id_of f;
                                        loop (fs, if (fx <= 7) f ! sss; else sss;fi);
                                    };
                            end;
                        end;

                    # Given a sorted and unique list of registers,
                    # Return a stack with these elements
                    #
                    fun new_stack fregs
                        =
                        {   stack = st::create();

                            apply (\\ f = st::push (stack, rkj::intrakind_register_id_of f))
                                  (reverse fregs);

                            stack;
                        };

                    # This function looks at all the entries on the stack,
                    # and generate code to deallocate all the dead values.
                    # The stack is updated.
                    #
                    fun remove_dead_values (stack, live_set, code)
                        =
                        loop (0, st::depth stack, code)
                        where

                            stamp = *stamp_counter;

                            stamp_counter := *stamp_counter - 1;

                            fun mark_live []
                                    =>
                                    ();

                                mark_live (r ! rs)
                                    =>
                                    {   rwv::set (use_table, rkj::intrakind_register_id_of r, stamp);
                                        mark_live rs;
                                    };
                            end;

                            fun is_live f
                                =
                                rwv::get (use_table, f)   ==   stamp;

                            fun loop (i, depth, code)
                                =
                                if (i >= depth)

                                     code;
                                else
                                      f = st::st (stack, i);

                                      if (is_live f)                     #  live?

                                           loop (i+1, depth, code);
                                      else
                                           if (debug and *fp_trace_mode_intel32)

                                               pr("REMOVING %f" + i2s f + " in %st(" + i2s i + ")" +
                                                " current stack=" + st::stack_to_string stack + "\n");
                                           fi;

                                           if (i == 0)

                                               st::pop stack;
                                               loop (0, depth - 1, pop_st ! code);
                                           else
                                               st::xch (stack, 0, i);
                                               st::pop stack;
                                               loop (0, depth - 1, mcf::fstpl (st_fn i) ! code);
                                           fi;
                                      fi;
                                fi;

                            mark_live live_set;
                        end;

                    # ------------------------------------------------------------------
                    # Given two stacks, source and target, where the namings are
                    # permutation of each other, generate the minimal number of
                    # fxchs to match source with target.
                    #
                    # Important: source and target MUST be permutations of each other.
                    #
                    # Essentially, we first decompose the permutation into cycles,
                    # and process each cycle.
                    # ------------------------------------------------------------------
                    #
                    fun shuffle (source, target, code)
                        =
                        {   stamp = *stamp_counter;
                            stamp_counter := *stamp_counter - 1;
                            permutation = last_use_table; /* reuse the space */

                            if (debug and *fp_trace_mode_intel32)
                                          pr("Shuffle " + st::stack_to_string source +
                                                   "->" + st::stack_to_string target + "\n");
                            fi;

                            #  Compute the initial permutation
                            #
                            n = st::depth source;
                            #
                            fun compute_initial_permutation (i)
                                =
                                if (i < n)

                                   f =  st::st (source, i);
                                   j =  st::fp (target, f);

                                   rwv::set (permutation, j, i);

                                   compute_initial_permutation (i+1);
                                fi;

                            compute_initial_permutation 0;

                            # Decompose the initial permutation into cycles.
                            # The cycle involving 0 is treated specially.

                            visited = use_table;

                            fun is_visited i
                                =
                                rwv::get (visited, i) == stamp;

                            fun mark_as_visited i
                                =
                                rwv::set (visited, i, stamp);

                            fun decompose_cycles (i, cycle0, cycles)
                                =
                                if (i >= n)

                                     (cycle0, cycles);

                                elif (is_visited i  or  rwv::get (permutation, i) == i)          #  trivial cycle

                                     decompose_cycles (i+1, cycle0, cycles);
                                else
                                    fun make_cycle (j, cycle, zero)
                                         =
                                         {   k = rwv::get (permutation, j);
                                             cycle = j ! cycle;
                                             zero  = zero or j == 0;
                                             mark_as_visited j;

                                             if (k == i)  (cycle, zero);
                                             else         make_cycle (k, cycle, zero);
                                             fi;
                                        };

                                    my (cycle, zero)
                                        =
                                        make_cycle (i, [], FALSE);

                                    zero
                                      ?? decompose_cycles (i+1, [cycle],        cycles)
                                      :: decompose_cycles (i+1, cycle0, cycle ! cycles);
                                fi;

                            my (cycle0, cycles)
                                =
                                decompose_cycles (0, [], []);

                            # Generate shuffle for a cycle that does not involve 0.
                            # Given a cycle (c_1, ..., c_k), we generate this code:
                            #  fxch %st (c_1),
                            #  fxch %st (c_2),
                            #  ...
                            #  fxch %st (c_k),
                            #  fxch %st (c_1)
                            #
                            fun genxch ([], code) => code;
                                genxch (c ! cs, code) => genxch (cs, fxch_fn c ! code);
                            end;

                            fun gen ([], code) => error "shuffle::gen";
                                gen (cs as (c ! _), code) => fxch_fn c ! genxch (cs, code);
                            end;

                            # Generate shuffle for a cycle that involves 0.
                            # Given a cycle (c_1, ..., c_k) we first shuffle this to
                            # an equivalent cycle (c_1, ..., c_k) where c'_k = 0,
                            # then we generate this code:
                            #  fxch %st (c'_1),
                            #  fxch %st (c'_2),
                            #  ...
                            #  fxch %st (c'_{ k - 1 } ),
                            #
                            fun gen0 ([], code)
                                    =>
                                    error "shuffle::gen0";

                                gen0 (cs, code)
                                    =>
                                    {   fun rearrange (0 ! cs, cs') =>  cs@reverse cs';
                                            rearrange (c ! cs, cs') =>  rearrange (cs, c ! cs');
                                            rearrange ([], _)       =>  error "shuffle::rearrange";
                                        end;

                                        cs = rearrange (cs, []);
                                        genxch (cs, code);
                                    };
                            end;

                            # Generate code.  Must process
                            # the non-zero cycles first:
                            #
                            code = list::fold_backward gen code cycles;
                            code = list::fold_backward gen0 code cycle0;

                            code;
                        };                              # fun shuffle

                    /*------------------------------------------------------------------
                     * Insert code at the end of a basic block.
                     * Make sure we put code in front of a transfer instruction
                     *------------------------------------------------------------------*/
                    fun insert_at_end (ops, code)
                        =
                        case ops
                            #
                            [] => code;

                            jmp ! rest
                                =>
                                mu::instruction_kind jmp == mu::k::JUMP
                                    ??  jmp ! code @ rest
                                    ::        code @ ops;
                        esac;

                    /*------------------------------------------------------------------
                     * Magic for inserting shuffle code at the end of a basic block
                     *------------------------------------------------------------------*/
                    fun shuffle_out (stack_out, ops, b, block, live_out)
                        =
                        {
                            live_out = remove_non_physical (live_out);

                            # Generate code that removes
                            # unnecessary values:
                            #
                            code = remove_dead_values (stack_out, live_out, []);

                            fun done (stack_out, ops, code)
                                =
                                {   rwv::set (namings_out, b, THE stack_out);
                                    insert_at_end (ops, code);
                                };

                            # Generate code that shuffles values
                            # from source to target:
                            #
                            fun match (source, target)
                                =
                                done (target, ops, shuffle (source, target, []));

                            # Generate code that shuffles
                            # values from source to live_out:
                            #
                            fun match_live_out ()
                                =
                                case live_out
                                    [] =>  done  (stack_out, ops, code);
                                    _  =>  match (stack_out, new_stack live_out);
                                esac;

                            # With multiple successors, decide
                            # which one to connect to. We choose
                            # the one from the block that follows
                            # from this one, if that exists, or
                            # else the edge with the highest frequency:
                            #
                            fun find ([], _, id, best)
                                    =>
                                    (id, best);

                                find((_, target, _) ! edges, highest_freq, id, best)
                                    =>
                                    {   (mcg.node_info  target)
                                            ->
                                            mcg::BBLOCK { execution_frequency, ... };

                                        if (target == b+1)
                                            #
                                            (target, rwv::get (namings_in, target));
                                        else
                                            case (rwv::get (namings_in, target))
                                                #
                                                NULL => find (edges, highest_freq, id, best);

                                                this as THE stack
                                                    =>
                                                    if (highest_freq < *execution_frequency)    find (edges, *execution_frequency, target, this);
                                                    else                                        find (edges, highest_freq,         id,     best);
                                                    fi;
                                            esac;
                                        fi;
                                    };
                            end;

                            # Split all edges source->target
                            # except omit_this:
                            #
                            fun split_all_edges_except ([], omit_this)
                                    =>
                                    ();

                                split_all_edges_except((source, target, e) ! edges, omit_this)
                                    =>
                                    if (target == exit_i)
                                         error "can't split exit edge!";
                                    else
                                        if (   target != omit_this
                                           and target <= b          #  XXX
                                           and target != entry_i
                                           )
                                             split_edge("ShuffleOut", source, target, e);
                                        fi;

                                        split_all_edges_except (edges, omit_this);
                                    fi;
                              end;

                            # Just one successor.
                            # Try to match the namings of
                            # the successor if it exists:
                            #
                            fun match_it next
                                =
                                {   my (succ_block, target)
                                        =
                                        find (next, -1.0, -1, NULL);

                                    split_all_edges_except (next, succ_block);

                                    case target
                                        THE stack_in => match (stack_out, stack_in);
                                        NULL         => done  (stack_out, ops, code);
                                    esac;
                                };

                            case (mcg.out_edges b)

                                [] => match_live_out();

                                next as [(_, target, _)]
                                    =>
                                    target == exit_i
                                      ??  match_live_out ()
                                      ::  match_it next;

                                next =>
                                    match_it next;
                            esac;
                        };                              # fun shuffle_out

                    # ------------------------------------------------------------------
                    # Compute the initial fp stack namings for basic block b.
                    # ------------------------------------------------------------------
                    fun shuffle_in (b, block, live_in)
                        =
                        {
                            live_in_set = remove_non_physical live_in;

                            # With multiple predecessors, find out which one we
                            # should connect to.   Choose the one from the block that
                            # falls into this one, if that exists, or else choose
                            # from the edge with the highest frequency.
                            #
                            fun find ([], _, best)
                                    =>
                                    best;

                                find ((source, _, _) ! edges, highest_freq, best)
                                    =>
                                    {   (mcg.node_info  source)
                                            ->
                                            mcg::BBLOCK { execution_frequency, ... };

                                        case (rwv::get (namings_out, source))
                                            #
                                            NULL =>   find (edges, highest_freq, best);

                                            this as THE stack
                                                =>
                                                if (source == b - 1)                                                            this;                           # Falls into b.
                                                elif (highest_freq < *execution_frequency)   find (edges, *execution_frequency, this);
                                                else                                         find (edges, highest_freq,         best);
                                                fi;
                                        esac;
                                    };
                            end;

                            fun split_all_done_edges []
                                    =>
                                    ();

                                split_all_done_edges ((source, target, e) ! edges)
                                    =>
                                    {   if (   source < b
                                           and source != entry_i
                                           and source != exit_i
                                           )

                                            split_edge("ShuffleIn", source, target, e);
                                        fi;

                                       split_all_done_edges edges;
                                   };
                            end;

                            # The initial stack namings are
                            # determined by the live set.
                            # No compensation code is needed.
                            #
                            fun from_live_in ()
                                =
                                {   stack_in
                                        =
                                        case live_in_set

                                            [] => st::stack0;

                                            _  => {   pr("liveIn=" + registerlist_to_string live_in + "\n");
                                                      new_stack live_in_set ;
                                                  };
                                        esac;

                                    stack_out = st::copy stack_in;

                                    (stack_in, stack_out, []);
                                };

                            prior = mcg.in_edges b;

                            my (stack_in, stack_out, code)
                                =
                                case (find (prior, -1.0, NULL))

                                    NULL =>
                                        {   split_all_done_edges  prior;
                                            from_live_in ();
                                        };

                                    THE stack_in'
                                        =>
                                        case prior

                                            [_] =>
                                                {       # One predecessor.

                                                    # Use the namings as from the previous block
                                                    # We first have to deallocate all unused values.
                                                    #
                                                    stack_out = st::copy stack_in';

                                                    # Clean the stack of unused entries:
                                                    #
                                                    code = remove_dead_values (stack_out, live_in_set, []);

                                                    (stack_in', stack_out, code);
                                                };

                                            prior =>
                                                {   # More than one predecessor.

                                                    stack_in  =  st::copy stack_in';
                                                    code      =  remove_dead_values (stack_in, live_in_set, []);
                                                    stack_out =  st::copy stack_in;

                                                    # If we have to generate code to deallocate
                                                    # the stack then we have split the edge:
                                                    #
                                                    case code
                                                        [] => ();
                                                        _  => split_all_done_edges (prior);
                                                    esac;

                                                    (stack_in, stack_out, []);
                                                };
                                         esac;

                                esac;

                            rwv::set (namings_in,  b, THE stack_in );
                            rwv::set (namings_out, b, THE stack_out);

                            (stack_in, stack_out, code);
                        };

                    # ------------------------------------------------------------------
                    # Code for patching up critical edges.
                    # The trick is finding a good place to insert the critical edges.
                    # Let's call an edge x->y that requires compensation
                    # code c to be inserted an candidate edge.  We write this as x->y (c)
                    #
                    # Here are the heuristics that we use to improve the final code:
                    #
                    #    1. Given two candidate edges a->x (c1) and b->x (c2) where c1=c2
                    #       then we can merge the two copies of compensation code.
                    #       This is quite common.  This generalizes to any number of edges.
                    #
                    #    2. Given two candidate edges a->x (c1) and b->x (c2) and where
                    #       c1 and c2 are pops, we can partially share c1 and c2.
                    #       Currently, I think I only recognize this case when
                    #       x has no fp registers live-in.
                    #
                    #    3. Given two candidate edges a->x (c1) and b->x (c2),
                    #       if a->x has a higher frequency then put the compensation
                    #       code in front of x (so that it falls through into x)
                    #       whenever possible.
                    #
                    # As you can see, the voodoo is strong here.
                    #
                    # The routine has two main phases:
                    #    1. Determine the compensation code by applying the heuristics
                    #       above.
                    #    2. Then insert them and rebuild the mcg by renaming all block
                    #       ids.  This is currently necessary to keep the layout order
                    #       consistent with the order of the id.
                    # ------------------------------------------------------------------

                    fun repair_critical_edges (mcg' as odg::DIGRAPH mcg)
                        =
                        {
                            cleanup  = [lowhalf_notes::comment.x_to_note "cleanup edge" ];
                            critical = [lowhalf_notes::comment.x_to_note "critical edge"];

                            fun annotate (gen, an)
                                =
                                apply (\\ ((_, mcg::BBLOCK { notes, ... } ), _)
                                          =
                                          notes := an
                                      )
                                      gen;

                            # Special case: target block has stack depth of 0.
                            # Just generate code that pop entries from the sources.
                            # To make things interesting, we try to share code among
                            # all the critical edges.
                            #
                            fun gen_popping_code (_, [])
                                    =>
                                    ();

                                gen_popping_code (target_id, edges)
                                    =>
                                    {   # Edges annotated with the source stack depth
                                        # Ordered by increasing stack height
                                        #
                                        edges
                                            =
                                            im::keyvals_list
                                                (fold_backward
                                                    (\\ (edge as (source_id, _, _), mmm)
                                                        =
                                                        {   n = st::depth (the (rwv::get (namings_out, source_id)));
                                                            im::set (mmm, n, edge ! the_else (im::get (mmm, n), []));
                                                        }
                                                    )
                                                    im::empty
                                                    edges
                                                );

                                        # Generate n pops:
                                        #
                                        fun pops (0, code) =>  code;
                                            pops (n, code) =>  pops (n - 1, pop_st ! code);
                                        end;

                                        # Create the chain of blocks:
                                        #
                                        fun make_chain (depth, [], chain)
                                                =>
                                                chain;

                                            make_chain (depth, (d, es) ! es', chain)
                                                =>
                                                {   code = pops (d - depth, []);
                                                    make_chain (d, es', (es, code) ! chain);
                                                };
                                        end;

                                        chain = make_chain (0, edges, []);

                                        annotate
                                          ( mcg::split_edges            # split_edges   def in    src/lib/compiler/back/low/mcg/machcode-controlflow-graph-g.pkg
                                                mcg'
                                                { groups => chain,
                                                  jump   => FALSE
                                                },
                                                cleanup
                                          );
                                    };
                            end;

                            # Generate repair code.
                            #
                            fun gen_repair_code (target_id, stack_in, edges)
                                =
                                {   live_in = iht::get  live_in_table  target_id;
                                    live_in_set = remove_non_physical live_in;

                                    if debug   pr("LiveIn = " + registerlist_to_string live_in + "\n");   fi;

                                    # Group all edges whose output stack configurations
                                    # are the same.  Each group is merged together into
                                    # a single compensation block
                                    #
                                    fun partition ([], s)
                                            =>
                                            s;

                                        partition((e as (src, _, _)) ! es, s)
                                            =>
                                            find (s, [])
                                            where
                                                stack_out = st::copy (the (rwv::get (namings_out, src)));

                                                fun find ([], s)
                                                        =>
                                                        partition (es, ([e], stack_out) ! s);

                                                    find((x as (es', st')) ! s', s)
                                                        =>
                                                        if (st::equal (stack_out, st'))
                                                            partition (es, (e ! es', st') ! s' @ s);
                                                        else
                                                            find (s', x ! s);
                                                        fi;
                                                end;
                                            end;
                                    end;

                                    # Partition by the source namings:
                                    #
                                    sss = partition (edges, []);

                                    # Compute frequencies
                                    #
                                    sss =   map (\\ (edges, st)
                                                    =
                                                    (mcg::sum_edge_execution_frequencies edges,  edges,  st)
                                                )
                                                sss;

                                    # Order by non-increasing frequencies:
                                    #
                                    sss =   lms::sort_list
                                                #
                                                (\\ ((x, _, _), (y, _, _)) =  x < y)
                                                #
                                                sss;

                                    # Generate code:
                                    #
                                    fun gen (freq, edges, stack_out)
                                        =
                                        {   # Deallocate unused values:
                                            #
                                            code = remove_dead_values (stack_out, live_in_set,[]);

                                            # Shuffle values:
                                            #
                                            code = shuffle (stack_out, stack_in, code);

                                            annotate(
                                                 mcg::split_edges mcg' { groups => [(edges, code)], jump => FALSE },
                                                       critical);
                                        };

                                    apply gen sss;
                                };

                            # Split all edges entering target_id:
                            #
                            fun split (target_id, edges)
                                =
                                {   stack_in = the (rwv::get (namings_in, target_id));

                                    fun log (s, t, e)
                                        =
                                        case (rwv::get (namings_out, s))

                                             THE stack_out
                                                 =>
                                                 pr ("SPLIT " + i2s s + "->" + i2s t + " " +
                                                     st::stack_to_string stack_out + "->" +
                                                     st::stack_to_string stack_in + "\n"
                                                    );

                                             NULL => error "split: stack_out";
                                        esac;

                                    if (debug and *fp_trace_mode_intel32)   apply log edges;   fi;

                                    st::depth stack_in == 0
                                      ??  gen_popping_code (target_id, edges)
                                      ::  gen_repair_code  (target_id, stack_in, edges);
                                };

                            iht::keyed_apply split edges_to_split;

                            mcg::note_topology_changes mcg';

                            mcg';
                        };

                    /*------------------------------------------------------------------
                     * Process all blocks which are not the entry or the exit
                     *------------------------------------------------------------------*/
                    stamp = REF 0;

                    fun rewrite_all_blocks (_, mcg::BBLOCK { kind=>mcg::START, ... } ) =>  ();
                        rewrite_all_blocks (_, mcg::BBLOCK { kind=>mcg::STOP,  ... } ) =>  ();

                        rewrite_all_blocks (blknum, block as mcg::BBLOCK { ops, labels, notes, ... } )
                            =>
                            {
                                if (debug and *fp_debug_mode_intel32)
                                    #
                                    apply  (\\ l = pr (lbl::codelabel_to_string l + ":\n"))
                                           *labels;
                                fi;

                                live_in  = iht::get  live_in_table   blknum;
                                live_out = iht::get  live_out_table  blknum;

                                st = rewrite ( *stamp, blknum, block,
                                               ops, live_in, live_out,
                                               notes
                                             );

                                stamp := st;            # Update stamp.
                            };
                    end

                    # ------------------------------------------------------------------
                    # Translate code within a basic block.
                    # Each instruction is given a unique stamp for identifying last
                    # uses.
                    # ------------------------------------------------------------------
                    also
                    fun rewrite (stamp, blknum, block, ops, live_in, live_out, notes)
                        =
                        {   (shuffle_in (blknum, block, live_in))
                                ->
                                (stack_in, stack, code);

                            # Dump instructions when encountering a bug:
                            #
                            fun bug msg
                                =
                                {   pr ("-------- bug in block " + i2s blknum + " ----\n");
                                    dump *ops;
                                    error msg;
                                };

                            fun loop (stamp, [], [], code)
                                    =>
                                    (stamp, code);

                                loop (stamp, instruction ! rest, (last_use, dead) ! last_uses, code)
                                    =>
                                    {   fun mark (table, [])
                                                =>
                                                ();

                                            mark (table, r ! rs)
                                                =>
                                                {   rwv::set (table, rkj::intrakind_register_id_of r, stamp);
                                                    mark (table, rs);
                                                };
                                        end;

                                        mark (last_use_table, last_use); #  mark all last uses

                                        trans (stamp, instruction, [], rest, dead, last_uses, code);
                                    };

                                loop _ => error "loop";
                            end

                            # Main routine that does the actual translation.
                            # A few reminders:
                            #  o  The instructions are processed in normal order
                            #     and generated in the reversed order.
                            #  o  (Local) liveness is computed at the same time.
                            #  o  For each use, we have to find out whether it is
                            #     the last use.  If so, we can kill it and reclaim
                            #     the stack entry at the same time.
                            #
                            also
                            fun trans (stamp, instruction, an, rest, dead, last_uses, code)
                                =
                                {   # Call this fate when
                                    # done with code generation:
                                    #
                                    fun finish_fn code
                                        =
                                        loop (stamp+1, rest, last_uses, code);

                                    fun kill_the_dead (dead, code)
                                        =
                                        kill (dead, code)
                                        where
                                            fun kill ([], code)
                                                    =>
                                                    finish_fn code;

                                                kill (f ! fs, code)
                                                    =>
                                                    {   fx = rkj::intrakind_register_id_of f;

                                                        if (debug and debug_dead )
                                                           pr("DEAD " + freg_to_string f + " in " +
                                                              st::stack_to_string stack + "\n");
                                                        fi;

                                                        #  not a physical register
                                                        if (fx >= 8 )
                                                            kill (fs, code);
                                                        else
                                                            i = st::fp (stack, fx);

                                                            if (debug and debug_dead )
                                                                pr("KILLING " + freg_to_string f +
                                                                   "=%st(" + i2s i + ")\n");
                                                            fi;

                                                            if (i < 0 )
                                                                 kill (fs, code); #  Dead already
                                                            elif (i == 0)
                                                                 st::pop stack;
                                                                 kill (fs, pop_st ! code);
                                                            else
                                                                 st::xch (stack, 0, i); st::pop stack;
                                                                 kill (fs, mcf::fstpl (st_fn i) ! code);
                                                            fi;
                                                        fi;
                                                    };
                                            end;                        # fun kill
                                        end;                    # where (fun kill_the_dead)

                                    # Call this fate when
                                    # done with floating point
                                    # code generation.  Remove all
                                    # dead code first:
                                    #
                                    fun done_fn code
                                        =
                                        kill_the_dead (dead, code);

                                    # Is this the last use
                                    # of register f?
                                    #
                                    fun is_last_use f
                                        =
                                        rwv::get (last_use_table, f) == stamp;

                                    # Is this value dead?
                                    #
                                    fun is_dead f
                                        =
                                        loop dead
                                        where
                                            fun loop [] => FALSE;
                                                loop (r ! rs) => rkj::codetemps_are_same_color (f, r) or loop rs;
                                            end;
                                        end;

                                    # Dump the stack before each intruction for debugging:
                                    #
                                    fun log ()
                                        =
                                        if (debug   and   *fp_trace_mode_intel32)

                                             pr (st::stack_to_string stack + assemble instruction + "...\n");
                                        fi;

                                    # Find the location of a source register:
                                    #
                                    fun getfs (f)
                                        =
                                        {   fx = rkj::intrakind_register_id_of f;
                                            s  = st::fp (stack, fx);

                                            (is_last_use fx,  s);
                                        };

                                    #  Generate memory to memory move:
                                    #
                                    fun mmmove (fsize, src, dst)
                                        =
                                        {   st::non_full stack;
                                            code = fld_fn (fsize, src) ! code;
                                            code = mark (fstp_fn (fsize, dst), an) ! code;
                                            done_fn code;
                                        };

                                    #  Allocate a new register in %st (0):
                                    #
                                    fun allot (f, code)
                                        =
                                        {   st::push (stack, rkj::intrakind_register_id_of f);
                                            code;
                                        };

                                    # register -> register move
                                    #
                                    fun rrmove (fs, fd)
                                        =
                                        if (rkj::codetemps_are_same_color (fs, fd))
                                            #
                                            done_fn code;
                                        else
                                            my (dead, ss) = getfs fs;

                                            if dead
                                                #
                                                # fs is dead.

                                                st::set (stack, ss, rkj::intrakind_register_id_of fd);          # Rename fd to fs.
                                                done_fn code;                                   # No code is generated.
                                            else
                                                # fs is not dead; push it onto %st (0);
                                                # set fd to %st (0)

                                                code = allot (fd, code);
                                                done_fn (mark (mcf::fldl (st_fn ss), an) ! code);
                                            fi;
                                        fi;

                                    # memory -> register move.
                                    # Do dead code elimination here.
                                    #
                                    fun mrmove (fsize, src, fd)
                                        =
                                        if   (is_dead fd )

                                             finish_fn code;            #  value has been killed
                                        else
                                             code = allot (fd, code);
                                             done_fn (mark (fld_fn (fsize, src), an) ! code);
                                        fi;

                                    # Exchange %st (n) and %st (0):
                                    #
                                    fun xch n
                                        =
                                        {   st::xch (stack, 0, n);
                                            fxch_fn n;
                                        };

                                    # Push %st (n) onto the stack:
                                    #
                                    fun push n
                                        =
                                        {   st::push (stack,-2);
                                            mcf::fldl (st_fn n);
                                        };

                                    # Push mem onto the stack:
                                    #
                                    fun pushmem src
                                        =
                                        {   st::push (stack,-2);
                                            mcf::fldl (src);
                                        };

                                    # register -> memory move.
                                    # Use pop version of the opcode
                                    # if it is the last use:
                                    #
                                    fun rmmove (fsize, fs, dst)
                                        =
                                        {   fun fstp code
                                                =
                                                {   st::pop stack;
                                                    done_fn (mark (fstp_fn (fsize, dst), an) ! code);
                                                };

                                            fun fst code
                                                =
                                                done_fn (mark (fst_fn (fsize, dst), an) ! code);

                                            case (getfs fs)
                                                (TRUE,  0) =>  fstp code;
                                                (TRUE,  n) =>  fstp (xch n ! code);
                                                (FALSE, 0) =>  fst (code);
                                                (FALSE, n) =>  fst (xch n ! code);
                                            esac;
                                        };

                                    # Floating point move:
                                    #
                                    fun fmove { fsize, src=>mcf::FPR fs, dst=>mcf::FPR fd } =>  rrmove (fs, fd);
                                        fmove { fsize, src, dst=>mcf::FPR fd }            =>  mrmove (fsize, src, fd);
                                        fmove { fsize, src=>mcf::FPR fs, dst }            =>  rmmove (fsize, fs, dst);
                                        fmove { fsize, src, dst }                       =>  mmmove (fsize, src, dst);
                                    end;

                                    # Floating point integer load operator:
                                    #
                                    fun fiload { isize, ea, dst=>mcf::FPR fd }
                                            =>
                                            {   code = allot (fd, code);
                                                code = mark (fild_fn (isize, ea), an) ! code;
                                                done_fn code;
                                            };

                                        fiload { isize, ea, dst }
                                            =>
                                            {   code = mark (fild_fn (isize, ea), an) ! code;
                                                code = mcf::fstpl (dst) ! code; #  XXX
                                                done_fn code;
                                            };
                                    end;

                                    # Make a copy of register fs to %st (0).
                                    #
                                    fun moveregtotop (fs, code)
                                        =
                                        case (getfs fs)
                                            (TRUE,  0) =>  code;
                                            (TRUE,  n) =>  xch n ! code;
                                            (FALSE, n) =>  push n ! code;
                                        esac;

                                    fun movememtotop (fsize, mem, code)
                                        =
                                        {   st::push (stack, -2);
                                            fld_fn (fsize, mem) ! code;
                                        };

                                    # Move an operand to top of stack:
                                    #
                                    fun movetotop (fsize, mcf::FPR fs, code) =>  moveregtotop (fs, code);
                                        movetotop (fsize, mem,       code) =>  movememtotop (fsize, mem, code);
                                    end;

                                    fun store_result (fsize, dst, n, code)
                                        =
                                        case dst

                                            mcf::FPR fd
                                                =>
                                                {   st::set (stack, n, rkj::intrakind_register_id_of fd);
                                                    done_fn code;
                                                };

                                            mem =>
                                                {   code =  (n == 0)  ??          code
                                                                      ::  xch n ! code;

                                                    st::pop stack;
                                                    done_fn (fstp_fn (fsize, mem) ! code);
                                                };
                                        esac;

                                    # Floating point unary operator:
                                    #
                                    fun funop { fsize, un_op, src, dst }
                                        =
                                        {   code = movetotop (fsize, src, code);
                                            code = mark (mcf::funary un_op, an) ! code;

                                            # Moronic hack to deal with partial tangent!        XXX BUGGO FIXME
                                            #
                                            code =
                                                case un_op

                                                    mcf::FPTAN
                                                        =>
                                                        {   if (st::depth stack >= 7 ) error "FPTAN"; fi;
                                                            pop_st ! code;                      #  pop the useless 1.0
                                                        };

                                                    _   => code;
                                                esac;

                                            store_result (fsize, dst, 0, code);
                                        };

                                    # Floating point binary operator.
                                    # Note:
                                    #    binop src, dst
                                    #    means dst := dst binop src
                                    #          (lsrc := lsrc binop rsrc)
                                    #    on the intel32
                                    #
                                    fun fbinop { fsize, bin_op, lsrc, rsrc, dst }
                                        =
                                        {   # generate code and set %st (n) = fd */

                                            #  op2 := op1 - op2

                                            fun op (bin_op, op1, op2, n, code)
                                                =
                                                {   code = mark (mcf::fbinary { bin_op, src=>op1, dst=>op2 }, an)
                                                           ! code;
                                                    store_result (mcf::FP64, dst, n, code);
                                                };

                                            fun oper_r (bin_op, op1, op2, n, code)
                                                =
                                                op (invert bin_op, op1, op2, n, code);

                                            fun oper_p (bin_op, op1, op2, n, code)
                                                =
                                                {   st::pop stack;
                                                    op (pop bin_op, op1, op2, n - 1, code);
                                                };

                                            fun oper_rp (bin_op, op1, op2, n, code)
                                                =
                                                {   st::pop stack;
                                                    oper_r (pop bin_op, op1, op2, n - 1, code);
                                                };

                                            # Many special cases to consider.
                                            # Basically, try to reuse stack space as
                                            # much as possible by taking advantage of last uses.
                                            #
                                            #  Stack=[st (0)=3.0 st (1)=2.0]
                                            #    fsub   %st (1), %st [1, 2.0]
                                            #    fsubr  %st (1), %st [-1, 2.0]
                                            #    fsub   %st, %st (1) [3.0, 1.0]
                                            #    fsubr  %st, %st (1) [3.0,-1.0]
                                            #
                                            #    fsubp  %st, %st (1) [1]
                                            #    fsubrp %st, %st (1) [-1]
                                            #  So,
                                            #    fsub  %st (n), %st (means %st - %st (n) -> %st)
                                            #    fsub  %st, %st (n) (means %st - %st (n) -> %st (n))
                                            #    fsubr %st (n), %st (means %st (n) - %st -> %st)
                                            #    fsubr %st, %st (n) (means %st (n) - %st -> %st (n))
                                            #
                                            fun reg2 (fx, fy)
                                                =
                                                {   my (dx, sx) = getfs fx;
                                                    my (dy, sy) = getfs fy;

                                                    fun loop (dx, sx, dy, sy, code)
                                                        =
                                                        #    op1,   op2 (dst)
                                                        case (dx, sx, dy, sy)

                                                            (TRUE,  0, FALSE, n) =>  op    (bin_op, st_fn n, st0, 0, code);
                                                            (FALSE, n, TRUE,  0) =>  oper_r  (bin_op, st_fn n, st0, 0, code);

                                                            (TRUE,  n, TRUE,  0) =>  oper_rp (bin_op, st0, st_fn n, n, code);
                                                            (TRUE,  0, TRUE,  n) =>  oper_p  (bin_op, st0, st_fn n, n, code);

                                                            (FALSE, 0, TRUE,  n) =>  op    (bin_op, st0, st_fn n, n, code);
                                                            (TRUE,  n, FALSE, 0) =>  oper_r  (bin_op, st0, st_fn n, n, code);

                                                            (TRUE, sx, dy, sy)
                                                                =>
                                                                loop (TRUE, 0, dy, sy, xch sx ! code);

                                                            (dx, sx, TRUE, sy)
                                                                =>
                                                                loop (dx, sx, TRUE, 0, xch sy ! code);

                                                            (FALSE, sx, FALSE, sy)
                                                                =>
                                                                loop (TRUE, 0, FALSE, sy+1, push sx ! code);
                                                        esac;

                                                    if (sx == sy )              # Same register.

                                                        code = case (dx, sx)
                                                                   (TRUE,  0) =>  code;
                                                                   (TRUE,  n) =>  xch n ! code;
                                                                   (FALSE, n) =>  push n ! code;
                                                               esac;

                                                        op (bin_op, st0, st0, 0, code);

                                                    else
                                                        loop (dx, sx, dy, sy, code);
                                                    fi;
                                                };

                                            # reg/mem operands
                                            #
                                            fun regmem (bin_op, fx, mem)
                                                =
                                                case (getfs fx)

                                                     (TRUE,  0) =>  op (bin_op, mem, st0, 0, code);
                                                     (TRUE,  n) =>  op (bin_op, mem, st0, 0, xch n ! code);
                                                     (FALSE, n) =>  op (bin_op, mem, st0, 0, push n ! code);
                                                esac;

                                            # Two memory operands. Optimize the case when
                                            # the two operands are identical.
                                            #
                                            fun mem2 (lsrc, rsrc)
                                                =
                                                {   st::push (stack,-2);

                                                    code = fld_fn (fsize, lsrc) ! code;

                                                    rsrc =   mu::eq_operand (lsrc, rsrc)
                                                                 ??  st0
                                                                 ::  rsrc;

                                                    op (bin_op, rsrc, st0, 0, code);
                                                };

                                            fun process (mcf::FPR fx, mcf::FPR fy) => reg2 (fx, fy);
                                                process (mcf::FPR fx, mem)       => regmem (bin_op, fx, mem);
                                                process (mem, mcf::FPR fy)       => regmem (invert bin_op, fy, mem);
                                                process (lsrc, rsrc)           => mem2 (lsrc, rsrc);
                                            end;

                                            process (lsrc, rsrc);
                                        };

                                    # Floating point binary operator with integer conversion:
                                    #
                                    fun fibinop { isize, bin_op, lsrc, rsrc, dst }
                                        =
                                        {   fun op (bin_op, src, code)
                                                =
                                                {   code = mark (mcf::fibinary { bin_op, src }, an)
                                                                  ! code;

                                                    store_result (mcf::FP64, dst, 0, code);
                                                };

                                            fun regmem (bin_op, fx, mem)
                                                =
                                                case (getfs fx)
                                                    (TRUE,  0) =>  op (bin_op, mem, code);
                                                    (TRUE,  n) =>  op (bin_op, mem, xch n ! code);
                                                    (FALSE, n) =>  op (bin_op, mem, push n ! code);
                                                esac;

                                            case (lsrc, rsrc)
                                                 (mcf::FPR fx, mem)  =>  regmem (bin_op, fx, mem);
                                                 (lsrc,      rsrc) =>  op (bin_op, rsrc, pushmem lsrc ! code);
                                            esac;
                                        };

                                    # Floating point comparison
                                    # We have to make sure there are enough registers.
                                    # The trick is that tmp is always a physical register.
                                    # So we can always use it as temporary space if we
                                    # have run out.
                                    #
                                    fun fcmp { i, fsize, lsrc, rsrc }
                                        =
                                        {   fun fucompp code
                                                =
                                                {   st::pop stack; st::pop stack;

                                                    i   ??   pop_st !  mark (mcf::fucomip (st_fn 1), an) ! code
                                                        ::   mark (mcf::fucompp, an) ! code;
                                               };

                                            fun fucomp n
                                                =
                                                {   st::pop stack;

                                                    mark
                                                      ( (i ?? mcf::fucomip :: mcf::fucomp)  (st_fn n),
                                                        an
                                                      );
                                                };

                                            fun fucom n
                                                =
                                                mark ((i ?? mcf::fucomi :: mcf::fucom) (st_fn n), an);

                                            fun genmemcmp ()
                                                =
                                                {   code = movememtotop (fsize, rsrc, code);
                                                    code = movememtotop (fsize, lsrc, code);

                                                    finish_fn (fucompp (code));
                                                };

                                            fun genmemregcmp (lsrc, fy)
                                                =
                                                case (getfs fy)

                                                    (FALSE, n)
                                                        =>
                                                        {   code = movememtotop (fsize, lsrc, code);
                                                            finish_fn (fucomp (n+1) ! code);
                                                        };

                                                    (TRUE, n)
                                                        =>
                                                        {   code =   n == 0  ??  code
                                                                             ::  xch n ! code;

                                                            code = movememtotop (fsize, lsrc, code);

                                                            finish_fn (fucompp code);
                                                        };
                                                esac;

                                            fun genregmemcmp (fx, rsrc)
                                                =
                                                {   code = case (getfs fx)

                                                               (TRUE, n)
                                                                   =>
                                                                   {   code =   n == 0  ??  code
                                                                                        ::  xch n ! code;

                                                                       code = movememtotop (fsize, rsrc, code);

                                                                       xch 1 ! code;
                                                                   };

                                                               (FALSE, n)
                                                                   =>
                                                                   {   code = movememtotop (fsize, rsrc, code);
                                                                       push (n+1) ! code;
                                                                   };
                                                            esac;

                                                    finish_fn (fucompp code);
                                                };

                                            # Deal with the special case
                                            # where both sources are
                                            # in the same register
                                            #
                                            fun regsame (dx, sx)
                                                =
                                                finish_fn (cmp ! code)
                                                where
                                                    my (code, cmp)
                                                        =
                                                        case (dx, sx)
                                                            (TRUE,  0) =>  (code, fucomp 0);    #  pop once!
                                                            (FALSE, 0) =>  (code, fucom  0);    #  Don't pop!

                                                            (TRUE,  n) =>  (xch n ! code, fucomp 0);
                                                            (FALSE, n) =>  (xch n ! code, fucom 0);
                                                        esac;
                                                end;

                                            fun reg2 (fx, fy)
                                                =
                                                # Special case is when things are already in place.
                                                # Note: should also generate FUCOM and FUCOMP!!!        XXX BUGGO FIXME
                                                #
                                                {   my (dx, sx) = getfs fx;
                                                    my (dy, sy) = getfs fy;

                                                    fun fstp n
                                                        =
                                                        {   st::xch (stack, n, 0);
                                                            st::pop stack;
                                                            mcf::fstpl (st_fn n);
                                                        };

                                                    if (sx == sy)

                                                        regsame (dx, sx);                               # Same register!
                                                    else
                                                        # First, move sx to %st (0):
                                                        #
                                                        my (sy, code)
                                                            =
                                                            if (sx == 0)                                # There already.
                                                                 ( sy,
                                                                   code
                                                                 );
                                                            else
                                                                 ( sy == 0 ?? sx :: sy,
                                                                   xch sx ! code
                                                                 );
                                                            fi;

                                                        # Generate the appropriate comparison op
                                                        #
                                                        my (sy, code, pop_y)
                                                            =
                                                            case (dx, dy, sy)
                                                                (TRUE,  TRUE, 0) =>  (-1, fucompp code, FALSE);
                                                                (TRUE,  _,    _) =>  (sy - 1, fucomp sy ! code, dy);
                                                                (FALSE, _,    _) =>  (sy, fucom sy ! code, dy);
                                                            esac;

                                                        # Pop fy if it is dead and hasn't already
                                                        # been popped.
                                                        #
                                                        code =   pop_y    ??   fstp sy ! code
                                                                          ::             code;

                                                        finish_fn code;
                                                    fi;
                                                };

                                            case (lsrc, rsrc)
                                                (mcf::FPR x, mcf::FPR y) =>  reg2 (x, y);
                                                (mcf::FPR x, mem)      =>  genregmemcmp (x, mem);
                                                (mem, mcf::FPR y)      =>  genmemregcmp (mem, y);
                                                _                    =>  genmemcmp ();
                                            esac;
                                        };

                                    fun pr_copy (dst, src)
                                        =
                                        paired_lists::apply
                                            (\\ (fd, fs)
                                                =
                                                pr (freg_to_string (fd) + "<-" + freg_to_string fs + " ")
                                            )
                                            (dst, src);

                                    # Parallel copy magic.
                                    #
                                    # For each src register, we find out
                                    #
                                    #  1. Whether it is the last use, and if so,
                                    #  2. whether it is used more than once.
                                    #
                                    # If a source is a last and unique use,
                                    # then we can simply rename it to
                                    # the appropriate destination register:
                                    #
                                    fun fcopy (mcf::COPY { dst, src, tmp, ... } )
                                            =>
                                            {
                                                fun loop ([], [], copies, renames)
                                                        =>
                                                        (copies, renames);

                                                    loop (fd ! fds, fs ! fss, copies, renames)
                                                        =>
                                                        {   fsx = rkj::intrakind_register_id_of fs;

                                                            if (is_last_use fsx)

                                                                  if (rwv::get (use_table, fsx) != stamp)

                                                                        # Unused.

                                                                        rwv::set (use_table, fsx, stamp);

                                                                        loop
                                                                          ( fds,
                                                                            fss,
                                                                            copies,

                                                                            rkj::codetemps_are_same_color (fd, fs)
                                                                                ??            renames
                                                                                :: (fd, fs) ! renames
                                                                          );

                                                                   else
                                                                        loop (fds, fss, (fd, fs) ! copies, renames);
                                                                   fi;

                                                              else
                                                                   loop (fds, fss, (fd, fs) ! copies, renames);
                                                              fi;
                                                          };

                                                    loop _
                                                        =>
                                                        error "fcopy::loop";
                                                end;

                                                # Generate code for the copies:
                                                #
                                                fun gen_copy ([], code)
                                                        =>
                                                        code;

                                                    gen_copy((fd, fs) ! copies, code)
                                                        =>
                                                        {   ss   = st::fp (stack, rkj::intrakind_register_id_of fs);
                                                            st::push (stack, rkj::intrakind_register_id_of fd);
                                                            code = mcf::fldl (st_fn ss) ! code;
                                                            gen_copy (copies, code);
                                                        };
                                                end;

                                                # Perform the renaming.
                                                # It must be done in parallel!
                                                #
                                                fun renaming renames
                                                    =
                                                    {   ss = map  (\\ (_, fs) = st::fp (stack, rkj::intrakind_register_id_of fs))
                                                                  renames;

                                                        paired_lists::apply
                                                            (\\ ((fd, _), ss)
                                                                =
                                                                st::set (stack, ss, rkj::intrakind_register_id_of fd)
                                                            )
                                                            (renames, ss);
                                                    };

                                                # if debug then
                                                #              (paired_lists::apply (\\ (fd, fs) =>
                                                #                  pr (fregToString (regmap fd) + "<-" +
                                                #                     fregToString (regmap fs) + " ")
                                                #                  ) (dst, src);
                                                #               pr "\n")
                                                #           else ()

                                                my (copies, renames)
                                                    =
                                                    loop (dst, src, [], []);

                                                code = gen_copy (copies, code);

                                                renaming renames;

                                                   case tmp

                                                       THE (mcf::FPR f)
                                                           =>
                                                           {   if   (debug and debug_dead )

                                                                    pr("KILLING tmp " + freg_to_string f + "\n");
                                                               fi;

                                                               st::kill (stack, f);
                                                           };

                                                       _   => ();
                                                   esac;

                                                   done_fn code;
                                            };

                                        fcopy _ => error "fcopy";
                                    end;

                                    fun call (instruction, return)
                                        =
                                        {
                                            code = mark (mcf::BASE_OP instruction, an) ! code;

                                            return_set = rkj::sortuniq_colored_codetemps  (get_float_codetemp_infos return);

                                            case return_set
                                                #
                                                []  => ();
                                                [r] => st::push (stack, rkj::intrakind_register_id_of r);
                                                _   => error "can't return more than one fp argument (yet)";
                                            esac;

                                            kill_the_dead (list::filter is_dead return_set, code);
                                        };

                                    fun intel32trans instruction
                                        =
                                        case instruction
                                            mcf::FMOVE x   => { log(); fmove x;};
                                            mcf::FBINOP x  => { log(); fbinop x;};
                                            mcf::FIBINOP x => { log(); fibinop x;};
                                            mcf::FUNOP x   => { log(); funop x;};
                                            mcf::FILOAD x  => { log(); fiload x;};
                                            mcf::FCMP x    => { log(); fcmp x;};

                                            # Handle calling convention:
                                            #
                                            mcf::CALL { return, ... }
                                                =>
                                                {   log();
                                                    call (instruction, return);
                                                };

                                            # Catch instructions that absolutely
                                            # should not have been generated
                                            # at this point:
                                            #
                                            ( mcf::FLD1    | mcf::FLDL2E    | mcf::FLDLG2     | mcf::FLDLN2  | mcf::FLDPI
                                            | mcf::FLDZ    | mcf::FLDL _    | mcf::FLDS _     | mcf::FLDT _
                                            | mcf::FILD _  | mcf::FILDL _   | mcf::FILDLL _
                                            | mcf::FENV _  | mcf::FBINARY _ | mcf::FIBINARY _ | mcf::FUNARY _
                                            | mcf::FUCOMPP | mcf::FUCOM _   | mcf::FUCOMP _   | mcf::FCOMPP  | mcf::FXCH _
                                            | mcf::FCOMI _ | mcf::FCOMIP _  | mcf::FUCOMI _   | mcf::FUCOMIP _
                                            | mcf::FSTPL _ | mcf::FSTPS _   | mcf::FSTPT _    | mcf::FSTL _  | mcf::FSTS _
                                            )   =>
                                                bug ("Illegal FP instructions");

                                            # Leave other instructions untouched:
                                            #
                                            other_instruction
                                                =>
                                                finish_fn (mark (mcf::BASE_OP other_instruction, an) ! code);
                                        esac;

                                    case instruction
                                        #
                                        mcf::NOTE { note, op }
                                            =>
                                             trans (stamp, op, note ! an, rest, dead, last_uses, code);

                                        mcf::COPY { kind => rkj::FLOAT_REGISTER, ... }
                                            =>
                                            {   log();
                                                fcopy instruction;
                                            };

                                        mcf::LIVE _
                                            =>
                                            done_fn (mark (instruction, an) ! code);

                                        mcf::BASE_OP instruction
                                            =>
                                            intel32trans instruction;

                                        _  => finish_fn (mark (instruction, an) ! code);
                                    esac;
                                };                              # fun trans

                             # Check the translation result
                             # to see if it matches the
                             # original code:
                             #
                            fun check_translation (stack_in, stack_out, ops)
                                =
                                {   n = REF (st::depth stack_in);

                                    fun push () =  n := *n + 1;
                                    fun pop () =  n := *n - 1;

                                    fun scan (mcf::BASE_OP (mcf::FBINARY { bin_op, ... } ))
                                            =>
                                            case bin_op
                                                ( mcf::FADDP | mcf::FSUBP | mcf::FSUBRP | mcf::FMULP
                                                | mcf::FDIVP | mcf::FDIVRP) => pop();
                                                _ => ();
                                            esac;

                                        scan (mcf::BASE_OP (mcf::FIBINARY { bin_op, ... } )) => ();
                                        scan (mcf::BASE_OP (mcf::FUNARY mcf::FPTAN)) => push();
                                        scan (mcf::BASE_OP (mcf::FUNARY _)) => ();
                                        scan (mcf::BASE_OP (mcf::FLDL (mcf::ST n))) => push();
                                        scan (mcf::BASE_OP (mcf::FLDL mem)) => push();
                                        scan (mcf::BASE_OP (mcf::FLDS mem)) => push();
                                        scan (mcf::BASE_OP (mcf::FLDT mem)) => push();
                                        scan (mcf::BASE_OP (mcf::FSTL (mcf::ST n))) => ();
                                        scan (mcf::BASE_OP (mcf::FSTPL (mcf::ST n))) => pop();
                                        scan (mcf::BASE_OP (mcf::FSTL mem)) => ();
                                        scan (mcf::BASE_OP (mcf::FSTS mem)) => ();
                                        scan (mcf::BASE_OP (mcf::FSTPL mem)) => pop();
                                        scan (mcf::BASE_OP (mcf::FSTPS mem)) => pop();
                                        scan (mcf::BASE_OP (mcf::FSTPT mem)) => pop();
                                        scan (mcf::BASE_OP (mcf::FXCH { operand=>i, ... } )) => ();
                                        scan (mcf::BASE_OP (mcf::FUCOM _)) => ();
                                        scan (mcf::BASE_OP (mcf::FUCOMP _)) => pop();
                                        scan (mcf::BASE_OP (mcf::FUCOMPP)) => { pop(); pop();};
                                        scan (mcf::BASE_OP (mcf::FILD mem)) => push();
                                        scan (mcf::BASE_OP (mcf::FILDL mem)) => push();
                                        scan (mcf::BASE_OP (mcf::FILDLL mem)) => push();

                                        scan (mcf::BASE_OP (mcf::CALL { return, ... } ))
                                            =>
                                            {   n := 0; #  Clear the stack

                                                #  Simulate the pushing of arguments:
                                                #
                                                {   return_set = rkj::sortuniq_colored_codetemps  (get_float_codetemp_infos return);
                                                    apply (\\ _ = push()) return_set;
                                                };
                                            };
                                        scan _ => ();
                                    end;

                                    apply scan (reverse ops);
                                    n = *n;
                                    m = st::depth stack_out;

                                    if (n != m)
                                         dump ops;
                                         bug("Bad translation n=" + i2s n +  " expected=" + i2s m + "\n");
                                    fi;
                                };

                            # Dump the initial code:
                            #
                            if   (debug and *fp_debug_mode_intel32)

                                 pr("-------- block " + i2s blknum + " ----" +
                                      registerlist_to_string live_in + " " +
                                      st::stack_to_string stack_in + "\n");

                                 dump *ops;
                                 pr("next=");
                                 apply (\\ b => pr (i2s b + " "); end ) (mcg.next blknum);
                                 pr "\n";

                            fi;

                            # Compute the last uses:
                            #
                            last_use =   compute_last_use (blknum, ops, live_out);

                            # Rewrite the code:
                            #
                            my (stamp, ops')
                                =
                                loop (stamp, reverse *ops, last_use, code);

                            # Insert shuffle code at the end if necessary:
                            #
                            ops' = shuffle_out (stack, ops', blknum, block, live_out);

                            # Dump translation:
                            #
                            if (debug and *fp_debug_mode_intel32)
                                #
                                pr("-------- translation " + i2s blknum + "----" +
                                   registerlist_to_string live_in + " " +
                                   st::stack_to_string stack_in + "\n");

                                dump ops';

                                pr("-------- done " + i2s blknum + "----" +
                                   registerlist_to_string live_out + " " +
                                   st::stack_to_string stack + "\n");
                             fi;

                            # Check if things are okay:
                            #
                            if (debug and sanity_check)
                                #
                                check_translation (stack_in, stack, ops');
                            fi;

                            ops := ops';                        # Update the basic-block machine-instruction list.

                            stamp;
                        };                                      # fun rewrite

                    # Translate all blocks:
                    #
                    stamp := rgk::codetemp_id_if_above;

                    mcg.forall_nodes  rewrite_all_blocks;

                    # If we found critical edges
                    # then we have to split them:
                    #
                    if (iht::vals_count edges_to_split == 0)
                        #
                        mcg';
                    else
                        repair_critical_edges  mcg';
                    fi;
                };
            end;
        };              # generic package floating_point_code_intel32_g
end;                    # stipulate

Comments and suggestions to: bugs@mythryl.org