src/lib/compiler/back/low/main/nextcode/emit-treecode-heapcleaner-calls-g.pkg

15.4.295 src/lib/compiler/back/low/main/nextcode/emit-treecode-heapcleaner-calls-g.pkg

# emit-treecode-heapcleaner-calls-g.pkg
#
# For general background see
#
#     src/A.GARBAGE-COLLECTOR.OVERVIEW
#
# This package is responsible for generating code
# to invoke the  heapcleaner ("garbage collector").
# It is essentially dedicated support infrastructure for
#
#     src/lib/compiler/back/low/main/main/translate-nextcode-to-treecode-g.pkg
#
# We get called at four points during compiles:
#
#    1) Before beginning compilation of a package, to reset our
#       worklists.
#
#    2) While emitting code for a given package cccomponent,
#       to deposit actual heaplimit checks looking like
#
#           if (heap_allocation_pointer > heap_allocation_limit)  goto ...
#
#       using our three entrypoints
#
#           put_heaplimit_check_and_push_heapcleaner_call_spec_for_public_fn
#           put_heaplimit_check_and_push_heapcleaner_call_spec_for_unoptimized_private_fn
#           put_heaplimit_check_and_push_heapcleaner_call_spec_for_optimized_private_fn
#
#       For each heaplimit check so generated, we save a description
#       of the live registers at that point.  These go on one of two
#       private worklists:
#
#            public_fn_heaplimit_checks__global
#           private_fn_heaplimit_checks__global
#
#    3) When code generation for a given package cccomponent
#       is complete we get called via our entrypoint
#
#           put_all_publicfn_heapcleaner_longjumps_and_all_privatefn_heapcleaner_calls_for_cccomponent
#
#       At this point we process the above two __global lists and
#       emit heapcleaner calls for the private fns and longjumps
#       to (as-yet-nonexistent) heapcleaner calls for the public fns.
#
#       We save specs for the latter heapclaner calls on a third private worklist
#
#           longjumps_to_heapcleaner_calls__global
#
#    4) When code generation for cccomponents in a given package is complete
#       we get called via our entrypoint
#
#           put_all_publicfn_heapcleaner_calls_for_package
#
#       At this point we process the third worklist, emitting all
#       public-fn heapcleaner-call codeblocks.  To save codespace, when
#       possible we share these codeblocks between multiple heaplimit checks.
#
#
# Nomenclature:
#
#     A (heapcleaner) "root" is a live pointer into the heap.
#     which is to say, the root of a tree of live heap values
#     which the heapcleaner ("garbage collector") must NOT
#     recycle.  Much of our work in this package consists of
#     making sure that all roots get passed to the heapcleaner.
#
#
#
# We insert heaplimit checks at points determined by
#
#     src/lib/compiler/back/low/main/nextcode/pick-nextcode-fns-for-heaplimit-checks.pkg
#
# These checks work in conjunction with related code generated in
#
#     src/lib/compiler/back/low/main/main/translate-nextcode-to-treecode-g.pkg
#
# The basic idea here is to buy time and space efficiency
# by structuring the heapcleaner ("garbage collector")
# invocation logic as a three-level hierarchy:
#
#     Level 1:
#         In every loop, we have a test like
#             if (heapcleaner_allocation_pointer > heapcleaner_allocation_limit)  longjump_to_heapcleaner_call();
#         In assembly code, that looks like
#             cmp heapcleaner_allocation_pointer, heapcleaner_allocation_limit
#             bgt longjump_to_heapcleaner_call                                  # "bgt" == "branch if greater-than"
#         We want the latter to be as small and fast as possible,
#         so on Intel32 these will typically be two-byte ops:
#         one byte of opcode and one byte of address.
#
#         On 32-bit RISCS they will have two bytes of address.
#         (On 32-bit RISCS with delay slots we try to further
#         optimize by putting the CMP instruction in a delay slot
#         and the BGT in the next block.)
#
#         (For private fns these heapchecks branch directly to
#         the heapcleaner calls, bypassing the level-2 longjumps.)
#
#     Level 2:
#         Longjumps to the actual code to call the heapcleaner.
#         These will generally need to have a full 32-bit address.
#         One such longjump can be shared among multiple Level-1 branches.
#
#     Level 3:
#         Codeblocks to actually call the heapcleaner.
#
#         The major problem to be solved by these blocks is that
#         at different points in the code we have live data in
#         different registers, and the types of data in those
#         registers also varies -- for example at one point
#         EAX may hold a 32-bit integer, but at another point it may
#         hold a pointer to a binary tree.  The garbage collector
#         needs to have all live pointers to keep it from recycling
#         a value we're using when it runs.
#
#         Thus, the main purpose of the heapcleaner-call blocks is to:
#
#           o Pack the live register contents into a standard
#             form intelligible to the heapcleaner. Unused
#             registers also need to be nulled out at this point.
#
#           o Call the heapcleaner.
#
#           o Unpack the original register contents back into
#             the registers, and resume execution.
#
#
#
#                -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#
#                "This new version is derived from the generic CallGC.
#                 It can handle derived pointers as roots and it can also be used as
#                 callbacks.  These extra facilities are neccessary for global
#                 optimizations  in the presence of heapcleaning."
#
#                        -- Allen Leung

# Compiled by:
#     src/lib/compiler/core.sublib

###              "I hate flowers. I paint them because they're
###               cheaper than models and they don't move."
###
###                            -- Georgia O'Keeffe

###              "We believe in rough concensus and working code."
###
###                                    -- David Clark, IETF

# We are invoked from:
#
#     src/lib/compiler/back/low/main/main/backend-lowhalf-g.pkg

stipulate
    package ctl =  global_controls;                             # global_controls                       is from   src/lib/compiler/toplevel/main/global-controls.pkg
    package cos =  registerkinds_junk::cos;                     # "cos" == "colorset".
    package err =  error_message;                               # error_message                         is from   src/lib/compiler/front/basics/errormsg/error-message.pkg
    package ncf =  nextcode_form;                               # nextcode_form                         is from   src/lib/compiler/back/top/nextcode/nextcode-form.pkg
    package frr =  nextcode_ramregions;                         # nextcode_ramregions                   is from   src/lib/compiler/back/low/main/nextcode/nextcode-ramregions.pkg
    package lbl =  codelabel;                                   # codelabel                             is from   src/lib/compiler/back/low/code/codelabel.pkg
    package lhn =  lowhalf_notes;                               # lowhalf_notes                         is from   src/lib/compiler/back/low/code/lowhalf-notes.pkg
    package lun =  large_unt;                                   # large_unt                             is from   src/lib/std/large-unt.pkg
    package rkj =  registerkinds_junk;                          # registerkinds_junk                    is from   src/lib/compiler/back/low/code/registerkinds-junk.pkg
    package rwv =  rw_vector;                                   # rw_vector                             is from   src/lib/std/src/rw-vector.pkg
    package sl  =  sorted_list;                                 # sorted_list                           is from   src/lib/compiler/back/low/library/sorted-list.pkg
herein

    generic package  put_treecode_heapcleaner_calls_g  (
        #            =================================
        #
                                                                # machine_properties_intel32            is from   src/lib/compiler/back/low/main/intel32/machine-properties-intel32.pkg
        package mp: Machine_Properties;                         # Machine_Properties                    is from   src/lib/compiler/back/low/main/main/machine-properties.api

                                                                # platform_register_info_intel32        is from   src/lib/compiler/back/low/main/intel32/backend-lowhalf-intel32-g.pkg
        package pri: Platform_Register_Info                     # Platform_Register_Info                is from   src/lib/compiler/back/low/main/nextcode/platform-register-info.api
                     where                                      # "tcf" == "treecode_form".
                         tcf::rgn == nextcode_ramregions;       # "rgn" == "region".

        package tcs: Treecode_Codebuffer                                # Treecode_Codebuffer                   is from   src/lib/compiler/back/low/treecode/treecode-codebuffer.api
                     where
                         tcf == pri::tcf;                       # "tcf" == "treecode_form".

        package mcg: Machcode_Controlflow_Graph                 # Machcode_Controlflow_Graph            is from   src/lib/compiler/back/low/mcg/machcode-controlflow-graph.api
                     where
                         pop == tcs::cst::pop;                  # "pop" == "pseudo_op".
    )
    : (weak) Emit_Treecode_Heapcleaner_Calls                    # Emit_Treecode_Heapcleaner_Calls       is from   src/lib/compiler/back/low/main/nextcode/emit-treecode-heapcleaner-calls.api
    {
        # Export to client packages:
        #
        package tcs = tcs;                                      # "tcs" == "treecode_stream".
        package mcg = mcg;                                      # "mcg" == "machcode_controlflow_graph".

        stipulate
            package tcf =  pri::tcf;                            # "tcf" == "treecode_form".
            package cd  =  mp::heap_tags;
            package rgk =  pri::rgk;                            # "rgk" == "registerkinds".
        herein

            fun error msg
                =
                err::impossible("cleaner." + msg);

            Fun_Info
              =
              { max_possible_heapbytes_allocated_before_next_heaplimit_check:   Int,
                #
                live_registers:                                         List( tcf::Expression ),
                live_register_types:                                    List( ncf::Type ),
                #
                return:                                                 tcf::Void_Expression
              };
                #
                # This type is used (only) as an argument for:
                #
                #     put_heaplimit_check_and_push_heapcleaner_call_spec_for_public_fn
                #     put_heaplimit_check_and_push_heapcleaner_call_spec_for_unoptimized_private_fn
                #     put_heaplimit_check_and_push_heapcleaner_call_spec_for_optimized_private_fn

            Stream = tcs::Treecode_Codebuffer
                      (
                        tcf::Void_Expression,
                        List( tcf::Expression ),
                        mcg::Machcode_Controlflow_Graph
                      );

            debug_heapcleaner
                =
                ctl::lowhalf::make_bool ("debug_heapcleaner", "heapcleaner invocation debug mode");

                                                                                                    # lowhalf_notes             is from   src/lib/compiler/back/low/code/lowhalf-notes.pkg
            zero_freq_note        =  lhn::execution_freq.x_to_note  0;
            heapcleaner_call_note =  lhn::call_heapcleaner.x_to_note ();
            no_optimization_note  =  lhn::no_optimization.x_to_note ();

            # The following type is used to encapsulate
            # all the information needed to generate code
            # to invoke the heapcleaner.
            #
            # The important fields are:
            #
            #    private:
            #                 Do we know all callers of this function
            #                 -- that is, is it an internal function?
            #
            #    optimized:   If this is TRUE, heapcleaner code generation is delayed
            #                 until we have performed all optimizations.
            #                 This is FALSE for normal Mythryl use.
            #
            #    heapcleaner_label:
            #                 The codelabel on the call-heapcleaner block.
            #
            #    live_registers:   The heapcleaner "roots" -- actually, all live registers.
            #
            #    rootholding_registers, floatholding_registers, intholding_registers:
            #                 live_registers partitioned into three classes:
            #                   o Registers containing integers.
            #                   o Registers containing floats.
            #                   o Registers containing heapcleaner roots -- pointers into the heap.
            #
            #    return:      How to return from the call-heapcleaner block.
            #
            Spec_For_Heapcleaner_Call                                                   # "spec" == "specification".
                =
                SPEC_FOR_HEAPCLEANER_CALL
                  {
                    fn_is_private:              Bool,                                   # Known function ?
                    fn_will_be_optimized:       Bool,                                   # Optimized?
                    #
                    label_on_heapcleaner_call:  Ref( lbl::Codelabel ),                  # The heaplimit checks branch either directly to this codelabel, or else branch to a longjump which jumps to it.
                    #
                    live_registers:             List( tcf::Expression ),                # All live registers.
                    #
                    rootholding_registers:      List( tcf::Int_Expression ),            # Live registers holding root  values.
                    intholding_registers:       List( tcf::Int_Expression ),            # Live registers holding int   values. (I.e., non-root values.)
                    floatholding_registers:     List( tcf::Float_Expression ),          # Live registers holding float values.
                    #
                    return:                     tcf::Void_Expression                    # How to return.
                  };

            Spec_For_Longjump_To_Heapcleaner_Call
                =
                SPEC_FOR_LONGJUMP_TO_HEAPCLEANER_CALL
                  {
                    spec_for_heapcleaner_call:  Spec_For_Heapcleaner_Call,                      #
                    labels_on_longjump:         Ref(  List(  lbl::Codelabel ) )         # One codelabel for each branch that jumps to us.
                  };

            ######################################################################
            # Implementation/architecture specific stuff starts here.
            ######################################################################

            # Extra space in allocation space
            # The Mythryl runtime system leaves around 4K of extra space
            # in the allocation space for safety.

            skid_pad_size_in_bytes = 4096;      # This has(?) to match   max_heapwords_to_allocate_between_heaplimit_checks   in   src/lib/compiler/back/low/main/nextcode/pick-nextcode-fns-for-heaplimit-checks.pkg
                                                # This has(?) to match   4 * ONE_K_BINARY                                     in   src/c/main/run-mythryl-code-and-runtime-eventloop.c

            bits_per_pointer = 32;              # Pointer width in bits.                                         64-BIT-ISSUE. XXX SUCKO FIXME.

            vfp = FALSE;                        # Don't use virtual frame ptr here.

            void = tcf::LITERAL 1;              # Representation of Mythryl's Void;                             XXX SUCKO FIXME this should be a manifest constant of some sort.
                                                # this is used to initialize registers.
            #
            fun make_int_literal i
                =
                tcf::LITERAL (tcf::mi::from_int (32, i));                                                       # 64-bit ISSUE. XXX SUCKO FIXME.

            # Callee-save registers
            # All callee-save registers are used
            # in the heapcleaner calling convention.
            #
            calleesaves                                                                                         # On Intel32 this is [ ebx, ecx, edx ].
                =                                                                                               # pri::miscregs = { ebx, ecx, edx, r10, r11, ... r31 }  on Intel32.
                list::take_n (pri::miscregs, mp::num_callee_saves);                                             # mp::num_callee_saves = 3 on Intel32 -- see src/lib/compiler/back/low/main/main/machine-properties-default.pkg

            # These are the registers in which the heapcleaner
            # looks for roots.  If we have fewer roots to pass,
            # we can null out the extra arg registers.  If we
            # have more roots to pass than arg registers in which
            # to put them, we can bundle the extras into a heap
            # record and pass a pointer to that record in one of
            # the arg registers:
            #
            heapcleaner_arg_registers
                =
                (   pri::stdlink vfp                                                                            # vreg 0                on Intel32.
                !   pri::stdclos vfp                                                                            # vreg 1                on Intel32.
                !   pri::stdfate vfp                                                                            # esi                   on Intel32.
                !   pri::stdarg  vfp                                                                            # ebp                   on Intel32.
                !   calleesaves                                                                                 # [ ebx, ecx, edx ]     on Intel32.
                );
                # This list is exported, but only used in       src/lib/compiler/back/low/main/main/backend-lowhalf-g.pkg
                # as an arg to                                  src/lib/compiler/back/low/main/nextcode/check-heapcleaner-calls-g.pkg

            # Synthesize treecode form of a call to the heapcleaner:
            #
            # This involves a jump into the C/assembly runtime via a
            # pointer maintained on the C stack, accessible via the
            # framepointer register, which may be a real register,
            # or a virtual register faked via creative use of the
            # stackpointer register:
            #
            treecode_which_calls_heapcleaner_via_framepointer
                =
                {   uses =  map  tcf::INT_EXPRESSION  heapcleaner_arg_registers;

                    defs =  uses;

                    # If we are playing the RISC game of doing
                    #
                    #     cmp heap_allocation_pointer, heap_allocation_limit
                    #
                    # in the delay slot and then preserving the resulting status
                    # register bits until we later do the actual
                    #
                    #     bgt gt, longjump_to_heapcleaner
                    #
                    # then we need to remember that we also have those
                    # status register bits as a def here:
                    #
                    defs =  case pri::heap_is_exhausted__test
                                #
                                THE platform_specific__heap_is_exhausted__test =>  tcf::FLAG_EXPRESSION platform_specific__heap_is_exhausted__test  !  defs;
                                NULL                                           =>                                                                      defs;
                            esac;

                    # Make treecode to call the heapcleaner.
                    #
                    # The pointer                 mp::run_heapcleaner__offset                           here
                    # corresponds to              run_heapcleaner_ptr                                   in   src/c/machine-dependent/prim.intel32.asm
                    # which is set up by          asm_run_mythryl_task                                  in   src/c/machine-dependent/prim.intel32.asm
                    # to REQUEST_HEAPCLEANING to  run_mythryl_task_and_runtime_eventloop__may_heapclean in   src/c/main/run-mythryl-code-and-runtime-eventloop.c
                    # which will call             clean_heap                                            in   src/c/heapcleaner/call-heapcleaner.c
                    #
                    # At least, that's the Intel32-backend story;
                    # other backends are similar:
                    #
                    heapcleaner_call                                                                                    # (*pri::framepointer[ mp::run_heapcleaner__offset ]) ();
                        =
                        tcf::CALL
                          {
                            funct   => tcf::LOAD  ( 32,                                                                 # 64-bit issue, obviously.
                                                    tcf::ADD  ( pri::address_width,
                                                                pri::framepointer vfp,
                                                                make_int_literal  mp::run_heapcleaner__offset           # run_heapcleaner__offset is 32 on Intel32.
                                                              ),
                                                    frr::stack
                                                  ),
                            targets => [],
                            defs,
                            uses,
                            region  => frr::stack,
                            pops    => 0
                          };

                    # Mark it with a heapcleaner_call annotation:
                    #
                    heapcleaner_call =   tcf::NOTE (heapcleaner_call, heapcleaner_call_note);
                    heapcleaner_call =   tcf::NOTE (heapcleaner_call, lhn::comment.x_to_note "call heapcleaner");
                    heapcleaner_call;
                };

            # Heapchunk tagwords:
            #
            fun make_unboxed_tagword  words =   lun::to_int (cd::make_tagword (words, cd::eight_byte_aligned_nonpointer_data_btag ));
            fun   make_boxed_tagword  words =   lun::to_int (cd::make_tagword (words, cd::pairs_and_records_btag));

            # The heap allocation pointer must
            # always be in a register!
            #
            heap_allocation_pointer_register
                =
                case pri::heap_allocation_pointer
                    #
                    tcf::CODETEMP_INFO(_, heap_allocation_pointer_register) =>  heap_allocation_pointer_register;
                    _                                             =>  error "heap_allocation_pointer must be a register";
                esac;

            # When checking for heap exhaustion by doing
            #
            #     (heap_allocation_pointer > heap_allocation_limit)
            #
            # should we use signed- or unsigned- greater-than compares?
            #
            # Either one may be faster, depending on target architecture:
            #
            heapcleaner_gt
                =
                pri::use_signed_heaplimit_check
                    ??  tcf::GT
                    ::  tcf::GTU;

            unlikely          =   lhn::branch_probability.x_to_note   probability::unlikely;

            # This is the straightforward way to test for
            #
            #     (heap_allocation_pointer > heap_allocation_limit)
            #
            normal__heap_is_exhausted__test                                                                     # The vanilla way to test for (heap_allocation_pointer > heap_allocation_limit);
                =                                                                                               # this vanilla approach may be overridden on a per-platform basis via pri::heap_is_exhausted__test
                tcf::CMP
                  ( bits_per_pointer,
                    heapcleaner_gt,                                                                             # Signed or unsigned   >   test, depending on platform.
                    pri::heap_allocation_pointer,                                                               # We allot heap memory just by advancing this pointer.
                    pri::heap_allocation_limit vfp                                                              # Heap is exhausted when heap_allocation_pointer reaches this point.
                  );

            ######################################################################
            # Private state                                                                                     # All three of these are:  More icky thread-hostile mutable global state. XXX SUCKO FIXME
            ######################################################################

            # The first thing we do is emit code for the
            #
            #     if (heap_allocation_pointer > heap_allocation_limit)  goto(label);
            #
            # checks in the code.  The code we're jumping to does't actually
            # exist at this point -- it is represented only by 'label'. This
            # label has lead to a heapcleaner call, and in particular to a
            # heapcleaner call customized for the particular pattern of
            # registers contents which are alive at the point where the
            # compare-and-branch is done.
            #
            # To make this work we push all each such label on a list as
            # we create it, together with a specification of the heapcleaner
            # call which it needs to lead to.
            #
            # We segregate these collected label-plus-specs into two lists,
            # one for heaplimit checks in public functions (basic code blocks) and
            # one for heaplimit checks in private functions.
            #
            # In a later pass (put_longjump_heapcleaner_calls) we scan these lists,
            # emitting label definitions plus appropriate code:
            #
             public_fn_heaplimit_checks__global =   REF ([]:  List( Spec_For_Heapcleaner_Call    ));            #
            private_fn_heaplimit_checks__global =   REF ([]:  List( Spec_For_Heapcleaner_Call    ));            #

            # During the above-mentioned   put_longjump_heapcleaner_calls
            # pass we consume the above two lists and in turn emit longjump
            # specs which get collected on this list:
            #
            longjumps_to_heapcleaner_calls__global      =   REF ([]:  List( Spec_For_Longjump_To_Heapcleaner_Call ));   #

            ######################################################################
            # Auxiliary functions
            ######################################################################

            # Divide a list of "registers" into two lists,
            # one containing the true registers
            # and one containing the ramreg extra-registers-faked-in-ram.
            #
            # Memory offsets must be relative
            # to the frame pointer.
            #
            # We need this mainly(?) because Intel32 is so register-starved
            # that we use memory words for some of our "registers":
            #
            fun split_registers_list_into_rregs_lists  registers                                                # "rregs" == "regs_plus_ramregs".
                =
                {   the_vfp =   pri::virtual_framepointer;

                    the_fp  =   case (pri::framepointer FALSE)
                                    #
                                    tcf::CODETEMP_INFO (_, the_fp) =>   the_fp;
                                    _                    =>   error "the_fp";
                                esac;

                    # At this point, the_vfp will always eventually
                    # end up being the_fp, but lowhalf_gen might
                    # pass in references to the_vfp anyway (because
                    # of some RCC that happens to be in the cccomponent)
                    # so we test for both the real frame pointer (the_fp)
                    # and the virtual frame pointer (the_vfp) here:
                    #
                    fun is_framepointer fp
                        =
                        rkj::codetemps_are_same_color (fp, the_fp)    or
                        rkj::codetemps_are_same_color (fp, the_vfp);

                    #
                    fun split_regs_from_ram ([], regs, mem)                                                                             # Done -- return two resultlists.
                            =>
                            (regs, mem);

                        split_regs_from_ram (tcf::CODETEMP_INFO(_, r) ! rest, regs, mem)                                                                # True register -- add to 'regs' resultlist.
                            =>
                            split_regs_from_ram (rest, r ! regs, mem);

                        split_regs_from_ram (tcf::LOAD(_, tcf::CODETEMP_INFO(_, fp), _) ! rest, regs, mem)                                      # Ram "register" -- add to 'mem" resultlist.
                            =>
                            if (is_framepointer fp)   split_regs_from_ram (rest, regs, 0 ! mem);
                            else                   error "split_registers_list_into_rregs_lists: LOAD32";
                            fi;

                        split_regs_from_ram (tcf::LOAD(_, tcf::ADD(_, tcf::CODETEMP_INFO(_, fp), tcf::LITERAL i), _) ! rest, regs, mem)         # Ram "register" -- add to 'mem" resultlist.
                            =>
                            if (is_framepointer fp)   split_regs_from_ram (rest, regs, tcf::mi::to_int (32, i) ! mem);                                                                  # 64-bit issue: '32' is bits-per-word.
                            else                   error "split_registers_list_into_rregs_lists: LOAD32";
                            fi;

                        split_regs_from_ram _
                            =>
                            error "split_regs_from_ram";
                    end;

                    (split_regs_from_ram (registers, [], []))
                        ->
                        (regs, mem);

                    { regs =>   rkj::sortuniq_colored_codetemps  regs,          # This sorts 'regs' by color (i.e., actual hardware register id) and drops any duplicated colors.
                      mem  =>   sl::uniq mem
                    };
                };
            #
            fun rregs_difference (   { regs=>r1, mem=>m1 },
                                     { regs=>r2, mem=>m2 }
                                 )
                =
                { regs =>   cos::difference_of_colorsets (r1, r2),
                  mem  =>    sl::difference              (m1, m2)
                };
            #
            fun rregs_to_string { regs, mem }
                #
                = "{ "
                +   fold_backward   (\\ (reg, s) =   rkj::register_to_string reg + " " + s)   ""   regs
                +   fold_backward   (\\ (mem, s) =   int::to_string          mem + " " + s)   ""   mem
                + "}";

            # The mutator (user Mythryl code) passes
            # root pointers to the heapcleaner via the
            # following set of registers and ram cells:
            #
            heapcleaner_arg_rregs                                                       # On Intel32 this is:   ([esi, ebp, ebx, ecx, edx],  [vreg 0, vreg 1]).
                =
                split_registers_list_into_rregs_lists  heapcleaner_arg_registers;

            # Later we'll need an arbitrary element of the
            # arg-registers list, so we create it here:
            #
            a_heapcleaner_arg_reg  =   tcf::CODETEMP_INFO (32, head heapcleaner_arg_rregs.regs);                                                                # 64-BIT ISSUE: '32' is bits-per-word.

            # This function emits a heaplimit check-and-branch.
            # It returns the codelabel to which the test jumps,
            # which needs to be placed on the heapcleaner-invocation
            # basic block or a longjump to it:
            #
            fun put_heaplimit_check_and_branch (emit, max_possible_heapbytes_allocated_before_next_heaplimit_check)
                =
                heaplimit_branch_target_label
                where
                    heaplimit_branch_target_label =   lbl::make_anonymous_codelabel ();
                    #
                    fun put__call_heapcleaner_if                                                # Emit code which tests for heap-exhausted and runs the heapcleaner if it is.
                            #
                            heap_is_exhausted__test                                             # Some way of testing whether  (heap_allocation_pointer > heap_allocation_limit)
                        =                                                                       #  -- see src/lib/compiler/back/low/main/nextcode/platform-register-info.api
                        emit  (tcf::NOTE
                                ( tcf::IF_GOTO (heap_is_exhausted__test, heaplimit_branch_target_label),
                                  unlikely
                                )
                              );

                    if (max_possible_heapbytes_allocated_before_next_heaplimit_check < skid_pad_size_in_bytes)
                        #
                        case pri::heap_is_exhausted__test
                            #
                            THE platform_specific__heap_is_exhausted__test =>  put__call_heapcleaner_if  platform_specific__heap_is_exhausted__test;            # Check result of heap-exhausted test preserved in status register.
                            NULL                                           =>  put__call_heapcleaner_if             normal__heap_is_exhausted__test;            # Do full heap-exhausted test.
                        esac;
                        #
                        # In the platform-specific case above
                        # we are not actually doing the
                        #
                        #     (heap_allocation_pointer > heap_allocation_limit)
                        #
                        # comparison at this point, but rather just checking
                        # preserved status-register bits produced by the
                        # delay-slot compare generated in
                        #
                        #     src/lib/compiler/back/low/main/main/translate-nextcode-to-treecode-g.pkg
                        #
                        # The apparent point of this is so we can do the actual
                        # compares "for free" in delay slots on Sparc etc.
                    else
                        offset_heap_allocation_pointer
                            =
                            tcf::ADD  ( pri::address_width,
                                        pri::heap_allocation_pointer,
                                        make_int_literal (max_possible_heapbytes_allocated_before_next_heaplimit_check - skid_pad_size_in_bytes)
                                      );

                        shifted_heaplimit_test =   tcf::CMP (bits_per_pointer, heapcleaner_gt, offset_heap_allocation_pointer, pri::heap_allocation_limit vfp);

                        case pri::heap_is_exhausted__test
                            #
                            THE (platform_specific__heap_is_exhausted__test as tcf::CC(_, r))
                                =>
                                {   emit (tcf::LOAD_INT_REGISTER_FROM_FLAGS_REGISTER (r, shifted_heaplimit_test));
                                    #
                                    put__call_heapcleaner_if  platform_specific__heap_is_exhausted__test;
                                };

                            NULL =>  put__call_heapcleaner_if  shifted_heaplimit_test;

                            _ => error "put_heaplimit_check_and_branch";
                        esac;
                    fi;
                end;                                                            # fun put_heaplimit_check_and_branch

            # Recompute the base pointer address,
            # since heapcleaner may have moved code.
            # This code will be run immediately after
            # the heapcleaner returns to us.
            #
            #     "The base_pointer contains the start address                                                                                  # The base_pointer appears to be used only in:
            #      of the entire compilation unit."                                                                                             #
            #                                                                                                                                   #        src/lib/compiler/back/low/main/main/translate-nextcode-to-treecode-g.pkg
            # Here we basically generate code equivalent to:
            #
            #      return_label:
            #          base_pointer = heapcleaner_link + (base_pointer_offset - return_label);
            #
            # On Intel32   base_pointer_offset   is zero, so this reduces to:
            #
            #      return_label:
            #          base_pointer = heapcleaner_link - return_label;
            #
            # If 'return_label' is our offset relative to start
            # of current package's compiled binary code, and
            # if 'heapcleaner_link' is essentially current pc,
            # then the difference will give the start of the
            # current package's compiled binary code.
            #
            base_pointer_offset =   tcf::LITERAL  (multiword_int::from_int   mp::const_base_pointer_reg_offset);
            #
            fun put_base_pointer_update  (emit,  put_private_label,  put_bblock_note)
                =
                {   return_label =   lbl::make_anonymous_codelabel ();

                    base_pointer_expression                                                                                                             # heapcleaner_link + (base_pointer_offset - return_label)
                        =
                        tcf::ADD  ( pri::address_width,
                                    pri::heapcleaner_link  vfp,
                                    tcf::LABEL_EXPRESSION
                                        (tcf::SUB
                                          ( pri::address_width,
                                            base_pointer_offset,
                                            tcf::LABEL return_label
                                          )
                                        )
                                  );

                    put_private_label  return_label;

                    put_bblock_note  zero_freq_note;

                    case (pri::base_pointer vfp)                                                                                                        # "The base_pointer contains the start address of the entire compilation unit."
                        #
                        tcf::CODETEMP_INFO  (bits, base_pointer_reg)       =>  emit (tcf::LOAD_INT_REGISTER (bits, base_pointer_reg,  base_pointer_expression));                # base_pointer_reg =  heapcleaner_link + (base_pointer_offset - return_label)
                        tcf::LOAD (bits, base_pointer_addr, mem) =>  emit (tcf::STORE_INT         (bits, base_pointer_addr, base_pointer_expression, mem));
                        _                                   =>  error "put_base_pointer_update";
                    esac;
                };

            ######################################################################
            # Main functions
            ######################################################################

            # This fun is called (only) from:
            #
            #     src/lib/compiler/back/low/main/main/backend-lowhalf-g.pkg
            #
            fun clear__public_fn_heapcleaner_call_specs__private_fn_heapcleaner_call_specs__and__longjumps_to_heapcleaner_calls  ()
                =
                {   public_fn_heaplimit_checks__global          :=  [];
                    private_fn_heaplimit_checks__global         :=  [];
                    longjumps_to_heapcleaner_calls__global      :=  [];
                };

            # Split the live register list into three lists by type:
            #
            #    o Root:   Pointer into heap in general-purpose register.
            #    o Int:    Nonpointer        in general-purpose register.
            #    o Float:  Nonpointer        in floating-point  register.
            #
            fun classify_live_registers_into_root_int_and_float ([], [], rootholding_registers, intholding_registers, floatholding_registers)
                    =>
                    { rootholding_registers, intholding_registers, floatholding_registers };

                classify_live_registers_into_root_int_and_float (  tcf::INT_EXPRESSION r ! rl, ncf::typ::FLOAT64 ! tl, b, i, f) =>   error "classify_live_registers_into_root_int_and_float: tcf::INT_EXPRESSION";
                classify_live_registers_into_root_int_and_float (  tcf::INT_EXPRESSION r ! rl, ncf::typ::INT1    ! tl, b, i, f) =>   classify_live_registers_into_root_int_and_float (rl, tl,     b, r ! i,     f);
                classify_live_registers_into_root_int_and_float (  tcf::INT_EXPRESSION r ! rl, _                 ! tl, b, i, f) =>   classify_live_registers_into_root_int_and_float (rl, tl, r ! b,     i,     f);
                classify_live_registers_into_root_int_and_float (tcf::FLOAT_EXPRESSION r ! rl, ncf::typ::FLOAT64 ! tl, b, i, f) =>   classify_live_registers_into_root_int_and_float (rl, tl,     b,     i, r ! f);

                classify_live_registers_into_root_int_and_float _ => error "classify_live_registers_into_root_int_and_float";
            end;

            stipulate
                fun put_heaplimit_check_and_push_heapcleaner_call_spec
                        {
                          heapcleaner_call_specs,                                       # Where to push the callspec -- this will be either   public_fn_heapcleaner_call_specs   or   private_fn_heapcleaner_call_specs.
speclist_name,
                          fn_is_private,
                          fn_will_be_optimized
                        }
                        ( { put_op, ... }: Stream)
                        { max_possible_heapbytes_allocated_before_next_heaplimit_check, live_registers, live_register_types, return }
                    =
                    {   # Partition the live registers into:
                        #
                        #    o Those that hold roots (pointers into the heap) -- we'll pass these to the heapcleaner.
                        #    o Those that hold integer values.
                        #    o Those that hold float   values.
                        #
                        (classify_live_registers_into_root_int_and_float (live_registers, live_register_types, [], [], []))
                            ->
                            { rootholding_registers, intholding_registers, floatholding_registers };

                        # Generate a heaplimit check
                        # and push spec for its call.
                        #
                        # We assume initially that the heaplimit check
                        # will branch directly to the heapcleaner call,
                        # and set 'label_on_heapcleaner_call' accordingly.
                        #
                        # In the case of public fns, we will later change
                        # this so the heaplimit check branches to a longjump
                        # which then jumps to the heapcleaner call proper:
                        #
                        heapcleaner_call_specs
                            :=
                            SPEC_FOR_HEAPCLEANER_CALL
                              {
                                fn_is_private,
                                fn_will_be_optimized,
                                label_on_heapcleaner_call =>   REF (put_heaplimit_check_and_branch  (put_op,  max_possible_heapbytes_allocated_before_next_heaplimit_check)),
                                rootholding_registers,
                                intholding_registers,
                                floatholding_registers,
                                live_registers,
                                return
                            }
                            !
                            *heapcleaner_call_specs;
                    };
            herein
                # These three functions are called (only) from:
                #
                #     src/lib/compiler/back/low/main/main/translate-nextcode-to-treecode-g.pkg

                # Check-limit for "public" functions,
                # i.e. functions which may have unknown callers:
                #
                put_heaplimit_check_and_push_heapcleaner_call_spec_for_public_fn
                    =
                    put_heaplimit_check_and_push_heapcleaner_call_spec
                      {
                        heapcleaner_call_specs =>  public_fn_heaplimit_checks__global,
speclist_name => "public_fn_heaplimit_checks__global",
                        fn_is_private          =>  FALSE,
                        fn_will_be_optimized   =>  FALSE
                      };

                # Check-limit for "private" functions, i.e..
                # those for which we explicitly know all
                # possible callers, and consequently can optimize
                # the calling register protocol:
                #
                put_heaplimit_check_and_push_heapcleaner_call_spec_for_unoptimized_private_fn
                    =
                    put_heaplimit_check_and_push_heapcleaner_call_spec
                      {
                        heapcleaner_call_specs =>  private_fn_heaplimit_checks__global,
speclist_name => "private_fn_heaplimit_checks__global",
                        fn_is_private          =>  TRUE,
                        fn_will_be_optimized   =>  FALSE
                      };

                # Same as above, but for functions which are to be optimized:
                #
                put_heaplimit_check_and_push_heapcleaner_call_spec_for_optimized_private_fn
                    =
                    put_heaplimit_check_and_push_heapcleaner_call_spec
                      {
                        heapcleaner_call_specs =>  private_fn_heaplimit_checks__global,
speclist_name => "private_fn_heaplimit_checks__global",
                        fn_is_private          =>  TRUE,
                        fn_will_be_optimized   =>  TRUE
                      };
            end;

            # Allocate a rw_vector for checking for overlaps
            # between live-roots and heapcleaner-arg-registers.
            # This state is shared (only) by pack() and unpack():
            #
            stipulate
                max_arg_reg_id
                    =
                    fold_backward
                        (\\ (register, n) =  int::max (rkj::intrakind_register_id_of  register,  n))
                        0
                        heapcleaner_arg_rregs.regs;
            herein
                # This is the usual hack where instead of taking time
                # to clear our vector to zero each run, we just increment
                # stamp__global each run, giving us a new "set" value to
                # check for in the vector -- this effectively clears the
                # the vector to "zero" in O(1) time:
                #
                live_regs_vector__global =  rwv::make_rw_vector (max_arg_reg_id + 1, -1);                       # More icky thread-hostile mutable global state.  XXX SUCKO FIXME
                stamp__global            =  REF 0;                                                              # More icky thread-hostile mutable global state.  XXX SUCKO FIXME
            end;

            fun put_code_to_load_all_roots_into_heapcleaner_arg_registers
                  ( put_op,
                    #
                    available_heapcleaner_arg_registers,
                    #
                    rootholding_registers,
                    intholding_registers,
                    floatholding_registers
                  )
                =
                # This function emits code to pack
                #
                #     intholding_registers
                #     floatholding_registers
                #     rootholding_registers
                #
                # into
                #
                #     heapcleaner_arg_registers
                #
                # The contents of the first two are nonpointer data of
                # no interest to the heapcleaner, so they get saved on
                # the heap in a new records (which becomes a new member
                # of rootholding_registers).
                #
                # If the registers-to-pass (rootholding_registers) outnumber
                # the registers-available (available_heapcleaner_arg_registers)
                # then we pack the overflow into a heap record, which
                # likewise becomes a new root to be passed to the heapcleaner.
                #
                # heapcleaner_arg_registers must be non-empty -- we can't pass
                # anything to the heapcleaner in zero registers!
                #
                # We return a function to unpack everything back into
                # the original registers after heapcleaning is complete.
                {   Heapcleaner_Flavor                                          # Classify register contents per heapcleaner's view of the world.
                      #
                      = REG      rkj::Codetemp_Info                             # Integer register.
                      | FREG     rkj::Codetemp_Info                             # Floating point register.
                      | MEM      (tcf::Int_Expression, frr::Ramregion)          # Integer memory register.
                      #
                      | RECORD  { is_boxed:     Bool,                           # Is it a boxed record?
                                  words:        Int,                            # How many words?
                                  reg:          rkj::Codetemp_Info,             # Address of this record.
                                  reg_tmp:      rkj::Codetemp_Info,             # Temp used for unpacking.
                                  fields:       List( Heapcleaner_Flavor )      # Its fields.
                                };

                    # Translate int_expression/float_expression into heapcleaner flavor.
                    # Note: client roots from memory (XXX) should NOT be used without
                    # fixing a potential cycle problem in the parallel copies below.            XXX SUCKO FIXME
                    # Currently no architectures -- including Intel32 -- use
                    # the LOAD(...) form, so we are safe.
                    #
                    fun tcf_reg_to_heapcleaner_flavor (tcf::CODETEMP_INFO  (32, r            )) =>   REG r;
                        tcf_reg_to_heapcleaner_flavor (tcf::LOAD (32, ea, ramregion)) =>   MEM (ea, ramregion);  #  XXX
                        tcf_reg_to_heapcleaner_flavor (_)                             =>   error "tcf_reg_to_heapcleaner_flavor";
                    end;
                    #
                    fun tcf_freg_to_heapcleaner_flavor (tcf::CODETEMP_INFO_FLOAT (64, r)) =>   FREG r;
                        tcf_freg_to_heapcleaner_flavor (_)                 =>   error "tcf_freg_to_heapcleaner_flavor";
                    end;

                    stamp  =  *stamp__global;
                    cyclic =   stamp + 1;

                    # "Clear" our live_regs_vector__global by
                    # incrementing stamp__global.  We increment by
                    # two because we need two fresh values on each
                    # pass -- 'stamp' and 'cyclic' above:
                    #
                    if (stamp > 100000)   stamp__global := 0;                   # Wrap around after compiling 10,000 cccomponents (packages).
                    else                  stamp__global := stamp + 2;
                    fi;

                    live_regs_vector_length =   rwv::length  live_regs_vector__global;

                    {   note_live_registers  rootholding_registers;
                        note_live_registers   intholding_registers;
                        #
                        note_arg_registers  available_heapcleaner_arg_registers;
                    }
                    where
                        # Here we're entering all our live registers into
                        #
                        #     live_regs_vector__global
                        #
                        fun note_live_registers []
                                =>
                                ();

                            note_live_registers   (tcf::CODETEMP_INFO(_, register)   ! rest)
                                =>
                                {   reg_id =  rkj::intrakind_register_id_of  register;

                                    if (reg_id < live_regs_vector_length)
                                        #
                                        rwv::set (live_regs_vector__global, reg_id, stamp);
                                    fi;

                                    note_live_registers  rest;
                                };

                            note_live_registers(_ ! rs)
                                =>
                                note_live_registers rs;
                        end;

                        # Here we're checking all our heapcleaner arg registers against
                        #
                        #     live_regs_vector__global
                        #
                        # to see if they are also live registers -- if so, we'll
                        # have to be careful when copying live-reg values into
                        # the heapcleaner-arg registers.
                        #
                        # We mark any overlapping registers as 'cyclic':
                        #
                        fun note_arg_registers []
                                =>
                                ();

                            note_arg_registers (tcf::CODETEMP_INFO(_, register) ! rest)
                                =>
                                {   reg_id = rkj::intrakind_register_id_of  register;

                                    if  (rwv::get (live_regs_vector__global, reg_id) == stamp)
                                         rwv::set (live_regs_vector__global, reg_id, cyclic);
                                    fi;

                                    note_arg_registers rest;
                                };

                            note_arg_registers (_ ! rest)
                                =>
                                note_arg_registers rest;
                        end;
                    end;

                    # Any int or float values in live registers
                    # are of no interest (per se) to the heapcleaner
                    # but they do need to be preserved in ram
                    # during heapcleaning and then restored to their
                    # original registers before we restart the interrupted
                    # user program, so here we (set up to) save the contents of
                    # all intholding_registers and floatholding_registers in
                    # a new heap record and add that record to our list of
                    # roots to be passed to the heapcleaner:
                    #
                    roots_for_heapcleaner
                        =
                        case (intholding_registers, floatholding_registers)
                            #
                            ([], [])                                                            # No int or float values to preserve -- life is easy!
                                =>
                                map  tcf_reg_to_heapcleaner_flavor  rootholding_registers;

                            _
                                =>
                                {   # Align the heap_allocation_pointer
                                    # if we have floating point roots:
                                    #
                                    case floatholding_registers
                                        #
                                        [] => ();                                               # No 64-bit values to preserve, so no need to 64-bit align the heappointer.

                                        _  => put_op  ( tcf::LOAD_INT_REGISTER                  # heap_allocation_pointer |= 4;
                                                           ( pri::address_width,
                                                             heap_allocation_pointer_register,
                                                             #
                                                             tcf::BITWISE_OR
                                                               ( pri::address_width,
                                                                 pri::heap_allocation_pointer,
                                                                 make_int_literal 4             # 64-bit issue. This aligns heap_allocation_pointer correctly for a 32-bit tagword followed by 64-bit float.
                                                               )                                # This won't work and is counterproductive if we're using 64-bit tagwords and keeping the heap always 64-bit aligned.  XXX BUGGO FIXME.
                                                           )
                                                       );
                                    esac;

                                    # Figure out how many 64-bit words it will take to hold
                                    # all the int and float values we want to preserve:
                                    #
                                    qwords =   length floatholding_registers + (length intholding_registers + 1) / 2;   # 64-bit issue. We'll be using two_word_int not one_word_int in 64-bit mode.

                                    # Add the record spec to our list of heapcleaner roots.
                                    # (We do not yet actually create it on the heap.)
                                    # Note that the float stuff (more generally, the 64-bit
                                    # stuff) has to come first, while we still have 64-bit
                                    # alignment guaranteed:
                                    #
                                    RECORD
                                      {
                                        is_boxed =>  FALSE,
                                        reg      =>  rgk::make_int_codetemp_info (),
                                        reg_tmp  =>  rgk::make_int_codetemp_info (),

                                        words    =>  qwords + qwords,                           # 'words' is measured in 32-bit words, so double the number of 64-bit words to get the right value.

                                        fields   =>  map  tcf_freg_to_heapcleaner_flavor  floatholding_registers
                                                   @ map   tcf_reg_to_heapcleaner_flavor    intholding_registers
                                      }
                                      !
                                      map  tcf_reg_to_heapcleaner_flavor  rootholding_registers;

                                };
                        esac;

                    # Now we check whether we have enough
                    # heapcleaner argument registers to hold
                    # all the roots we need to pass to it.
                    # If so, we are golden; otherwise, we must
                    # spill some roots into a heap record and
                    # then pass that record as a new root:
                    #
                    heapcleaner_root_count  =   length  roots_for_heapcleaner;
                    arg_register_count      =   length  available_heapcleaner_arg_registers;
                    #
                    roots_for_heapcleaner
                        =
                        if (heapcleaner_root_count  <=  arg_register_count)
                            #
                            roots_for_heapcleaner;                                                                      # Good enough.
                        else
                            # Spill excess roots into a record:
                            #
                            spill_count    =  (heapcleaner_root_count - arg_register_count) + 1;                        # "+1" because we must also pass the record we're constructing here.
                            #
                            roots_to_spill  =  list::take_n (roots_for_heapcleaner, spill_count);                       # First 'spill_count' elements of roots_for_heapcleaner list.
                            remaining_roots =  list::drop_n (roots_for_heapcleaner, spill_count);                       # Remaining           elements of roots_for_heapcleaner list.
                            #
                            RECORD
                              {
                                is_boxed =>   TRUE,
                                #
                                words    =>   length roots_to_spill,
                                fields   =>          roots_to_spill,
                                #
                                reg_tmp  =>   rgk::make_int_codetemp_info (),
                                reg      =>   rgk::make_int_codetemp_info ()
                                #
                              }
                              !
                              remaining_roots;
                        fi;
                    #
                    fun put_parallel_copy ( [],   _) =>   ();
                        put_parallel_copy (dst, src) =>   put_op (tcf::MOVE_INT_REGISTERS (32, dst, src));              # Parallel copy of N source registers to N destination registers, possibly overlapping.
                    end;

                    # Here we emit the heapcleaner-call prolog -- the code
                    # immediately preceding the actual heapcleaner-call, which
                    # passes root pointers to heapcleaner in appropriate registers.
                    # We have to make sure that cycles are correctly handled
                    # so we can't do a copy at a time!  But see XXX below.
                    #
                    fun put_prolog (heapbytes_allocated, unused_registers, [], to_regs, from_regs)
                            =>
                            # No more roots to pass to heapcleaner (arg 3)
                            # so now we wrap up and return:
                            #
                            {   # Update the heap_allocation_pointer if we have done any allocation:
                                #
                                if (heapbytes_allocated > 0)
                                    #
                                    put_op ( tcf::LOAD_INT_REGISTER                                     # heap_allocation_pointer += heapbytes_allocated;
                                                ( pri::address_width,
                                                  heap_allocation_pointer_register,
                                                  tcf::ADD
                                                    ( pri::address_width,
                                                      pri::heap_allocation_pointer,
                                                      make_int_literal heapbytes_allocated
                                                    )
                                                )
                                            );
                                fi;

                                # Emit the instructions that actually copy
                                # the heap roots into the heapcleaner arg registers:
                                #
                                put_parallel_copy (to_regs, from_regs);

                                # Any unused heapcleaner arg registers
                                # must be cleared to void -- otherwise
                                # the heapcleaner will try to interpret
                                # them as valid heap-root pointers:
                                #
                                set_registers_to_void  unused_registers
                                where
                                    fun set_registers_to_void [] => ();
                                        #
                                        set_registers_to_void (tcf::CODETEMP_INFO  (type, rd     ) ! roots) =>   { put_op (tcf::LOAD_INT_REGISTER (type, rd, void));    set_registers_to_void roots; };
                                        set_registers_to_void (tcf::LOAD (type, ea, mem) ! roots) =>   { put_op (tcf::STORE_INT (type, ea, void, mem)); set_registers_to_void roots; };
                                        #
                                        set_registers_to_void _ => error "set_registers_to_void";
                                    end;
                                end;
                            };

                        #######################################################################################################################################################
                        #                                   Available arg registers             Roots to pass                                         Parallel-copy resultlists
                        #                                   -----------------------------       ---------------------                                 -------------------------
                        put_prolog (heapbytes_allocated,   tcf::CODETEMP_INFO(_, to_reg) ! argregs,      REG from_reg ! roots,                                 to_regs, from_regs)
                            =>
                            # Copy root in from_reg into heapcleaner
                            # parameter register to_reg:
                            #
                            put_prolog (heapbytes_allocated,   argregs, roots,   to_reg ! to_regs,   from_reg ! from_regs);

                        #######################################################################################################################################################
                        #                                   Available arg registers             Roots to pass                                         Parallel-copy resultlists
                        #                                   -----------------------------       ------------------------------------------            -------------------------
                        put_prolog (heapbytes_allocated,   tcf::CODETEMP_INFO(_, to_reg) ! argregs,      RECORD (rec as { reg => from_reg, ... } ) ! roots,    to_regs, from_regs)
                            =>
                            {   # Make a record on heap per spec,
                                # then copy the pointer to it into
                                # a heapcleaner arg register:
                                #
                                heapbytes_allocated =   put__allocate_record (heapbytes_allocated, rec);
                                #
                                put_prolog  (heapbytes_allocated,  argregs,  roots,   to_reg ! to_regs,   from_reg ! from_regs);
                            };

                        #######################################################################################################################################################
                        #                                   Available arg registers             Roots to pass                                         Parallel-copy resultlists
                        #                                   -----------------------------       ------------------------------------------            -------------------------
             #          put_prolog (heapbytes_allocated,   tcf::LOAD(_, ea, mem) ! argregs,    root ! roots,                                         to_regs, from_regs)    # XXX
             #              =
             #              # The following code is unsafe because of potential cycles!
             #              # But luckly, it is unused XXX.
             #              #
             #              {   my (heapbytes_allocated, e)
             #                      =
             #                      case root
             #                          #
             #                          REG r         => (heapbytes_allocated, tcf::CODETEMP_INFO (32, r));
             #                          MEM (ea, mem) => (heapbytes_allocated, tcf::LOAD (32, ea, mem));
             #                          #
             #                          RECORD (r as { reg, ... } )
             #                              =>
             #                              (put__allocate_record (heapbytes_allocated, r), tcf::CODETEMP_INFO (32, reg));
             #
             #                          _ => error "floating point root";
             #                      esac;
             #
             #                 put_op (tcf::STORE_INT (32, ea, e, mem));
             #
             #                 put_prolog (heapbytes_allocated, argregs, roots, to_regs, from_regs);
             #             }

                        put_prolog _ => error "put_prolog";
                    end

                    # Emit code to construct a record on the heap
                    # and to leave a pointer to the record in 'reg'.
                    #
                    # This record will eventually get unpacked again
                    # by code emitted by   put__unpack_record:
                    #
                    also
                    fun put__allocate_record (heapbytes_allocated, { is_boxed, words, reg, fields, ... } )
                        =
                        {   fun heaptop_plus n                                                                                          # heap_allocation_pointer + n
                                =
                                tcf::ADD  (pri::address_width,  pri::heap_allocation_pointer,  make_int_literal n);

                            #
                            fun store_int (heapbytes_allocated, e)                                                                      # heap_allocation_pointer[heapbytes_allocated] = e
                                =
                                put_op (tcf::STORE_INT (32, heaptop_plus heapbytes_allocated, e, frr::memory));                 # 64-bit issue: '32' is wordsize-in-bits.

                            #
                            fun store_float (heapbytes_allocated, e)                                                                    # heap_allocation_pointer[heapbytes_allocated] = e
                                =
                                put_op (tcf::STORE_FLOAT (64, heaptop_plus heapbytes_allocated, e, frr::memory));

                            # Store given list of registers and records at successive locations
                            # starting at 'heapbytes_allocated' (=="heap pointer"):
                            #
                            fun store_fields  (heapbytes_allocated, [])
                                    =>
                                    ();                                                                                                 # Done.

                                store_fields  (heapbytes_allocated,  field ! rest)
                                    =>
                                    case field
                                        #
                                        REG r
                                            =>
                                            {   store_int (heapbytes_allocated, tcf::CODETEMP_INFO (32, r));                                    # heap_allocation_pointer[heapbytes_allocated] = r
                                                store_fields (heapbytes_allocated+4, rest);                                             # 64-bit issue: '32' is wordsize-in-bytes.
                                            };

                                        RECORD { reg, ... }
                                            =>
                                            {   store_int (heapbytes_allocated, tcf::CODETEMP_INFO (32, reg));                                  # heap_allocation_pointer[heapbytes_allocated] = reg
                                                store_fields (heapbytes_allocated+4, rest);                                             # 64-bit issue: '32' is wordsize-in-bytes.
                                            };

                                        MEM (ea, m)
                                            =>
                                            {   store_int (heapbytes_allocated, tcf::LOAD (32, ea, m));                                 # heap_allocation_pointer[heapbytes_allocated] = *m
                                                store_fields (heapbytes_allocated+4, rest);                                             # 64-bit issue: '32' is wordsize-in-bytes.
                                            };

                                        FREG r
                                            =>
                                            {   store_float (heapbytes_allocated, tcf::CODETEMP_INFO_FLOAT (64, r));                                    # heap_allocation_pointer[heapbytes_allocated] = r
                                                store_fields (heapbytes_allocated+8, rest);
                                            };
                                     esac;
                            end;

                            # Allocate subrecords of our record --
                            # we need their addresses now.
                            #
                            # (These subrecords eventually get unpacked
                            # by code emitted by  put__unpack_subrecords.)
                            #
                            heapbytes_allocated
                                =
                                put_code_to_allocate_subrecords (fields, heapbytes_allocated)
                                where
                                    fun put_code_to_allocate_subrecords ([], heapbytes_allocated)
                                            =>
                                            heapbytes_allocated;

                                        put_code_to_allocate_subrecords (RECORD r ! args, heapbytes_allocated)
                                            =>
                                            put_code_to_allocate_subrecords (args, put__allocate_record (heapbytes_allocated, r));

                                        put_code_to_allocate_subrecords (_ ! args, heapbytes_allocated)
                                            =>
                                            put_code_to_allocate_subrecords (args, heapbytes_allocated);
                                    end;
                                end;

                            tagword =   is_boxed  ??    make_boxed_tagword  words
                                                  ::  make_unboxed_tagword  words;

                            # Emit code to allot and set
                            # the tagword for our record/rawrec:
                            #
                            put_op (tcf::STORE_INT (32,  heaptop_plus heapbytes_allocated,  make_int_literal tagword,  frr::memory));           # 64-bit issue: '32' is bits-per-word.

                            # Emit code to allot and set
                            # the fields for our record/rawrec:
                            #
                            store_fields (heapbytes_allocated+4, fields);                                                                       # 64-bit issue:  '4' is bytes-per-word.

                            # Emit code to save the address of
                            # our record/rawrec in specified register:
                            #
                            put_op (tcf::LOAD_INT_REGISTER (pri::address_width, reg, heaptop_plus (heapbytes_allocated+4)));                    # 64-bit issue:  '4' is bytes-per-word.

                            # Return new top-of-heap:
                            #
                            heapbytes_allocated + 4 + unt::to_int_x (unt::(<<) (unt::from_int words, 0u2));                                     # 64-bit issue:  '4' is bytes-per-(tag)word.
                        };                                                                                                                      # fun put__allocate_record

                    # Here we emit the heapcleaner-call epilog -- the code
                    # immediately following the actual heapcleaner-call, which
                    # restores all mutator registers to their original values.
                    #
                    # Again, to avoid potential cycles we
                    # generate a single parallel copy:
                    #
                    fun put_epilog ([], unused_heapcleaner_arg_registers, to_regs, from_regs)
                            =>
                            put_parallel_copy (to_regs, from_regs);

                        ###################################################################################################################
                        #           Roots passed to heapcleaner                 Heapcleaner arg registers        Parallel-copy resultlists
                        #           ---------------------------                 --------------------------       -------------------------
                        put_epilog (REG to_reg ! roots,                        tcf::CODETEMP_INFO(_, rs) ! argregs,       to_regs, from_regs)
                            =>
                            put_epilog (roots, argregs, to_reg ! to_regs, rs ! from_regs);

                        ###################################################################################################################
                        #           Roots passed to heapcleaner                 Heapcleaner arg registers        Parallel-copy resultlists
                        #           ---------------------------                 --------------------------       -------------------------
                        put_epilog (RECORD { fields, reg_tmp, ... } ! roots,   tcf::CODETEMP_INFO(_, r) ! argregs,        to_regs, from_regs)
                            =>
                            {   # Unpack a record created by put_prolog:

                                # Load address of record into a register:
                                #
                                put_op (tcf::MOVE_INT_REGISTERS (32, [reg_tmp], [r]));

                                (put__unpack_record (reg_tmp, fields, to_regs, from_regs))
                                    ->
                                    (to_regs, from_regs);

                                put_epilog (roots, argregs, to_regs, from_regs);
                            };

                        ###################################################################################################################
                        #           Roots passed to heapcleaner                 Heapcleaner arg registers        Parallel-copy resultlists
                        #           ---------------------------                 -------------------------        -------------------------
                        put_epilog (root ! roots,                              argreg ! argregs,                to_regs, from_regs)
                            =>
                            {   put_assign (root, argreg); #  XXX
                                put_epilog (roots, argregs, to_regs, from_regs);
                            };

                        put_epilog _ => error "put_epilog";
                    end

                    also
                    fun put_assign (REG r, e)         =>   put_op (tcf::LOAD_INT_REGISTER (32, r, e));                          # Set a real (hardware) register.
                        put_assign (MEM (ea, mem), e) =>   put_op (tcf::STORE_INT (32, ea, e, mem));                                    # Set a ramreg -- a "register" implemented as a stackframe slot.
                        put_assign _                  =>   error "put_assign";
                    end

                    # Emit code to unpack the register contents saved
                    # in a record created by put__allocate_record,
                    # loading them back into their original registers:
                    #
                    also
                    fun put__unpack_record (record_r, fields, to_regs, from_regs)
                        =
                        {    (put__unpack_fields (0, fields, to_regs, from_regs))
                                ->
                                (to_regs, from_regs);

                            put__unpack_subrecords (0, fields, to_regs, from_regs);
                        }
                        where
                            stipulate
                                record_address                    =   tcf::CODETEMP_INFO (32, record_r);                                                                                # 64-bit issue: '32' is 'bits-per-word'.
                                fun record_field_at record_offset =   tcf::ADD  (pri::address_width,  record_address,  make_int_literal record_offset);
                            herein
                                fun int_field_at    record_offset =   tcf::LOAD  (32, record_field_at record_offset, frr::memory);                                      # 64-bit issue: '32' is 'bits-per-word'.
                                fun float_field_at  record_offset =   tcf::FLOAD (64, record_field_at record_offset, frr::memory);
                            end;

                            live_regs_vector_length =   rwv::length  live_regs_vector__global;

                            # Emit code to unpack normal fields.
                            # We use our to_regs/from_regs to
                            # accumulate a parallel move of int-regs,
                            # which is used only in the 'cyclic' case
                            # where a register is both a mutator root
                            # and also a heapcleaner arg register:
                            #
                            fun put__unpack_fields (_, [], to_regs, from_regs)
                                    =>
                                    (to_regs, from_regs);                                                                               # Done.

                                put__unpack_fields (offset_in_record,  FREG r ! fields,                   to_regs, from_regs)
                                    =>
                                    {   put_op (tcf::LOAD_FLOAT_REGISTER (64, r,  float_field_at  offset_in_record));
                                        #
                                        put__unpack_fields (offset_in_record+8, fields, to_regs, from_regs);
                                    };

                                put__unpack_fields (offset_in_record,  MEM (ea, mem) ! fields,            to_regs, from_regs)
                                    =>
                                    {   put_op (tcf::STORE_INT (32, ea, int_field_at offset_in_record, mem));  #  XXX                                                   # 64-bit issue: '32' is bits-per-word.
                                        #
                                        put__unpack_fields (offset_in_record+4, fields, to_regs, from_regs);                                                            # 64-bit issue: '4' is bytes-per-word.
                                    };

                                put__unpack_fields (offset_in_record,  RECORD { reg_tmp, ... } ! fields,  to_regs, from_regs)
                                    =>
                                    {   put_op (tcf::LOAD_INT_REGISTER (32, reg_tmp, int_field_at  offset_in_record));                                                  # 64-bit issue: '32' is bits-per-word.
                                        #
                                        put__unpack_fields (offset_in_record+4, fields, to_regs, from_regs);                                                            # 64-bit issue: '4' is bytes-per-word.
                                    };

                                put__unpack_fields (offset_in_record, REG to_reg ! fields,                    to_regs, from_regs)
                                    =>
                                    {   rd_id =   rkj::intrakind_register_id_of  to_reg;

                                        if (rd_id < live_regs_vector_length  and  rwv::get (live_regs_vector__global, rd_id) == cyclic)
                                            #
                                            # This register both contains live mutator data
                                            # and also is a heapcleaner arg registers, so
                                            # we need to indirect through a temp to avoid
                                            # clobbering stuff:
                                            #
                                            tmp_r =   rgk::make_int_codetemp_info ();

                                            # print "WARNING: CYCLE\n";

                                            put_op (tcf::LOAD_INT_REGISTER (32, tmp_r, int_field_at offset_in_record));                                         # 64-bit issue: '32' is bits-per-word.

                                            put__unpack_fields (offset_in_record+4, fields,   to_reg ! to_regs, tmp_r ! from_regs);                                     # 64-bit issue:  '4' is bytes-per-word.
                                        else
                                            put_op (tcf::LOAD_INT_REGISTER (32, to_reg,  int_field_at  offset_in_record));                                              # 64-bit issue: '32' is bits-per-word.

                                            put__unpack_fields (offset_in_record+4, fields, to_regs, from_regs);                                                        # 64-bit issue:  '4' is 'bytes-per-word.
                                        fi;
                                    };
                                end;

                            # Scan fieldlist looking for subrecords (==RECORD entries)
                            # and copy their contents back to where they belong, as part
                            # of restoring the pre-heapcleaning mutator (i.e., client program)
                            # register state.
                            #
                            # (These records were created by code emitted by
                            # put_code_to_allocate_subrecords.)
                            #
                            fun put__unpack_subrecords (_, [],                                                          to_regs, from_regs)
                                    =>
                                    (to_regs, from_regs);               # Done.

                                put__unpack_subrecords (record_offset, RECORD { fields, reg_tmp, ... } ! rest,          to_regs, from_regs)                             # The (only) case of interest.
                                    =>
                                    {   (put__unpack_record (reg_tmp, fields,                                           to_regs, from_regs))
                                            ->
                                            (to_regs, from_regs);

                                        put__unpack_subrecords (record_offset+4, rest,                                  to_regs, from_regs);                            # 64-bit issue: '4' is bytes-per-word.
                                    };

                                put__unpack_subrecords     (record_offset,   FREG _ ! rest,                             to_regs, from_regs)
                                 => put__unpack_subrecords (record_offset+8,          rest,                             to_regs, from_regs);

                                put__unpack_subrecords     (record_offset,   _      ! rest,                             to_regs, from_regs)
                                 => put__unpack_subrecords (record_offset+4,          rest,                             to_regs, from_regs);                            # 64-bit issue: '4' is bytes-per-word.
                            end;
                        end;                                                                                                                                            # fun put__unpack_record

                    # Emit code to load heapcleaner args into
                    # designated heapcleaner-parameter registers:
                    #
                    put_prolog (0, available_heapcleaner_arg_registers, roots_for_heapcleaner, [], []);

                    # Return a thunk which when evaluated will
                    # emit code to put all the mutator register
                    # contents back where we found them:
                    #
                    \\ () =  put_epilog (roots_for_heapcleaner, available_heapcleaner_arg_registers, [], []);
                };

            # The following auxiliary function generates
            # the actual call-heapcleaner code.
            #
            # It packages up the roots into the appropriate
            # records, calls the heapcleaner routine, then
            # unpacks the roots from the record.
            #
            fun put_heapcleaner_call''
                  {
                    stream => { put_op, put_bblock_note, put_private_label, ... }: Stream,
                    fn_is_private,
                    rootholding_registers,
                    intholding_registers,
                    floatholding_registers,
                    return
                  }
                =
                {   fun convert_rregs_to_treecode { regs, mem }
                        =
                        map (\\ r = tcf::CODETEMP_INFO (32, r))                                                                                         # 64-bit issue: '32' is bits-per-word.
                            regs
                        @
                        map (\\ i = tcf::LOAD (32, tcf::ADD (pri::address_width, pri::framepointer vfp, make_int_literal i), frr::memory))              # 64-bit issue: '32' is bits-per-word.
                            mem;

                    # IMPORTANT NOTE:
                    # If a root happens be in a heapcleaner parameter register,
                    # we can remove this root since it will be correctly
                    # targetted.
                    #
                    # rootholding_registers' are the boxed roots that
                    # we have to move to the appropriate registers.
                    #
                    # heapcleaner_arg_rregs are the registers that
                    # are available for communicating to the heapcleaner.
                    #
                    rootholding_rregs
                        =
                        split_registers_list_into_rregs_lists  rootholding_registers;

                    homeless_rootholding_rregs                                                                                  # We need to find a way to pass these to the heapcleaner.
                        =
                        rregs_difference (rootholding_rregs, heapcleaner_arg_rregs);                                            #

                    available_heapcleaner_arg_registers                                                                         # These are available to pass homeless roots to the heapcleaner.
                        =
                        rregs_difference (heapcleaner_arg_rregs, rootholding_rregs);                                            #

                    #
                    fun maybe_add_debug_comment_wrapper  treecode_which_calls_heapcleaner_via_framepointer
                        =
                        if (not *debug_heapcleaner)
                            #
                            treecode_which_calls_heapcleaner_via_framepointer;
                        else
                            tcf::NOTE
                              ( treecode_which_calls_heapcleaner_via_framepointer,
                                #
                                lhn::comment.x_to_note
                                    ( "roots="  + rregs_to_string  available_heapcleaner_arg_registers
                                    + " boxed=" + rregs_to_string  homeless_rootholding_rregs
                                    )
                              );
                        fi;

                    #  Convert them back to Treecode
                    #
                    homeless_rootholding_rregs      =   convert_rregs_to_treecode   homeless_rootholding_rregs;
                    available_heapcleaner_arg_rregs =   convert_rregs_to_treecode   available_heapcleaner_arg_registers;

                    # If we have any remaining client roots
                    # after the above trick, we have to
                    # make sure that available_heapcleaner_arg_rregs is not empty
                    # -- we need at least one heapcleaner root register
                    # in which to pass the remaining client roots to
                    # the heapcleaner:
                    #
                    my  ( available_heapcleaner_arg_rregs,
                          homeless_rootholding_rregs
                        )
                        =
                        case (available_heapcleaner_arg_rregs, intholding_registers, floatholding_registers, homeless_rootholding_rregs)
                            #
                            ([], [], [], [])
                                =>
                                ([], []);               #  It is okay.

                            ([], _, _, _)
                                =>
                                ([a_heapcleaner_arg_reg], homeless_rootholding_rregs @ [a_heapcleaner_arg_reg]);
                                #
                                # We put   a_heapcleaner_arg_reg   last to
                                # reduce register pressure  during unpacking.

                            _  => (available_heapcleaner_arg_rregs, homeless_rootholding_rregs);
                        esac;

                    put_code_to_restore_all_registers
                        =
                        put_code_to_load_all_roots_into_heapcleaner_arg_registers
                          (
                            put_op,
                            available_heapcleaner_arg_rregs,
                            homeless_rootholding_rregs,
                            intholding_registers,
                            floatholding_registers
                          );

                    put_bblock_note  heapcleaner_call_note;
                    put_bblock_note  no_optimization_note;
                    put_bblock_note  zero_freq_note;

                    put_op  (maybe_add_debug_comment_wrapper  treecode_which_calls_heapcleaner_via_framepointer);

                    if fn_is_private
                        #
                        put_base_pointer_update (put_op, put_private_label, put_bblock_note);
                    fi;

                    put_bblock_note  no_optimization_note;

                    put_code_to_restore_all_registers ();

                    put_op  return;
                };                                                                                                              # fun put_heapcleaner_call''

            # The following function is responsible
            # for generating only the call_heapcleaner code.
            #
#           fun put_heapcleaner_call  stream  { live_registers, live_register_types, return }                                   # Commented out 2011-08-05 CrT because it is never called.
#               =
#               {   (classify_live_registers_into_root_int_and_float (live_registers, live_register_types, [], [], []))
#                       ->
#                       { rootholding_registers, intholding_registers, floatholding_registers };
#
#                   put_heapcleaner_call'' { stream, fn_is_private=>TRUE, rootholding_registers, intholding_registers, floatholding_registers, return };
#               };
#

            # This function emits a comment
            # that stringifies the root set.
            # This is used for debugging only.
            #
            fun root_set_to_string { rootholding_registers, intholding_registers, floatholding_registers }
                =
                {   listify  "boxed="  rkj::register_to_string  (map  extract_reg   rootholding_registers )   +
                    listify  "one_word_int="  rkj::register_to_string  (map  extract_reg   intholding_registers  )   +
                    listify  "float="  rkj::register_to_string  (map  extract_freg  floatholding_registers);
                }
                where
                    fun extract_reg (tcf::CODETEMP_INFO (32, r)) =>  r;                                                 # Peel an int register.
                        extract_reg _                  =>  error "extract_reg";
                    end;
                    #
                    fun extract_freg (tcf::CODETEMP_INFO_FLOAT (64, f)) => f;                                                   # Peel a float register.
                        extract_freg _                   => error "extract_freg";
                    end;
                    #
                    fun listify title f []
                            =>
                            "";

                        listify title f l
                            =>
                            title + fold_backward
                                        \\ (x, "") => f x;
                                           (x,  y) => f x  +  ", "  +  y;
                                        end
                                        ""
                                        (cos::make_colorset  l) + " ";
                    end;
                end;

            # The following function is responsible for generating actual
            # heapcleaner-calling code, with entry labels and return information.
            #
            fun put_heapcleaner_call'
                  { stream  as { put_op, put_private_label, put_public_label, put_fn_liveout_info, put_bblock_note, ... },
                    fn_is_public
                  }
                  heapcleaner_call
                =
                {   heapcleaner_call
                        ->
                        SPEC_FOR_HEAPCLEANER_CALL { fn_is_private, fn_will_be_optimized, rootholding_registers, intholding_registers, floatholding_registers, live_registers, return, label_on_heapcleaner_call };

                    liveout =   fn_will_be_optimized  ??  []
                                                      ::  live_registers;

                    if fn_is_public     put_public_label    *label_on_heapcleaner_call;
                    else                put_private_label   *label_on_heapcleaner_call;
                    fi;

                    if (not fn_will_be_optimized)
                        #
                        put_heapcleaner_call'' { stream, fn_is_private, rootholding_registers, intholding_registers, floatholding_registers, return };
                    else
                        # When a private fn is to be optimized,
                        # no actual code is generated until later:                                              # If there any code in place to actually do this, I can't find it. -- 2011-08-10 CrT.
                        #
                        put_bblock_note (
                            #
                            lhn::heapcleaner_safepoint.x_to_note
                                #
                                (*debug_heapcleaner  ??  root_set_to_string { rootholding_registers, intholding_registers, floatholding_registers }
                                                     ::  ""
                                )
                        );

                        put_op  return;
                    fi;


                    case pri::heap_is_exhausted__test
                        #
                        THE platform_specific__heap_is_exhausted__test =>   put_fn_liveout_info ( tcf::FLAG_EXPRESSION platform_specific__heap_is_exhausted__test ! liveout);
                        NULL                                           =>   put_fn_liveout_info (                                                                   liveout);
                    esac;
                };

            # The following function checks
            # whether heapcleaner call specs
            # describe equivalent code, such
            # that we can generate just one
            # shared codeblock for both.
            #
            # This requires that they have equivalent
            # patterns of live-register types, and
            # also equivalent logic to return-to-caller,
            # in particular by both returning via an
            # indirect jump through the same register.
            #
            fun heapcleaner_callspecs_are_equivalent
                    (
                      SPEC_FOR_HEAPCLEANER_CALL { rootholding_registers=>b1, intholding_registers=>i1, floatholding_registers=>f1, return=>tcf::GOTO (ret1, _), ... },
                      SPEC_FOR_HEAPCLEANER_CALL { rootholding_registers=>b2, intholding_registers=>i2, floatholding_registers=>f2, return=>tcf::GOTO (ret2, _), ... }
                    )
                    =>
                    {   fun eq_ea ( tcf::CODETEMP_INFO(_, r1),
                                    tcf::CODETEMP_INFO(_, r2)
                                  )
                                =>
                                rkj::codetemps_are_same_color (r1, r2);

                            eq_ea ( tcf::ADD(_, tcf::CODETEMP_INFO(_, r1), tcf::LITERAL i),
                                    tcf::ADD(_, tcf::CODETEMP_INFO(_, r2), tcf::LITERAL j)
                                  )
                                =>
                                rkj::codetemps_are_same_color (r1, r2)
                                and
                                tcf::mi::eq (32, i, j);                                                         # 64-bit issue: '32' is 'wordbits'.

                            eq_ea _ => FALSE;
                        end;

                        #
                        fun eq_r ( tcf::CODETEMP_INFO (_, r1),
                                   tcf::CODETEMP_INFO (_, r2)
                                 )
                                =>
                                rkj::codetemps_are_same_color (r1, r2);

                            eq_r ( tcf::LOAD(_, ea1, _),
                                   tcf::LOAD(_, ea2, _)
                                 )
                                =>
                                eq_ea (ea1, ea2);

                            eq_r _
                                =>
                                FALSE;
                        end;

                        #
                        fun eq_f ( tcf::CODETEMP_INFO_FLOAT(_, f1),
                                   tcf::CODETEMP_INFO_FLOAT(_, f2)
                                 )
                                =>
                                rkj::codetemps_are_same_color (f1, f2);

                            eq_f ( tcf::FLOAD(_, ea1, _),
                                   tcf::FLOAD(_, ea2, _)
                                 )
                                =>
                                eq_ea (ea1, ea2);

                            eq_f _ => FALSE;
                        end;

                        # Compare two lists; return TRUE iff
                        # they compare pairwise equal per 'predicate'
                        # and are the same length:
                        #
                        fun lists_match  predicate
                            =
                            all'
                            where
                                fun all' ( a ! resta,
                                           b ! restb
                                         )           =>   predicate (a, b)   and   all' (resta, restb);
                                    all' ([],    []) =>   TRUE;
                                    all' _           =>   FALSE;
                                end;
                            end;

                        same_int_expression =   lists_match  eq_r;

                        same_int_expression (b1,   b2  )   and
                        eq_r                (ret1, ret2)   and
                        same_int_expression (i1,   i2  )   and
                        lists_match  eq_f   (f1,   f2  );
                    };

                heapcleaner_callspecs_are_equivalent _
                    =>
                    FALSE;
            end;

            # The following function is called once
            # at the end of compiling a cccomponent.                                                    # "cccomponent" == "callgraph connected-component".
            #
            # For public fns we have the heaplimit checks                                               # Why the difference?  Possibly because public-fn calls use a standardized arg-passing
            # branch to longjumps which in turn jump to their                                           # which makes sharing of heap-cleaner calls a plausible prospect, but private-fn calls use
            # (possibly shared) actual heapcleaner-call codeblock,                                      # customized arg-passing protocols which may not match often enough to make heapcleaner-call
            # but for private functions we have the heaplimit checks                                    # sharing attempts worth the effort...? -- 2011-08-12 CrT
            # branch directly to their heapcleaner-call codeblocks.
            #
            # The actual heapcleaner invocation code is not generated yet.
            #
            # This function is called (only) by   translate_nextcode_cccomponent_to_treecode   in
            #
            #     src/lib/compiler/back/low/main/main/translate-nextcode-to-treecode-g.pkg
            #
            fun put_all_publicfn_heapcleaner_longjumps_and_all_privatefn_heapcleaner_calls_for_cccomponent
                (
                  stream as { put_op,
                              put_private_label,
                              put_fn_liveout_info,
                              ...
                            }
                )
                =
                {   apply  merge_identical_heapcleaner_calls   *public_fn_heaplimit_checks__global;                                         public_fn_heaplimit_checks__global  := [];
                    #
                    apply  put_longjump  *longjumps_to_heapcleaner_calls__global;
                    #
                    apply   (put_heapcleaner_call' { stream, fn_is_public => FALSE })   *private_fn_heaplimit_checks__global;               private_fn_heaplimit_checks__global := [];
                }
                where
                    # The idea here is that we have many heaplimit branch-and-checks
                    # (which are small -- two machine instructions) but that making
                    # separate heapcleaner-call blocks (which are large -- dozens of instructions)
                    # for them all would be a lot of code, and not needed because
                    # many of those heapcleaner-call blocks would be identical anyhow.
                    #
                    # So here we in essence merge all duplicate heapcleaner-call blocks
                    # to save codespace.  Since we haven't actually generated the heapcleaner-call
                    # blocks as yet, "merging" them actually just involves making a pass over
                    # our   public_fn_heaplimit_checks__global  list.
                    #
                    # Here 'heaplimit_branch_target_label' is the codelabel to which
                    # one heaplimit check will jump.  We need to put it on a longjump
                    # which jumps to a compatible heapcleaner-call, where 'compatible'
                    # means it has the same pattern of live register contents.
                    #
                    # We scan the (initially empty)   longjumps_to_heapcleaner_calls__global
                    # list of longjump specs; if we find a longjump to a compatible heapcleaner call
                    # we use it, eitherwise we create a new one and push it to the list:
                    #
                    fun merge_identical_heapcleaner_calls (hcs as SPEC_FOR_HEAPCLEANER_CALL { label_on_heapcleaner_call as REF heaplimit_branch_target_label, ... } )   # Heaplimit-check (not -call!) to process.
                        =
                        merge_identical_heapcleaner_calls'  *longjumps_to_heapcleaner_calls__global
                        where
                            fun merge_identical_heapcleaner_calls' (SPEC_FOR_LONGJUMP_TO_HEAPCLEANER_CALL { spec_for_heapcleaner_call=>hcs', labels_on_longjump } ! rest)
                                    =>
                                    if (heapcleaner_callspecs_are_equivalent (hcs, hcs'))    labels_on_longjump :=  heaplimit_branch_target_label  !  *labels_on_longjump;
                                    else                                                      merge_identical_heapcleaner_calls'  rest;
                                    fi;

                                merge_identical_heapcleaner_calls' []
                                    =>
                                    {   # No compatible longjump, create and push a new one:

                                        # The existing codelabel on the heapcleaner-call spec
                                        # is about to be put on the longjump spec, so give
                                        # the heapcleaner-spec a new one of its own:
                                        #
                                        label_on_heapcleaner_call :=   lbl::make_anonymous_codelabel ();

                                        # Create and push a new longjump spec configured
                                        # to jump to our heapcleaner spec; put heaplimit_branch_target_label
                                        # onto it so the heaplimit check will branch to this longjump:
                                        #
                                        longjumps_to_heapcleaner_calls__global
                                            :=
                                            SPEC_FOR_LONGJUMP_TO_HEAPCLEANER_CALL
                                              {
                                                spec_for_heapcleaner_call =>  hcs,
                                                labels_on_longjump    =>  REF [ heaplimit_branch_target_label ]
                                              }
                                            !
                                            *longjumps_to_heapcleaner_calls__global;
                                    };
                            end;
                        end;

                    # Generate a longjump to a heapcleaner-call routine:
                    #
                    fun put_longjump (SPEC_FOR_LONGJUMP_TO_HEAPCLEANER_CALL { labels_on_longjump => REF [], ... } )
                            =>
                            ();                                                                                                 # We've already done this one. This can happen because our lists get cleared
                                                                                                                                # once per sourcefile but put_longjump_heapcleaner_calls() gets called once
                                                                                                                                # per callgraph connected component within the file -- we're sharing longjumps
                                                                                                                                # and heapcleaner calls between the cccomponents.
                        put_longjump
                            (SPEC_FOR_LONGJUMP_TO_HEAPCLEANER_CALL
                              {
                                labels_on_longjump,
                                spec_for_heapcleaner_call =>  SPEC_FOR_HEAPCLEANER_CALL { label_on_heapcleaner_call, rootholding_registers, intholding_registers, floatholding_registers, ... }
                              }
                            )
                            =>
                            {   live_out   =    live_plain_regs @ live_float_regs
                                                where
                                                    live_plain_regs =   map  tcf::INT_EXPRESSION    (intholding_registers @ rootholding_registers);
                                                    live_float_regs =   map  tcf::FLOAT_EXPRESSION  floatholding_registers;
                                                end;

                                apply  put_private_label  *labels_on_longjump;

                                labels_on_longjump := [];                                                                                               # Remember we've done this one.

                                put_op (tcf::GOTO (tcf::LABEL *label_on_heapcleaner_call, []));

                                put_fn_liveout_info  live_out;
                            };
                    end;
                end;                                                                                                                                    # fun put_longjump_heapcleaner_calls

            fun put_all_publicfn_heapcleaner_calls_for_package  stream
                =
                # We are called (only) from:
                #
                #     src/lib/compiler/back/low/main/main/translate-nextcode-to-treecode-g.pkg
                #
                apply   (put_heapcleaner_call' { stream, fn_is_public => TRUE })   heapcleaner_call_specs
                where
                    heapcleaner_call_specs
                        =
                        map   heapcleaner_call_spec_for_longjump   *longjumps_to_heapcleaner_calls__global
                        where
                            fun heapcleaner_call_spec_for_longjump   (SPEC_FOR_LONGJUMP_TO_HEAPCLEANER_CALL { spec_for_heapcleaner_call, ... })
                                =
                                spec_for_heapcleaner_call;
                        end;

                    longjumps_to_heapcleaner_calls__global := [];
                end;
        end;
    };
end;

Comments and suggestions to: bugs@mythryl.org