## translate-nextcode-to-treecode-g.pkg --- translate nextcode to treecode (and then all the way on down to execode -- raw binary executable machine code).
#
# CONTEXT:
#
# The Mythryl compiler code representations used are, in order:
#
# 1) Raw Syntax is the initial frontend code representation.
# 2) Deep Syntax is the second and final frontend code representation.
# 3) Lambdacode (a polymorphically typed lambda-calculus format) is the first backend code representation, used only transitionally.
# 4) Anormcode (A-Normal format, which preserves expression tree structure) is the second backend code representation, and the first used for optimization.
# 5) Nextcode ("continuation-passing style", a single-assignment basic-block-graph form where call and return are essentially the same) is the third and chief backend tophalf code representation.
# 6) Treecode is the backend tophalf/lowhalf transitional code representation. It is typically slightly specialized for each target architecture, e.g. Intel32 (x86).
# 7) Machcode abstracts the target architecture machine instructions. It gets specialized for each target architecture.
# 8) Execode is absolute executable binary machine instructions for the target architecture.
#
# For general context, see
#
# src/A.COMPILER-PASSES.OVERVIEW
#
# This package implements the transition from the
# machine-independent backend tophalf centered on
#
#
src/lib/compiler/back/top/main/backend-tophalf-g.pkg
#
# to the machine-dependent backend lowhalf centered on
#
#
src/lib/compiler/back/low/main/main/backend-lowhalf-g.pkg#
# The lowhalf started out as MLRISC, a compiler-agnostic backend,
# and consequently knows nothing of such Mythryl-specific details
# as Tagged_Int tagging and heap-record tagging and layout, so much of
# our work in this file consists of translating such constructs
# into the low-level load/store/add/branch/... machinecode idiom.
#
# In more concrete terms, this package implements the translation
# from frontend Nextcode (aka "continuation passing style") down
# to backend "Treecode" code format.
#
# Subsequent translation from Treecode down to machcode format is
# delegated to one of
#
#
src/lib/compiler/back/low/pwrpc32/treecode/translate-treecode-to-machcode-pwrpc32-g.pkg#
src/lib/compiler/back/low/sparc32/treecode/translate-treecode-to-machcode-sparc32-g.pkg#
src/lib/compiler/back/low/intel32/treecode/translate-treecode-to-machcode-intel32-g.pkg#
#
#
# Specific tasks performed in this file include:
#
# o Convert fun bodies from linear-sequence-of-instructions
# form to tree form, by entering each codetemp definition
# into the appropriate one of
# codetemp_to_tcf_int_expression__hashtable
# codetemp_to_tcf_float_expression__hashtable
# and then retrieving those tree fragments as we encounter
# later parts of the expression. (nextcode-form necessarily
# computes expressions from the leafs upward, so we will
# always encounter the leafs of each subexpression before
# the root.
# This treeification lets us later use the Sethi-Ullman
# algorithm to re-linearize the code in such a way as to
# minimize register pressure, basically by, for each binary
# math op, computing the more complex operand first -- see
#
src/lib/compiler/back/low/intel32/treecode/translate-treecode-to-machcode-intel32-g.pkg# We also use Sethi-Ullman in:
#
src/lib/compiler/back/low/intel32/regor/regor-intel32-g.pkg# The tophalf has a (broken, unused) version in:
#
src/lib/compiler/back/top/lambdacode/generalized-sethi-ullman-reordering.pkg#
# o Generate code to log all boxed updates to heap_changelog.
# The heap_changelog is essentially a list of CONS cells
# recording all stores of pointers into the heap; the
# heapcleaner uses it to track all pointers from old
# heap generations into younger ones -- multigeneration
# heapcleaning ("garbage collection") is impossible without
# this information. (See calls to log_boxed_update_to_heap_changelog.)
#
# o Expand abstract slot accesses into concrete sequence of
# ram-read instructions. In more detail:
#
# We try to pack closures into registers, but some won't fit,
# so in general a closure can be a heap record. In fact, we
# share parts of some closures, so a complete closure can be
# a depth-two tree of heap records. A related set of closures
# is then technically a depth-two "lattice", since each leaf
# may be referenced by multiple root records.
#
# Nextcode refers to closure slots abstractly, suppressing
# the lattice structure, but Treecode knows nothing about our
# closure structure, so as part of Nextcode -> Treecode translation
# we must translate each closure-slot reference into a concrete
# series of load and add instructions that do the right memory
# fetches from the right record offsets to return the desired value.
# Closures are read-only so we don't have to handle slot writes.
# (Search for VIA_SLOT.)
#
#
# o Compile Tagged_Int arithmetic operations down into sequences
# of vanilla 32-bit integer operations. (See fun tagged_*...)
#
#
#
# o The Nextcode distinction between signed and unsigned
# operands goes away in Treecode, so the un/signed-agnostic
# Nextcode binary operators get replaced by Treecode binary
# operators which are explicitly signed or unsigned. (See
# to_tcf_unsigned_compare and friends.)
#
#
#
#
# Our compiletime generic invocation is from
#
#
src/lib/compiler/back/low/main/main/backend-lowhalf-g.pkg#
# which in particular passes us the
#
# translate_machcode_cccomponent_to_execode # "machcode" == "(abstract) machine code"
# # "cccomponent" == "callgraph connected-component" (we compile them one at a time).
# function which is our runtime entrypoint into the
# back end.
#
#
#
# Runtime invocation of our (sole)
#
# translate_nextcode_to_execode
#
# entrypoint is from
#
#
src/lib/compiler/back/top/main/backend-tophalf-g.pkg#
# via the short wrapper at the bottom of
#
#
src/lib/compiler/back/low/main/main/backend-lowhalf-g.pkg#
#
# In terms of lines-of-code, this file is utterly dominated by
#
# fun translate_nextcode_to_execode -- 3500 of 4000 line
#
# which in turn is internally dominated by
#
# fun translate_nextcode_cccomponent_to_treecode -- 3200 of 4000 lines.
#
# The core function, and perhaps the best place to start
# reading, is
#
# translate_nextcode_ops_to_treecode
#
# "This version of translate_nextcode_to_treecode_g also
# injects heapcleaner ("garbage-collector") types into
# the backend lowhalf.
# I've also reorganized it a bit and added a few comments
# so that I can understand it."
# -- Allen Leung (?)
# Compiled by:
#
src/lib/compiler/core.sublib### "Do not say a little in many words,
### but a great deal in a few."
###
### -- Pythagoras (582-497 BCE)
stipulate
package ds = deep_syntax; # deep_syntax is from
src/lib/compiler/front/typer-stuff/deep-syntax/deep-syntax.pkg package err = error_message; # error_message is from
src/lib/compiler/front/basics/errormsg/error-message.pkg package ncf = nextcode_form; # nextcode_form is from
src/lib/compiler/back/top/nextcode/nextcode-form.pkg package pcs = per_compile_stuff; # per_compile_stuff is from
src/lib/compiler/front/typer-stuff/main/per-compile-stuff.pkgherein
api Translate_Nextcode_To_Treecode {
#
translate_nextcode_to_execode
:
{ nextcode_functions: List( ncf::Function ),
err: err::Plaint_Sink,
source_name: String, # Typically filename, something like "<stdin>" if compiling interactively.
per_compile_stuff: pcs::Per_Compile_Stuff( ds::Declaration ),
fun_id__to__max_resource_consumption # Given
: # a
ncf::Codetemp # fun_id
-> # return
{ max_possible_heapwords_allocated_before_next_heaplimit_check: Int, # max possible words of heap memory allocated on any path through function body, and
max_possible_nextcode_ops_run_before_next_heaplimit_check: Int # max possible nextcode instructions executed on any path through function body.
}
}
->
(Void -> Int);
# The result is a thunk computing the machinecode bytevector
# offset for entrypoint corresponding to the first function
# in nextcode_functions.
#
# The client must call 'finish' before forcing it.
};
end;
# Machine_Properties is from
src/lib/compiler/back/low/main/main/machine-properties.apistipulate
package chi = per_codetemp_heapcleaner_info; # per_codetemp_heapcleaner_info is from
src/lib/compiler/back/low/main/nextcode/per-codetemp-heapcleaner-info.pkg package coc = compiler_controls; # compiler_controls is from
src/lib/compiler/toplevel/main/compiler-controls.pkg package ctl = global_controls; # global_controls is from
src/lib/compiler/toplevel/main/global-controls.pkg package ds = deep_syntax; # deep_syntax is from
src/lib/compiler/front/typer-stuff/deep-syntax/deep-syntax.pkg package fbp = guess_nextcode_branch_probabilities; # guess_nextcode_branch_probabilities is from
src/lib/compiler/back/low/main/nextcode/guess-nextcode-branch-probabilities.pkg package ffc = find_nextcode_cccomponents; # find_nextcode_cccomponents is from
src/lib/compiler/back/low/main/nextcode/find-nextcode-cccomponents.pkg package iht = int_hashtable; # int_hashtable is from
src/lib/src/int-hashtable.pkg package lbl = codelabel; # codelabel is from
src/lib/compiler/back/low/code/codelabel.pkg package lem = lowhalf_error_message; # lowhalf_error_message is from
src/lib/compiler/back/low/control/lowhalf-error-message.pkg package lhn = lowhalf_notes; # lowhalf_notes is from
src/lib/compiler/back/low/code/lowhalf-notes.pkg package ncf = nextcode_form; # nextcode_form is from
src/lib/compiler/back/top/nextcode/nextcode-form.pkg package pb = pseudo_op_basis_type; # pseudo_op_basis_type is from
src/lib/compiler/back/low/mcg/pseudo-op-basis-type.pkg package pby = probability; # probability is from
src/lib/compiler/back/low/library/probability.pkg package pcs = per_compile_stuff; # per_compile_stuff is from
src/lib/compiler/front/typer-stuff/main/per-compile-stuff.pkg package pl = paired_lists; # paired_lists is from
src/lib/std/src/paired-lists.pkg package ppn = prettyprint_nextcode; # prettyprint_nextcode is from
src/lib/compiler/back/top/nextcode/prettyprint-nextcode.pkg package pt = nextcode_ramregions::pt; # nextcode_ramregions is from
src/lib/compiler/back/low/main/nextcode/nextcode-ramregions.pkg package rgn = nextcode_ramregions; # nextcode_ramregions is from
src/lib/compiler/back/low/main/nextcode/nextcode-ramregions.pkg package rkj = registerkinds_junk; # registerkinds_junk is from
src/lib/compiler/back/low/code/registerkinds-junk.pkg package uvf = use_virtual_framepointer_in_cccomponent; # use_virtual_framepointer_in_cccomponent is from
src/lib/compiler/back/low/main/main/use-virtual-framepointer-in-cccomponent.pkgherein
# This generic is invoked from:
#
#
src/lib/compiler/back/low/main/main/backend-lowhalf-g.pkg #
generic package translate_nextcode_to_treecode_g (
# ================================
# # machine_properties_intel32 is from
src/lib/compiler/back/low/main/intel32/machine-properties-intel32.pkg # # machine_properties_pwrpc32 is from
src/lib/compiler/back/low/main/pwrpc32/machine-properties-pwrpc32.pkg # # machine_properties_sparc32 is from
src/lib/compiler/back/low/main/sparc32/machine-properties-sparc32.pkg package mp: Machine_Properties; # Machine_Properties is from
src/lib/compiler/back/low/main/main/machine-properties.api package trx: Treecode_Extension_Mythryl; # Treecode_Extension_Mythryl is from
src/lib/compiler/back/low/main/nextcode/treecode-extension-mythryl.api # platform_register_info_intel32 is from
src/lib/compiler/back/low/main/intel32/backend-lowhalf-intel32-g.pkg # platform_register_info_pwrpc32 is from
src/lib/compiler/back/low/main/pwrpc32/backend-lowhalf-pwrpc32.pkg # platform_register_info_sparc32 is from
src/lib/compiler/back/low/main/sparc32/backend-lowhalf-sparc32.pkg package pri: Platform_Register_Info # Platform_Register_Info is from
src/lib/compiler/back/low/main/nextcode/platform-register-info.api where # "pri" == "nextcode_registers".
tcf::rgn == nextcode_ramregions # "rgn" == "region"
also tcf::lac == late_constant # late_constant is from
src/lib/compiler/back/low/main/nextcode/late-constant.pkg also tcf::trx == trx; # "trx" == "treecode_extension".
# "tcf" == "treecode_form".
package cpo: Client_Pseudo_Ops_Mythryl; # Client_Pseudo_Ops_Mythryl is from
src/lib/compiler/back/low/main/nextcode/client-pseudo-ops-mythryl.api # "cpo" == "client_pseudo_op".
package pop: Pseudo_Ops # Pseudo_Ops is from
src/lib/compiler/back/low/mcg/pseudo-op.api where # "pop" == "pseudo_ops".
tcf == pri::tcf # "tcf" == "treecode_form".
also cpo == cpo; # "cpo" == "client_pseudo_ops".
# translate_treecode_to_machcode_intel32_g is from
src/lib/compiler/back/low/intel32/treecode/translate-treecode-to-machcode-intel32-g.pkg package t2m: Translate_Treecode_To_Machcode # Translate_Treecode_To_Machcode is from
src/lib/compiler/back/low/treecode/translate-treecode-to-machcode.api where # "t2m" == "translate_treecode_to_machcode".
tcs::tcf == pri::tcf # "tcf" == "treecode_form".
also tcs::cst::pop == pop; # "pop" == "pseudo_op".
package mkg # make_machcode_codebuffer_g is from
src/lib/compiler/back/low/mcg/make-machcode-codebuffer-g.pkg : Make_Machcode_Codebuffer # Make_Machcode_Codebuffer is from
src/lib/compiler/back/low/mcg/make-machcode-codebuffer.api where
cst == t2m::tcs::cst # "cst" == "codestream".
also mcf == t2m::mcf # "mcf" == "machcode_form" (abstract machine code).
also mcg == t2m::mcg; # "mcg" == "machcode_controlflow_graph".
# put_treecode_heapcleaner_calls_g is from
src/lib/compiler/back/low/main/nextcode/emit-treecode-heapcleaner-calls-g.pkg package ihc: Emit_Treecode_Heapcleaner_Calls # Emit_Treecode_Heapcleaner_Calls is from
src/lib/compiler/back/low/main/nextcode/emit-treecode-heapcleaner-calls.api where # "ihc" == "insert_treecode_heapcleaner_calls".
tcs == t2m::tcs # "tcs" == "treecode_stream".
also mcg == mkg::mcg; # "mcg" == "machcode_controlflow_graph".
package rgk: Registerkinds; # Registerkinds is from
src/lib/compiler/back/low/code/registerkinds.api package cal: Ccalls # Ccalls is from
src/lib/compiler/back/low/ccalls/ccalls.api where # "cal" == "ccalls".
tcf == pri::tcf; # "tcf" == "treecode_form".
translate_machcode_cccomponent_to_execode
:
pcs::Per_Compile_Stuff( ds::Declaration )
->
mkg::mcg::Machcode_Controlflow_Graph
->
Void;
)
: (weak) Translate_Nextcode_To_Treecode # Defined above.
{
stipulate
package tag = mp::heap_tags; # Mythryl heapchunk tagwords.
package tcf = pri::tcf; # "tcf" == "treecode_form".
package tcs = t2m::tcs; # "tcs" == "treecode_stream".
package cfa # "cfs" == "convert fun arguments"
=
convert_nextcode_fun_args_to_treecode_g ( # convert_nextcode_fun_args_to_treecode_g is from
src/lib/compiler/back/low/main/nextcode/convert-nextcode-fun-args-to-treecode-g.pkg #
package pri = pri; # "pri" == "platform register info".
package mp = mp; # "mp" == "machine_properties".
);
# Decompose a package ("compilation unit")
# callgraph into connected components:
#
package nfs # "nfs" == "nextcode_function_stack".
=
nextcode_function_stack_g ( tcf ); # nextcode_function_stack_g is from
src/lib/compiler/back/low/main/nextcode/nextcode-function-stack-g.pkg package ma # Memory aliasing
=
memory_aliasing_g ( # memory_aliasing_g is from
src/lib/compiler/back/low/main/nextcode/memory-aliasing-g.pkg #
package rgk = rgk; # "rgk" == "registerkinds".
);
package fcc # C-Calls handling
=
nextcode_c_calls_g ( # nextcode_c_calls_g is from
src/lib/compiler/back/low/main/nextcode/nextcode-ccalls-g.pkg #
package mp = mp; # "mp" == "machine_properties".
package pri = pri; # "pri" == "nextcode_registers".
package t2m = t2m; # "t2m" == "translate_treecode_to_machcode".
package rgk = rgk; # "rgk" == "registerkinds".
package cal = cal; # "cal" == "ccalls".
);
herein
#
fun error msg
=
lem::error("translate_nextcode_to_treecode_g", msg);
# Debugging:
#
fun print_nextcode_fun nextcode_fn
=
{ ctl::print::say "*********************************************** \n";
ppn::print_nextcode_function nextcode_fn;
ctl::print::say "*********************************************** \n";
ctl::print::flush();
};
print = ctl::print::say;
####################################################################
# Heapcleaner ("garbage collection") safety.
# This stuff matters only if
# lowhalf_track_heapcleaner_type_info
# is TRUE, which it currently never is:
package hr
= # How to annotate heapcleaner (garbage collector) information.
codetemps_with_heapcleaner_info_g ( # codetemps_with_heapcleaner_info_g is from
src/lib/compiler/back/low/heapcleaner-safety/codetemps-with-heapcleaner-info-g.pkg #
package rgk = rgk; # "rgk" == "registerkinds".
package chi = chi; # "chi" == "per_codetemp_heapcleaner_info".
);
no_opt = [lhn::no_optimization.x_to_note ()];
#
fun same_reg_as x y
=
rkj::same_id (x, y);
ptr_type = lhn::mark_reg.x_to_note (\\ r = hr::set_heapcleaner_info_on_codetemp_info (r, chi::ptr_type)); # Boxed chunks
i32_type = lhn::mark_reg.x_to_note (\\ r = hr::set_heapcleaner_info_on_codetemp_info (r, chi::i32_type)); # untagged integers
i31_type = lhn::mark_reg.x_to_note (\\ r = hr::set_heapcleaner_info_on_codetemp_info (r, chi::i31_type)); # tagged integers
f64_type = lhn::mark_reg.x_to_note (\\ r = hr::set_heapcleaner_info_on_codetemp_info (r, chi::f64_type)); # untagged floats
#
fun ncftype_to_note ncf::typ::INT => i31_type;
ncftype_to_note ncf::typ::INT1 => i32_type;
ncftype_to_note ncf::typ::FLOAT64 => f64_type;
ncftype_to_note _ => ptr_type;
end;
# Convert kind+bitsize to heapcleaner type.
# Kind is INT/UNT/FLOAT:
#
fun kind_and_size_to_heapcleaner_type (ncf::p::INT 31) => chi::i31_type; # 64-bit issue: '31' is bits-per-tagged-int.
kind_and_size_to_heapcleaner_type (ncf::p::UNT 31) => chi::i31_type; # 64-bit issue: '31' is bits-per-tagged-unt.
kind_and_size_to_heapcleaner_type (ncf::p::INT 32) => chi::i32_type; # 64-bit issue: '31' is bits-per-tagged-int.
kind_and_size_to_heapcleaner_type (ncf::p::UNT 32) => chi::i32_type; # 64-bit issue: '31' is bits-per-tagged-unt.
#
kind_and_size_to_heapcleaner_type (_ ) => error "kind_and_size_to_heapcleaner_type";
end;
#
fun ncftype_to_heapcleaner_type (ncf::typ::FLOAT64) => chi::f64_type;
ncftype_to_heapcleaner_type (ncf::typ::INT ) => chi::i31_type;
ncftype_to_heapcleaner_type (ncf::typ::INT1 ) => chi::i32_type;
ncftype_to_heapcleaner_type _ => chi::ptr_type;
end;
# Make a heapcleaner livein/liveout annotation.
# This is a list of (register_id, heapcleaner_type) pairs:
#
fun make_heapcleaner_liveinliveout_note
(
an,
args, # Formal args (i.e., parameters).
ncftypes # Types of formal args -- this list will always be same length as 'args'.
)
=
an (collect (args, ncftypes, []))
where
fun collect ( tcf::INT_EXPRESSION ( tcf::CODETEMP_INFO(_, r)) ! args, ncftype ! ncftypes, results) => collect (args, ncftypes, (r, ncftype_to_heapcleaner_type ncftype) ! results);
collect (tcf::FLOAT_EXPRESSION (tcf::CODETEMP_INFO_FLOAT(_, r)) ! args, ncftype ! ncftypes, results) => collect (args, ncftypes, (r, ncftype_to_heapcleaner_type ncftype) ! results);
collect (_ ! args, _ ! ncftypes, results)
=>
collect (args, ncftypes, results);
collect ([], [], results)
=>
results;
collect _ => error "make_heapcleaner_liveinliveout_note";
end;
end;
# These are the type widths of Mythryl.
# They are hardwired for now.
#
ptr_bitsize = 32; # Size of Mythryl's pointer XXX SUCKO FIXME 64-BIT ISSUE
int_bitsize = 32; # Size of Mythryl's integer XXX SUCKO FIXME 64-BIT ISSUE
flt_bitsize = 64; # Size of Mythryl's real number
zero = tcf::LITERAL 0;
one = tcf::LITERAL 1;
two = tcf::LITERAL 2;
tagged_zero = one;
offp0 = ncf::SLOT 0;
#
fun int i = tcf::LITERAL (tcf::mi::from_int (int_bitsize, i)); # "li" == "int-literal".
fun unt u = tcf::LITERAL (tcf::mi::from_unt1 (int_bitsize, u)); # "lu" == "unt-literal".
const_base_pointer_reg_offset
=
int mp::const_base_pointer_reg_offset;
# The heap allocation pointer -- we allot
# heap memory just by advancing this pointer.
# It must be in a true hardware register: # i.e., not in an Intel32 ramreg -- a stack slot used as a workaround for the register shortage on that architecture.
#
heap_allocation_pointer_register
=
case pri::heap_allocation_pointer
#
tcf::CODETEMP_INFO (_, heap_allocation_pointer_register) => heap_allocation_pointer_register;
_ => error "heap_allocation_pointer_register";
esac;
global_registers # Global registers allocated statically by hand.
=
map (\\ r = tcf::INT_EXPRESSION (tcf::CODETEMP_INFO (int_bitsize, r))) pri::global_int_registers # On intel32 this is ESP and EDI.
@
map (\\ f = tcf::FLOAT_EXPRESSION (tcf::CODETEMP_INFO_FLOAT (flt_bitsize, f))) pri::global_float_registers; # On intel32 there are no globally allocated float registers -- they are all available to the register allocator.
global_registers # On sparc32 and pwrpc32 we also globally dedicate a condition code register to heaplimit checks.
=
case pri::heap_is_exhausted__test
#
THE cc => tcf::FLAG_EXPRESSION cc ! global_registers; # "cc" is "condition code" -- zero/parity/overflow/... flag stuff.
NULL => global_registers;
esac;
do_extra_lowhalf_optimizations # This flag controls whether extra lowhalf optimizations should be performed. Defaults to FALSE.
= # XXX BUGGO FIXME icky thread-hostile global mutable state.
ctl::lowhalf::make_bool #
( "do_extra_lowhalf_optimizations",
"whether to do lowhalf optimizations"
);
track_types_for_heapcleaner # XXX BUGGO FIXME icky thread-hostile global mutable state.
=
ctl::lowhalf::make_bool # Defaults to FALSE.
( "track_types_for_heapcleaner",
"whether to track heapcleaner type info"
); # If this flag is TRUE then annotate the
# codetemps with heapcleaner type info;
# otherwise use the default behavior.
#
# This flag is always FALSE; I think
# this is another unfinished project.
# The relevant files appear to be:
#
#
src/lib/compiler/back/low/heapcleaner-safety/per-codetemp-heapcleaner-info-template.api #
src/lib/compiler/back/low/main/nextcode/per-codetemp-heapcleaner-info.api #
src/lib/compiler/back/low/main/nextcode/per-codetemp-heapcleaner-info.pkg #
src/lib/compiler/back/low/heapcleaner-safety/codetemps-with-heapcleaner-info.api #
src/lib/compiler/back/low/heapcleaner-safety/codetemps-with-heapcleaner-info-g.pkg lowhalf_optimize_before_making_heapcleaner_code # XXX BUGGO FIXME icky thread-hostile global mutable state.
=
ctl::lowhalf::make_bool # Defaults to FALSE.
( "lowhalf_optimize_before_making_heapcleaner_code",
"whether to optimize before generating heapcleaner code"
); # If this flag is on then we do optimizations before generating heapcleaner code.
# If this flag is on then track_types_for_heapcleaner must also be turned on!
# Otherwise use the default behavior.
split_entry_block # XXX BUGGO FIXME icky thread-hostile global mutable state.
=
ctl::lowhalf::make_bool # Defaults to FALSE.
( "split_entry_block",
"whether to split entry block"
); # If this flag is on then split the entry block.
# This should be on for SSA optimizations.
empty_block = lhn::empty_block.x_to_note (); # Dummy annotation used to get an empty block.
tagword_to_int = large_unt::to_int; # Converts heap-record tagword to int.
# The main code generation function.
#
# This represents the major entrypoint into
# the machine-dependent backend lower half
# from the machine-independent upper half.
#
# We are called from translate_anormcode_to_execode in
#
#
src/lib/compiler/back/top/main/backend-tophalf-g.pkg fun translate_nextcode_to_execode
{
nextcode_functions: List( ncf::Function ), # All the functions for a complete package ("compilation unit").
fun_id__to__max_resource_consumption # Given
: # a
ncf::Codetemp # fun_id
-> # return
{ max_possible_heapwords_allocated_before_next_heaplimit_check: Int, # max possible words of heap memory allocated before next heaplimit check, and
max_possible_nextcode_ops_run_before_next_heaplimit_check: Int # max possible nextcode instructions executed before next heaplimit check.
},
err,
source_name, # Typically sourcefile name, something like "<stdin>" if compiling interactively.
per_compile_stuff
}
=
{ apply note_entrypoint_label_and_type nextcode_functions;
#
cccomponents = ffc::find_nextcode_cccomponents nextcode_functions; # Break the 'nextcode_functions' callgraph up into connected components.
apply translate_nextcode_cccomponent_to_treecode cccomponents; # This is where all the work is...
finish_compilation_unit source_name;
# Here we construct and return to caller a thunk which computes the
# entrypoint offset into the machinecode bytevector. (This is the
# address which at linktime will be called with a table of all loaded
# packages; the package will note all needed resources and return
# its own list of exported functions and other values.)
#
# The idea is that in principle this address could be anywhere in the
# compiled code, and the address might not be fixed until the sizes of
# span-depdendent instructions (i.e., pc-relative jumps) has been decided,
# so our caller should finish code generation before calling this thunk.
#
# In practice the entrypoint is always zero, and this whole charade
# could and maybe should be dispensed with.
get_entrypoint_offset_of_first_function nextcode_functions; # A (Void -> Int) thunk returning entrypoint offset into machinecode bytevector.
} # (In practice this is currently always zero.)
where
max_possible_heapwords_allocated_before_next_heaplimit_check
=
.max_possible_heapwords_allocated_before_next_heaplimit_check o fun_id__to__max_resource_consumption;
split_entry_block = *split_entry_block;
# These functions generate new codetemps and
# mark expressions with their heapcleaner types.
#
# When the heapcleaner-safety feature is turned on,
# we'll use the versions of make_int_codetemp_info that automatically
# update the heapcleaner-map.
#
# Otherwise, we'll just use the normal version.
track_types_for_heapcleaner = *track_types_for_heapcleaner;
my ( make_int_codetemp_info,
make_int_codetemp_info_with_ncftype,
make_int_codetemp_info_with_kind_and_size,
make_float_codetemp_info
)
=
if (not track_types_for_heapcleaner) # Currently track_types_for_heapcleaner is ALWAYS FALSE.
#
( rgk::make_int_codetemp_info, # We're not tracking heapcleaner types for the backend lowhalf, so no extra work to do here.
rgk::make_int_codetemp_info,
rgk::make_int_codetemp_info,
rgk::make_float_codetemp_info
);
else
# We're tracking heapcleaner types for the backend lowhalf,
# so redefine our make-codetemp fns to track heapcleaner info:
#
make_int_codetemp_info = hr::make_codetemp_info_of_kind rkj::INT_REGISTER; # Currying is important here for efficiency -- make_codetemp_info_of_kind is slow,
make_float_codetemp_info = hr::make_codetemp_info_of_kind rkj::FLOAT_REGISTER; # but make_int_codetemp_info is fast.
#
fun make_int_codetemp_info_with_ncftype ncftype
=
make_int_codetemp_info (ncftype_to_heapcleaner_type ncftype);
#
fun make_int_codetemp_info_with_kind_and_size kind_and_size
=
make_int_codetemp_info (kind_and_size_to_heapcleaner_type kind_and_size);
( make_int_codetemp_info,
make_int_codetemp_info_with_ncftype,
make_int_codetemp_info_with_kind_and_size,
make_float_codetemp_info
);
fi;
# Maybe wrap heapcleaner type around ptr/i32/flt:
#
fun hc_ptr e = if track_types_for_heapcleaner tcf::RNOTE (e, ptr_type); else e; fi;
fun hc_i32 e = if track_types_for_heapcleaner tcf::RNOTE (e, i32_type); else e; fi;
fun hc_flt e = if track_types_for_heapcleaner tcf::FNOTE (e, f64_type); else e; fi;
#
fun maybe_note_type_for_heapcleaner (e, ncftype)
=
track_types_for_heapcleaner ?? tcf::RNOTE (e, ncftype_to_note ncftype)
:: e;
#
fun mark_nothing e
=
e;
# Private ("all-callers-known") functions have
# parameters passed in fresh temporaries.
#
# We (may) also annotate the heapcleaner types of these temporaries:
#
fun translate_function_formal_args_from_nextcode_to_treecode_form (ncftype ! rest)
=>
case ncftype
#
ncf::typ::FLOAT64 => tcf::FLOAT_EXPRESSION (tcf::CODETEMP_INFO_FLOAT (flt_bitsize, make_float_codetemp_info chi::f64_type));
#
ncf::typ::INT => tcf::INT_EXPRESSION (tcf::CODETEMP_INFO (int_bitsize, make_int_codetemp_info chi::i31_type));
ncf::typ::INT1 => tcf::INT_EXPRESSION (tcf::CODETEMP_INFO (int_bitsize, make_int_codetemp_info chi::i32_type));
_ => tcf::INT_EXPRESSION (tcf::CODETEMP_INFO (ptr_bitsize, make_int_codetemp_info chi::ptr_type));
esac
!
translate_function_formal_args_from_nextcode_to_treecode_form rest;
translate_function_formal_args_from_nextcode_to_treecode_form []
=>
[];
end;
# fun_id__to__codelabel__hashtable maps function ids
# (ncf::lvars -- in practice, ints) to the codelabels
# for those functions.
#
# If the flag split_entry_block is on we also
# distinguish between public and privatel labels,
# making sure that no branches go directly to the
# public labels.
exception LABEL_BIND;
exception TYPE_TABLE;
stipulate
my fun_id__to__codelabel__hashtable: iht::Hashtable ( lbl::Codelabel )
= iht::make_hashtable { size_hint => 32, not_found_exception => LABEL_BIND };
herein
get_codelabel_for_fun_id = iht::get fun_id__to__codelabel__hashtable;
set_codelabel_for_fun_id = iht::set fun_id__to__codelabel__hashtable;
end;
#
stipulate
ncflvar_to_ncftype = iht::make_hashtable { size_hint => 32, not_found_exception => TYPE_TABLE } # ncflvar_to_ncftype is a mapping of ncf::lvars to nextcode types
: iht::Hashtable ( ncf::Type );
herein
set_ncftype_for_codetemp = iht::set ncflvar_to_ncftype;
get_ncftype_for_codetemp = iht::get ncflvar_to_ncftype; # This maps nextcode value expressions to nextcode types.
end;
#
fun note_entrypoint_label_and_type (callers_info, fun_id, _, _, _) # define the labels and ncftype for all nextcode functions.
=
{ set_codelabel_for_fun_id (fun_id, lbl::make_anonymous_codelabel()); # Private label.
#
if split_entry_block # Public label.
#
case callers_info
#
(ncf::FATE_FN
| ncf::PUBLIC_FN)
=>
set_codelabel_for_fun_id
(
-fun_id - 1, # This -fun_id - 1 crap could be simplified to just -f if we just guaranteed that all valid labels are nonzero. XXX SUCKO FIXME.
lbl::make_codelabel_generator (int::to_string fun_id) ()
);
_ => ();
esac;
fi;
case callers_info
#
ncf::FATE_FN => set_ncftype_for_codetemp (fun_id, ncf::typ::FATE);
_ => set_ncftype_for_codetemp (fun_id, ncf::bogus_pointer_type);
esac;
}; # fun note_entrypoint_label_and_type
#
fun_id__to__branch_probability
=
fbp::guess_nextcode_branch_probabilities nextcode_functions # Compute probabilities, stash them in a hashtable, return lookup function.
:
ncf::Codetemp -> Null_Or(pby::Probability);
#
fun branch_with_probability (branch, THE probability) => tcf::NOTE (branch, lhn::branch_probability.x_to_note probability);
branch_with_probability (branch, NULL ) => branch;
end;
# A nextcode register may be implemented as a physical
# register or a memory location. This function moves
# a value v into a register or a memory location. # "rreg" == "reg_or_ramreg".
#
fun set_rreg (tcf::CODETEMP_INFO (type, r), v) => tcf::LOAD_INT_REGISTER (type, r, v);
set_rreg (tcf::LOAD (type, ea, mem), v) => tcf::STORE_INT (type, ea, v, mem);
set_rreg _ => error "set_rreg";
end;
# Translate one nextcode cccomponent to # "cccomponent" == "callgraph connected-component".
# treecode (and thence immediately to machcode):
#
fun translate_nextcode_cccomponent_to_treecode
#
(cccomponent: List( ncf::Function ))
#
: Void # Void because results are side-effected :-( onto dataseg_list and textseg_list in
# #
# #
src/lib/compiler/back/low/jmp/squash-jumps-and-write-code-to-code-segment-buffer-intel32-g.pkg # #
src/lib/compiler/back/low/jmp/squash-jumps-and-write-code-to-code-segment-buffer-pwrpc32-g.pkg = #
src/lib/compiler/back/low/jmp/squash-jumps-and-write-code-to-code-segment-buffer-sparc32-g.pkg { if *ctl::debugging
#
apply
ppn::print_nextcode_function
cccomponent; # cccomponent is just a List( ncf::Function ).
fi;
(t2m::make_treecode_to_machcode_codebuffer (mkg::make_machcode_codebuffer ())) # make_machcode_codebuffer is from
src/lib/compiler/back/low/mcg/make-machcode-codebuffer-g.pkg ->
buf;
# The above constructs the wrapped codebuffer which
# translates treecode to machine code and then holds
# the resulting machcode until asked to regurgitate it.
#
# We're going to do lots of
#
# buf.put_op( treecode_expression )
#
# calls to construct our machine-code controlflow
# graph (while simultaneously translating from treecode
# to machcode) and then one
#
# result = buf.get_completed_cccomponent ...
#
# call to retrieve the resulting controlflow graph.
#
# The treecode_expression is translated to abstract machinecode by one of
#
#
src/lib/compiler/back/low/intel32/treecode/translate-treecode-to-machcode-intel32-g.pkg #
src/lib/compiler/back/low/pwrpc32/treecode/translate-treecode-to-machcode-pwrpc32-g.pkg #
src/lib/compiler/back/low/sparc32/treecode/translate-treecode-to-machcode-sparc32-g.pkg #
# which in turn use put_* commands which drive
#
#
src/lib/compiler/back/low/mcg/make-machcode-codebuffer-g.pkg
#
# to construct an actual machine code controlflow graph, i.e. an instance of one of
#
# machcode_controlflow_graph_intel32 from
src/lib/compiler/back/low/main/intel32/backend-lowhalf-intel32-g.pkg # machcode_controlflow_graph_pwrpc32 from
src/lib/compiler/back/low/main/pwrpc32/backend-lowhalf-pwrpc32.pkg # machcode_controlflow_graph_sparc32 from
src/lib/compiler/back/low/main/sparc32/backend-lowhalf-sparc32.pkg #
# all of which are generated by
#
#
src/lib/compiler/back/low/mcg/machcode-controlflow-graph-g.pkg # per
src/lib/compiler/back/low/mcg/machcode-controlflow-graph.api # If ncf::RAW_C_CALL is present we need
# to use the virtual frame pointer:
#
stipulate
#
fun has_raw_c_call ((_, _, _, _, cexp) ! rest) # There HAS to be a better way of tracking this information than
=> # doing a complete code pass here to get one bit of information. XXX SUCKO FIXME.
ncf::has_raw_c_call cexp
or
has_raw_c_call rest;
has_raw_c_call [] => FALSE;
end;
herein
use_virtual_framepointer
=
not mp::framepointer_never_virtual
and
has_raw_c_call cccomponent;
end;
uvf::use_virtual_framepointer # This gets read (only) one place -- fun stack_basepointer ()
:= # in
src/lib/compiler/back/low/main/intel32/backend-lowhalf-intel32-g.pkg use_virtual_framepointer;
heap_is_exhausted__test
=
# This is the
#
# heap_allocation_pointer > heap_allocation_limit
#
# comparison test used -- when this is TRUE, it is time to
# run the heapcleaner ("garbage collector").
#
# We have a per-platform choice of signed vs unsigned comparisons.
#
# This usually doesn't matter, but some
# architectures work better one way or
# the other, so we are given a choice here.
#
tcf::CMP (
#
ptr_bitsize,
pri::use_signed_heaplimit_check
?? tcf::GT
:: tcf::GTU,
pri::heap_allocation_pointer,
pri::heap_allocation_limit use_virtual_framepointer
);
#############################################################
# Per-cccomponent tables
exception INT_REGISTER_MAP;
exception FLOAT_REGISTER_MAP;
exception GEN_TABLE;
stipulate
my fun_id__to__callers_info__hashtable: iht::Hashtable( nfs::Callers_Info )
=
iht::make_hashtable { size_hint => length cccomponent, not_found_exception => GEN_TABLE };
#
# Used to retrieve the arg passing convention
# once a function has been compiled.
herein
set__callers_info__for__fun_id = iht::set fun_id__to__callers_info__hashtable;
get__callers_info__for__fun_id = iht::get fun_id__to__callers_info__hashtable;
end;
# { fp, gp } RegTable -- mapping of lvars to registers
codetemp_to_tcf_float_expression__hashtable
#
= iht::make_hashtable { size_hint => 2, not_found_exception => FLOAT_REGISTER_MAP }
: iht::Hashtable( tcf::Float_Expression );
codetemp_to_tcf_int_expression__hashtable
#
= iht::make_hashtable { size_hint => 32, not_found_exception => INT_REGISTER_MAP }
: iht::Hashtable( tcf::Int_Expression );
set_int_def_for_codetemp = iht::set codetemp_to_tcf_int_expression__hashtable;
#
fun set_int_def_for_codetemp' (codetemp, r)
=
set_int_def_for_codetemp (codetemp, tcf::CODETEMP_INFO (int_bitsize, r));
# PRODUCTION VERSION:
# set_float_def_for_codetemp
# =
# iht::set codetemp_to_tcf_float_expression__hashtable;
# TEMPORARY DEBUG VERSION:
fun set_float_def_for_codetemp (arg as (x, t))
=
{
if *log::debugging
printf "set_float_def_for_codetemp (%d, ...)\n" x;
fi;
iht::set codetemp_to_tcf_float_expression__hashtable arg;
};
# The following function looks up
# the Treecode expression associated
# with a floating point value expression:
#
# get_tcf_float_expression_for_codetemp
# =
# iht::get codetemp_to_tcf_float_expression__hashtable;
fun get_tcf_float_expression_for_codetemp x
=
{
if *log::debugging
printf "get_tcp_float_expression_for_codetemp %d\n" x;
fi;
iht::get codetemp_to_tcf_float_expression__hashtable x;
};
#
fun def_for_float_codetemp (ncf::CODETEMP v) => get_tcf_float_expression_for_codetemp v;
def_for_float_codetemp _ => error "def_for_float_codetemp";
end;
# To do Sethi-Ullman register-use minimization -- see
#
#
src/lib/compiler/back/low/intel32/treecode/translate-treecode-to-machcode-intel32-g.pkg #
# -- we now need to convert our linear
# code blocks into expression trees.
# To do this, we first classify each subexpression
# according to the number of times its value is used:
#
Codetemp_Use_Frequency
#
= NO_USES # Codetemp is never used -- it can be discarded if pure.
| ONE_USE
# Codetemp is used exactly once -- we will inline the expression at its use point.
| MULTIPLE_USES
# Codetemp is used two or more times -- we will to leave it in place.
| ONE_USE_AND_INLINED
# Codetemp is ONE_USE and has been (or will be) inlined at its use point.
;
# The following function is used to translate nextcode into
# larger trees. Definitions marked ONE_USE can be forward
# propagated to their (only) use. This can drastically
# reduce register pressure.
stipulate
exception CODETEMP_USE_FREQUENCY_HASHTABLE;
herein
my codetemp_use_frequency_hashtable: iht::Hashtable( Codetemp_Use_Frequency )
= iht::make_hashtable { size_hint => 32, not_found_exception => CODETEMP_USE_FREQUENCY_HASHTABLE };
end;
#
fun get_codetemp_use_frequency i
=
the_else (iht::find codetemp_use_frequency_hashtable i, NO_USES);
set_codetemp_use_frequency
=
iht::set codetemp_use_frequency_hashtable;
#
fun set_codetemp_use_frequency_to__one_use_and_inlined r
=
set_codetemp_use_frequency (r, ONE_USE_AND_INLINED);
# Reset the register and expression-usage hashtables:
#
fun clear_hashtables ()
=
{ iht::clear codetemp_to_tcf_int_expression__hashtable;
iht::clear codetemp_to_tcf_float_expression__hashtable;
iht::clear codetemp_use_frequency_hashtable;
};
# Memory disambiguation uses
# the new register counters,
# so those must be reset here.
rgk::reset_codetemp_id_allocation_counters ();
mem_disambig
=
ma::analyze_memory_aliasing_of_nextcode_functions cccomponent;
# Points-to analysis projection.
#
fun projection (x as REF (pt::TOP _), _)
=>
x;
projection (x, i)
=>
pt::ith_projection (x, i);
end;
stipulate
must_disambiguate_memory # Normally FALSE.
=
*coc::disambiguate_memory;
herein
#
fun get_ramregion e
=
if must_disambiguate_memory
#
case e
ncf::CODETEMP v => mem_disambig v;
_ => rgn::readonly;
esac;
else
rgn::memory;
fi;
#
fun get_ramregion_projection (e, i)
=
if must_disambiguate_memory
#
case e
ncf::CODETEMP v => projection (mem_disambig v, i);
_ => rgn::readonly;
esac;
else
rgn::memory;
fi;
end;
#
fun get_dataptr_ramregion v
=
get_ramregion_projection (v, 0);
# fun get_rw_vector_ramregion (x as REF (pt::TOP _)) => x;
# get_rw_vector_ramregion x => pt::weak_subscript x;
# end;
#
# For safety, let's assume it's
# the global memory right now:
#
fun get_rw_vector_ramregion _
=
rgn::memory;
# This keeps track of the accumulated advances of the
# heap_allocation_pointer since the start of the nextcode fn.
# This is important for generating the correct address offset
# for newly allocated records.
#
advanced_heap_ptr = REF 0;
# Return the nextcode type for
# a nextcode value expression:
#
fun ncftype_of (ncf::CODETEMP v) => get_ncftype_for_codetemp v;
ncftype_of (ncf::LABEL v) => get_ncftype_for_codetemp v;
#
ncftype_of (ncf::INT _) => ncf::typ::INT;
ncftype_of (ncf::INT1 _) => ncf::typ::INT1;
ncftype_of (ncf::TRUEVOID ) => ncf::typ::FLOAT64; # What?? -- 2011-08-16 CrT ncf::TRUEVOID comes only from a line in
src/lib/compiler/back/top/closures/make-nextcode-closures-g.pkg #
ncftype_of _ => ncf::bogus_pointer_type;
end;
# 'base_pointer' contains the start address
# of the entire compilation unit.
#
# Here we generate the address of a label that
# is embedded in the same compilation unit.
# The generated address is relative to 'base_pointer'.
#
# For heapcleaner safety, we consider
# this to be a chunk reference.
#
fun make_code_for_label_address (codelabel, k)
=
hc_ptr e
where
e = tcf::ADD ( # base_pointer + (codelabel + (k - mp::const_base_pointer_reg_offset))
pri::address_width,
pri::base_pointer use_virtual_framepointer,
tcf::LABEL_EXPRESSION (
tcf::ADD (
pri::address_width,
tcf::LABEL codelabel,
tcf::LITERAL (
multiword_int::from_int
(k - mp::const_base_pointer_reg_offset)
) ) ) );
end;
# The following function looks up the Treecode expression
# associated with a general purpose value expression.
#
get_int_def_for_codetemp
=
iht::get codetemp_to_tcf_int_expression__hashtable;
#
fun resolve_heap_ptr_offset (tcf::LATE_CONSTANT absolute_heap_ptr_offset)
=>
# Here we resolve address computations of the form
#
# tcf::LATE_CONSTANT k
#
# where offset is a reference to the kth byte allocated
# since the beginning of the nextcode fn.
#
{ tmp_r = make_int_codetemp_info chi::ptr_type;
#
offset = absolute_heap_ptr_offset - *advanced_heap_ptr;
buf.put_op # tmp_r := heap_allocation_pointer + offset;
(tcf::LOAD_INT_REGISTER
(
ptr_bitsize,
tmp_r,
tcf::ADD
(
pri::address_width,
pri::heap_allocation_pointer,
int offset
) ) );
tcf::CODETEMP_INFO (ptr_bitsize, tmp_r);
};
resolve_heap_ptr_offset e
=>
e;
end;
#
fun resolve_heap_ptr_offset' (tcf::LATE_CONSTANT absolute_heap_ptr_offset)
=>
# As above, but here we generate the address but do not store it into
# a register (codetemp); this allows use in more complex subexpressions:
#
{ offset = absolute_heap_ptr_offset - *advanced_heap_ptr;
#
hc_ptr (tcf::ADD (pri::address_width, pri::heap_allocation_pointer, int offset)); # heap_allocation_pointer + offset
};
resolve_heap_ptr_offset' other
=>
other;
end;
#
fun def_for_int_codetemp (ncf::CODETEMP v) => resolve_heap_ptr_offset (get_int_def_for_codetemp v);
def_for_int_codetemp (ncf::INT i) => int (i+i+1); # Convert to tagged int format by leftshifting and setting low bit.
def_for_int_codetemp (ncf::INT1 u) => unt u; # XXX SUCKO FIXME: should have an explicit fn for converting to tagged int.
def_for_int_codetemp (ncf::LABEL v)
=>
make_code_for_label_address (get_codelabel_for_fun_id (split_entry_block ?? -v - 1 :: v), 0);
def_for_int_codetemp _ => error "def_for_int_codetemp";
end;
#
fun def_for_int_codetemp' (ncf::CODETEMP v) => resolve_heap_ptr_offset' (get_int_def_for_codetemp v); # The only line that differs from above fun.
def_for_int_codetemp' (ncf::INT i) => int (i+i+1); # Convert to tagged int format by leftshifting and setting low bit.
def_for_int_codetemp' (ncf::INT1 u) => unt u; # XXX SUCKO FIXME: should have an explicit fn for converting to tagged int.
def_for_int_codetemp' (ncf::LABEL v)
=>
make_code_for_label_address (get_codelabel_for_fun_id (split_entry_block ?? -v - 1 :: v), 0);
def_for_int_codetemp' _ => error "def_for_int_codetemp'";
end;
# On entry to a function the args are in
# standardized registers. The function-body
# code immediately copies the args to fresh
# codetemps. This frees the argument values
# to move elsewhere and is critical in avoiding
# artificial codetemp interferences:
#
fun copy_args_to_arg_codetemps (rl, vl, tl) # == (args, codetemps, types) -- we're copying 'args' to 'codetemps'.
=
{ (e_copy (vl, rl, [], [], [], [])) # == (codetemps, args, destregs, srcregs, codetemps', args')
->
(vl', rl');
e_fcopy (e_other (vl', rl', [], []));
pl::apply set_ncftype_for_codetemp (vl, tl);
}
where
fun e_copy([], [], [], [], xs', rl')
=>
(xs', rl'); # Done.
e_copy (x ! xs, tcf::INT_EXPRESSION (tcf::CODETEMP_INFO(_, r)) ! rl, rds, rss, xs', rl') # "rds" = "dst registers"; "rss" == "src registers".
=>
{ t = make_int_codetemp_info chi::ptr_type;
#
set_int_def_for_codetemp' (x, t);
#
e_copy (xs, rl, t ! rds, r ! rss, xs', rl');
};
e_copy (x ! xs, r ! rl, rds, rss, xs', rl')
=>
e_copy (xs, rl, rds, rss, x ! xs', r ! rl');
e_copy([], [], rds, rss, xs', rl')
=>
{ buf.put_op (tcf::MOVE_INT_REGISTERS (int_bitsize, rds, rss));
(xs', rl');
};
e_copy (([], _ ! _, _, _, _, _)
| (_ ! _, [], _, _, _, _))
=>
error "e_copy";
end;
#
fun e_other([], [], xs, rl)
=>
(xs, rl);
e_other (x ! xs, (tcf::INT_EXPRESSION r) ! rl, xs', rl')
=>
{ t = make_int_codetemp_info chi::ptr_type;
#
set_int_def_for_codetemp' (x, t);
buf.put_op (tcf::LOAD_INT_REGISTER (int_bitsize, t, r));
e_other (xs, rl, xs', rl');
};
e_other (x ! xs, (tcf::FLOAT_EXPRESSION (tcf::CODETEMP_INFO_FLOAT(_, f))) ! rl, xs', rl')
=>
e_other (xs, rl, x ! xs', f ! rl');
e_other (_, tcf::FLOAT_EXPRESSION _ ! _, _, _)
=>
error "e_other: FPR but not FREG";
e_other (_, tcf::FLAG_EXPRESSION _ ! _, _, _)
=>
error "e_other: FLAG_EXPRESSION";
e_other (([], _ ! _, _, _)
| (_ ! _, [], _, _))
=>
error "e_other";
end;
#
fun e_fcopy ([], [])
=>
();
e_fcopy (xs, rl)
=>
{ fs = map (\\ _ = make_float_codetemp_info chi::f64_type)
xs;
pl::apply
(\\ (x, f) = set_float_def_for_codetemp (x, tcf::CODETEMP_INFO_FLOAT (flt_bitsize, f)))
(xs, fs);
buf.put_op (tcf::MOVE_FLOAT_REGISTERS (flt_bitsize, fs, rl));
};
end;
end;
#############################################################################
# Nomenclature: "hap_offset" == "heap_allocation_pointer_offset".
#
# Motivation:
# On x86 heap_allocation_pointer permanently owns the EDI register -- see
#
#
src/lib/compiler/back/low/main/intel32/backend-lowhalf-intel32-g.pkg #
# (Other platforms are similar.)
#
# Conceptually we allot heapspace by sequences like
#
# *heap_allocation_pointer_offset++ = record_tagword;
# *heap_allocation_pointer_offset++ = field_1;
# *heap_allocation_pointer_offset++ = field_2;
# ...
# *heap_allocation_pointer_offset++ = field_n;
#
# In practice that is too slow -- the pointer-increments take ALU resources,
# and also introduce data dependencies which inhibit multiple instruction
# issue (on-the-fly parallelism) on modern microprocessors, so it is better to do
#
# heap_allocation_pointer_offset[0] = record_tagword;
# heap_allocation_pointer_offset[1] = field_1;
# heap_allocation_pointer_offset[2] = field_2;
# ...
# heap_allocation_pointer_offset[n] = field_n;
# heap_allocation_pointer_offset += n+1;
#
# If the different fields are being generated by a complex of functions,
# the latter approach requires that we keep track of how much heapspace
# has been allocated since the last update to the heap_allocation_pointer.
# That is the function of the 'hap_offet' values used in this package.
#############################################################################
#
fun update_heap_allocation_pointer hap_offset # "hap_offset" == "heap_allocation_pointer offset".
=
# We've allocated a number of words of heap memory;
# now it is time to wrap up the allocation burst and
# bring heap_allocation_pointer up to date by doing
#
# heap_allocation_pointer += hap_offset;
#
# We keep heap_allocation_pointer aligned on odd 32-bit
# boundary so that after allocating a 32-bit tagword we
# will be correctly aligned for 64-bit data.
#
# (We have accounted for the extra space this eats up
# in pkg pick_nextcode_fns_for_heaplimit_checks.) # pick_nextcode_fns_for_heaplimit_checks is from
src/lib/compiler/back/low/main/nextcode/pick-nextcode-fns-for-heaplimit-checks.pkg #
if (hap_offset != 0)
#
if (unt::bitwise_and (unt::from_int hap_offset, 0u4) != 0u0) advance_by (hap_offset+4); # 64-bit issue: '4' is 'wordbytes' -- and this alignment is not needed on 64-bit implementations anyhow.
else advance_by (hap_offset );
fi;
fi
where
fun advance_by hap_offset
=
{ advanced_heap_ptr := *advanced_heap_ptr + hap_offset;
#
buf.put_op (tcf::LOAD_INT_REGISTER # heap_allocation_pointer_register += hap_offset;
(
ptr_bitsize,
heap_allocation_pointer_register,
tcf::ADD
( pri::address_width,
pri::heap_allocation_pointer,
int hap_offset
) ) );
};
end;
#
fun maybe_test_heap_allocation_limit hap_offset # "hap_offset" == "heap_allocation_pointer offset".
=
{ update_heap_allocation_pointer hap_offset;
# This next is a nop on Intel32; on Pwrpc32 and Sparc32
# it loads a register with bits from a status register
# loaded by a previous (delay-slot) comparison
#
# heaplimit_allocation_pointer > heaplimit_allocation_limit
# # heap_is_exhausted__test def in
src/lib/compiler/back/low/main/sparc32/backend-lowhalf-sparc32.pkg case pri::heap_is_exhausted__test # heap_is_exhausted__test def in
src/lib/compiler/back/low/main/pwrpc32/backend-lowhalf-pwrpc32.pkg #
THE cc => assign_cc (cc, heap_is_exhausted__test); # "cc" == "condition-code" -- the ZERO/OVERFLOW/... status-register bits.
#
NULL => (); # The Intel32 case.
esac
where
fun assign_cc (tcf::CC (_, cc), v)
=>
buf.put_op (tcf::LOAD_INT_REGISTER_FROM_FLAGS_REGISTER (cc, v));
assign_cc _ => error "maybe_test_heap_allocation_limit::assign_cc";
end;
end;
};
# r+n;
#
fun ea (r, 0) => r;
ea (r, n) => tcf::ADD (pri::address_width, r, int n);
end;
# r + 4*n;
#
fun index_ea (r, 0) => r;
index_ea (r, n) => tcf::ADD (pri::address_width, r, int (n*4)); # 64-bit issue -- '4' is presumably 'wordbytes' here.
end;
# Function to heap-allot an integer record
#
# x <- [tagword, field_values... ]
#
# at heap_allocation_pointer + hap_offset:
#
fun allot_record (hc_wrapfn, mem, tagword, field_values, hap_offset)
=
{ buf.put_op (tcf::STORE_INT (int_bitsize, ea (pri::heap_allocation_pointer, hap_offset), tagword, projection (mem, -1))); # Store tagword at start of new record.
#
store_fields (field_values, hap_offset+4, 0); # 64-bit issue: '4' is 'wordbytes'.
#
hap_offset + 4; # 64-bit issue: '4' is 'wordbytes'.
}
where
fun get_field_address (v, record, ncf::SLOT 0 ) => record;
get_field_address (v, record, ncf::SLOT index) => tcf::ADD (pri::address_width, record, int (4*index)); # 64-bit issue: '4' is 'wordbytes'.
get_field_address (v, record, path ) => get_path (get_ramregion v, record, path);
end
also
fun get_path (mem, record, ncf::SLOT index)
=>
index_ea (record, index);
get_path (mem, record, ncf::VIA_SLOT (index, ncf::SLOT 0))
=>
hc_wrapfn (tcf::LOAD (int_bitsize, index_ea (record, index), projection (mem, index)));
get_path (mem, record, ncf::VIA_SLOT (index, path))
=>
{ mem = projection (mem, index);
#
get_path (mem, hc_ptr (tcf::LOAD (int_bitsize, index_ea (record, index), mem)), path);
};
end;
#
fun store_fields ([], hap_offset, element)
=>
hap_offset;
store_fields ((record, path) ! field_values, hap_offset, element)
=>
{ buf.put_op # heap_allocation_pointer[ hap_offset ] = v.p (?);
(tcf::STORE_INT
(
int_bitsize,
tcf::ADD (pri::address_width, pri::heap_allocation_pointer, int hap_offset), # Where to store: heap_allocation_pointer + hap_offset;
get_field_address (record, def_for_int_codetemp' record, path), # What to store. 'path' can be a simple slot number, or a true path.
projection (mem, element)
)
);
#
store_fields (field_values, hap_offset+4, element+1); # 64-bit issue: '4' is 'wordbytes'.
};
end;
end;
# Same as above, except for floating point instead of int record:
#
# x <- [tagword, field_values... ]
#
fun allot_frecord (mem, tagword, field_values, hap_offset)
=
{ buf.put_op (tcf::STORE_INT (int_bitsize, ea (pri::heap_allocation_pointer, hap_offset), tagword, projection (mem, -1)));
#
fstore_fields (field_values, hap_offset+4, 0); # 64-bit issue: '4' is 'wordbytes'.
#
hap_offset+4; # 64-bit issue: '4' is 'wordbytes'.
}
where
fun fea (r, 0) => r;
fea (r, n) => tcf::ADD (pri::address_width, r, int (n*8));
end;
#
fun fget_field (v, ncf::SLOT 0) => def_for_float_codetemp v;
fget_field (v, ncf::SLOT _) => error "allot_frecord::fget_field";
fget_field (v, p ) => fget_path (get_ramregion v, def_for_int_codetemp' v, p);
end
also
fun fget_path (mem, e, ncf::SLOT _)
=>
error "allot_frecord::fget_path";
fget_path (mem, e, ncf::VIA_SLOT (n, ncf::SLOT 0))
=>
hc_flt (tcf::FLOAD (flt_bitsize, fea (e, n), projection (mem, n)));
fget_path (mem, e, ncf::VIA_SLOT (n, p))
=>
{ mem = projection (mem, n);
fget_path (mem, hc_ptr (tcf::LOAD (int_bitsize, index_ea (e, n), mem)), p);
};
end;
#
fun fstore_fields ([], hap_offset, element)
=>
hap_offset;
fstore_fields((v, p) ! field_values, hap_offset, element)
=>
{ buf.put_op
(tcf::STORE_FLOAT
(
flt_bitsize,
tcf::ADD (pri::address_width, pri::heap_allocation_pointer, int hap_offset),
fget_field (v, p),
projection (mem, element)
) );
fstore_fields (field_values, hap_offset+8, element+1);
};
end;
end;
# Allocate a header pair for vector or rw_vector:
#
# heap_allocation_pointer[ hap_offset ] = hdr_tagword;
# heap_allocation_pointer[ hap_offset + 4 ] = data_ptr;
# heap_allocation_pointer[ hap_offset + 8 ] = length_in_slots;
#
fun allocate_vector_header (hdr_tagword, mem, data_ptr, length_in_slots, hap_offset)
=
{ buf.put_op (tcf::STORE_INT (int_bitsize, ea (pri::heap_allocation_pointer, hap_offset), int hdr_tagword, projection (mem,-1)));
buf.put_op (tcf::STORE_INT (int_bitsize, ea (pri::heap_allocation_pointer, hap_offset+4), # 64-bit issue: '4' is 'wordbytes'.
tcf::CODETEMP_INFO (int_bitsize, data_ptr), projection (mem, 0)));
buf.put_op (tcf::STORE_INT
(
int_bitsize,
ea (pri::heap_allocation_pointer, hap_offset+8), # 64-bit issue: '8' is '2*wordbytes'.
int (length_in_slots+length_in_slots+1), # len+len+1: == ((len<<1)
|1) -- Unt1 to Tagged_Unt tagged-int form.
projection (mem, 1)
) );
hap_offset+4; # '+4' not '+12' because our caller wants a pointer to post-tagword part of record, not to next-word-to-allot. # 64-bit issue: '4' is 'wordbytes'.
};
#####################################################################
# Tagged_Int-arithmetic implementation.
#
# We store Tagged_Int values in-pointer, marked
# by having the low bit set to 1.
#
# (We keep all heap values word-aligned, so all valid
# heap pointers will always have the low two bits zero.
# Consequently the heapcleaner can always distinguish
# Tagged_Int values from valid pointers when processing a
# heapchunk, allowing it to ignore Tagged_Int values rather
# than treat them as valid pointers and maybe segfault.)
#
# The low-bit tagging of Tagged_Int values
# means that to naively do (say)
#
# k = i + j;
#
# on Tagged_Int values we must actually do
#
# k = (((i >> 1) + (j >> 1)) << 1) + 1;
#
# In practice, in specific situations, we can
# often find equivalent expressions with fewer
# shifts. For example, the above is arithmetically
# equal to:
#
# k = (i - 1) + j;
#
#####################################################################
#
fun add_tagged_int_tag value = tcf::ADD (int_bitsize, value, one); # Set the lowbit tag on a value which is already left-shifted one bit.
fun or_tagged_int_tag value = tcf::BITWISE_OR (int_bitsize, value, one); # Same as above, except it is a safe no-op if the lowbit is already set.
fun strip_tagged_int_tag value = tcf::SUB (int_bitsize, value, one); # Subtract one to clear the lowbit tag on a 31-bit tagged int.
#
fun tag (FALSE, value) => tag_unsigned value;
tag (TRUE, value) => tag_signed value;
end
also
fun tag_unsigned e # Tag unsigned value 'e' as an tagged_int.
=
{ fun double r
=
tcf::ADD (int_bitsize, r, r);
case e
#
tcf::CODETEMP_INFO _
=>
add_tagged_int_tag (double e); # e = (e << 1) + 1;
_ => { tmp = make_int_codetemp_info chi::ptr_type; # XXX ???
tcf::LET ( tcf::LOAD_INT_REGISTER (int_bitsize, tmp, e), # tmp = (e << 1) + 1;
#
add_tagged_int_tag (double (tcf::CODETEMP_INFO (int_bitsize, tmp)))
);
};
esac;
}
also
fun tag_signed e # Same as above, but with int OVERFLOW CHECKING.
=
{ fun double r
=
if *coc::trap_int_overflow tcf::ADD_OR_TRAP (int_bitsize, r, r);
else tcf::ADD (int_bitsize, r, r);
fi;
case e
#
tcf::CODETEMP_INFO _
=>
add_tagged_int_tag (double e); # e = (e << 1) + 1; WITH OVERFLOW TRAPPING.
_ => { tmp = make_int_codetemp_info chi::ptr_type; # XXX ???
tcf::LET ( tcf::LOAD_INT_REGISTER (int_bitsize, tmp, e), # tmp = (e << 1) + 1; WITH OVERFLOW CHECKING
#
add_tagged_int_tag (double (tcf::CODETEMP_INFO (int_bitsize, tmp)))
);
};
esac;
};
#
fun untag { signed => TRUE, value } => untag_signed value;
untag { signed => FALSE, value } => untag_unsigned value;
end
also
fun untag_unsigned (ncf::INT i) => int i; # ncf::INT val is untagged, doesn't need the rightshift.
untag_unsigned v => tcf::RIGHT_SHIFT_U (int_bitsize, def_for_int_codetemp v, one); # v >> 1; # Without sign extension.
end
also
fun untag_signed (ncf::INT i) => int i; # ncf::INT val is untagged, doesn't need the rightshift.
untag_signed v => tcf::RIGHT_SHIFT (int_bitsize, def_for_int_codetemp v, one); # v >> 1; # With sign extension.
end;
#########################################
# Tagged_Int-arithmetic ops.
#
fun tagged_intadd (add_op, ncf::INT k, w ) => add_op (int_bitsize, int (k+k), def_for_int_codetemp w); # w + (k+k) ncf::INT is untagged, the 'k+k' leftshifts it to match tagged values.
tagged_intadd (add_op, w, v as ncf::INT _) => tagged_intadd (add_op, v, w); # Swap args and handle via previous line.
tagged_intadd (add_op, v, w ) => add_op ( int_bitsize, # v + (w - 1)
def_for_int_codetemp v, # By clearing low bit of v but not w we ensure that v+w will have low-bit tagged_int tag set.
strip_tagged_int_tag (def_for_int_codetemp w)
);
end;
#
fun tagged_intsub (sub_op, ncf::INT k, w) => sub_op (int_bitsize, int (k+k+2), def_for_int_codetemp w); # ncf::INT is untagged, 'k+k' leftshifts it to match tagged values.
# # The '+2' leaves low bit set after subtracting a lowbit-tagged int.
tagged_intsub (sub_op, v, ncf::INT k) => sub_op (int_bitsize, def_for_int_codetemp v, int (k+k)); # ncf::INT is untagged, 'k+k' leftshifts it to match tagged values.
# # Subtracting this from a lowbit-tagged int will leave the lowbit set, hence a valid tagged value.
tagged_intsub (sub_op, v, w ) => add_tagged_int_tag (sub_op ( int_bitsize, # Subtract two lowbit-tagged values, then add 1 to set the low bit again.
def_for_int_codetemp v,
def_for_int_codetemp w
) );
end;
#
fun tagged_intxor (ncf::INT k, w ) => tcf::BITWISE_XOR (int_bitsize, int (k+k), def_for_int_codetemp w); # ncf::INT is untagged, 'k+k' leftshifts it to match tagged values; XOR then leaves lowbit tag set.
tagged_intxor (w, v as ncf::INT _) => tagged_intxor (v, w); # Reduce to above case, then apply above line.
tagged_intxor (v, w ) => add_tagged_int_tag (tcf::BITWISE_XOR ( int_bitsize, # XOR two lowbit-tagged values: the XOR clears the lowbit tag, so we then add one to set it again.
def_for_int_codetemp v,
def_for_int_codetemp w
) );
end;
#
fun tagged_intmul (signed, mul_op, v, w)
=
{ fun f (ncf::INT k, ncf::INT j) => (int (k+k), int j); # ncf::INT is untagged; We need ((k*j)<<1)
|1 == ((2*k)*j)|1 == ((k+k)*j)|1; the |1 is done below.
f (ncf::INT k, w) => (untag { signed, value => w }, int (k+k)); # The 'untag' rightshifts, reducing to above case.
f (v, w as ncf::INT _) => f (w, v); # Swap args, reducing to above case.
f (v, w) => ( strip_tagged_int_tag (def_for_int_codetemp v), # Rightshift one arg and strip lowbit tag from other arg, reducing in essence to first case.
untag { signed, value => w }
);
end;
(f (v, w)) -> (v, w);
add_tagged_int_tag (mul_op (int_bitsize, v, w)); # Do the multiply, then set the low bit to make a valid tagged_int tagged integers.
};
#
fun tagged_intdiv (signed, drm, v, w)
=
{ my (v, w)
=
case (v, w)
#
(ncf::INT k, ncf::INT j) => (int k, int j); # ncf::INT is untagged.
(ncf::INT k, w) => (int k, untag { signed, value => w }); # ncf::INT is untagged.
#
(v, ncf::INT k) => (untag { signed, value => v }, int k); # ncf::INT is untagged.
(v, w) => (untag { signed, value => v }, untag { signed, value => w });
esac;
# The only way a 31-bit div can overflow
# is when the result gets retagged so
# we can use tcf::DIVS instead of tcf::DIVS_OR_TRAP:
#
tag ( signed,
signed ?? tcf::DIVS (drm, int_bitsize, v, w)
:: tcf::DIVU ( int_bitsize, v, w)
);
};
#
fun tagged_intrem (signed, drm, v, w)
=
{ my (v, w)
=
case (v, w)
#
(ncf::INT k, ncf::INT j) => (int k, int j); # ncf::INT is untagged.
(ncf::INT k, w) => (int k, untag { signed, value => w }); # ncf::INT is untagged.
#
(v, ncf::INT k) => (untag { signed, value => v }, int k); # ncf::INT is untagged.
(v, w) => (untag { signed, value => v }, untag { signed, value => w });
esac;
# Will not overflow, so
# we tag like unsigned:
#
tag ( FALSE,
signed ?? tcf::REMS (drm, int_bitsize, v, w)
:: tcf::REMU (int_bitsize, v, w)
);
};
#
fun tagged_intlshift (ncf::INT k, w) # ncf::INT is untagged.
=>
add_tagged_int_tag (tcf::LEFT_SHIFT (int_bitsize, int (k+k), untag_unsigned w));
tagged_intlshift (v, ncf::INT k) # ncf::INT is untagged.
=>
add_tagged_int_tag (tcf::LEFT_SHIFT (int_bitsize, strip_tagged_int_tag (def_for_int_codetemp v), int k));
tagged_intlshift (v, w)
=>
add_tagged_int_tag (tcf::LEFT_SHIFT (int_bitsize, strip_tagged_int_tag (def_for_int_codetemp v), untag_unsigned w));
end;
#
fun tagged_intrshift (rshift_op, v, ncf::INT k) # ncf::INT is untagged.
=>
or_tagged_int_tag (rshift_op (int_bitsize, def_for_int_codetemp v, int k));
tagged_intrshift (rshift_op, v, w)
=>
or_tagged_int_tag (rshift_op (int_bitsize, def_for_int_codetemp v, untag_unsigned w));
end;
###########################################################################
# Heapchunk tags and related support.
# For tagword definitions see src/c/h/heap-tags.h
###########################################################################
#
fun get_heapchunk_tagword v # return v[-1];
=
tcf::LOAD ( int_bitsize,
tcf::SUB (ptr_bitsize, def_for_int_codetemp v, int 4), # 64-bit issue: '4' is 'wordbytes'.
get_ramregion_projection (v, -1)
);
# Compare to GET_LENGTH_IN_WORDS_FROM_TAGWORD from src/c/h/heap-tags.h
# Here we are also fetching the tagword:
#
fun get_heapchunk_length_as_tagged_int v # Length-in-words, I think.
=
or_tagged_int_tag
(tcf::RIGHT_SHIFT_U
(
int_bitsize,
get_heapchunk_tagword v,
int (tag::tag_width - 1) # "-1": This leaves a garbage bit at the bottom; the above or_tagged_int_tag then produces a valid Tagged_Int value.
) );
#
fun set_up_args_for_fn_call (formal_args, actual_args)
=
# Here we generate code to execute immediately before
# jumping to the entrypoint for a function.
#
# We're given the formal argument list -- what the callee expects to get --
# and also the actual argument list -- what the caller actually has.
#
# Our task here is to construct copies from the
# codetemps currently holding them to the registers
# the caller wants them in. (Later on the register
# allocator will attempt to eliminate as many as possible
# of these copies by appropriate assignement of codetemps
# to registers, but that is not our concern here.)
#
# Note that
#
# formal_args intersect actual_args
#
# is always empty because our formal args are
# immediately copied to fresh codetemps:
#
gather
( formal_args,
actual_args,
[], [], # dst regs, src regs: In these two we accumulate args for a tcf::MOVE_INT_REGISTERS parallel register-copy.
[], # In this one we accumulate args for a tcf::MOVE_FLOAT_REGISTERS parallel register-copy.
[], # In this one we construct a "tree-ified" ... (something).
[] # In this one we accumulate loads from ram (as opposed to the preceding reg-to-reg copies).
)
where
#
fun is_inlined (ncf::CODETEMP r) => get_codetemp_use_frequency r == ONE_USE_AND_INLINED;
is_inlined _ => FALSE;
end;
#
fun gather ([], [], cp_rd, cp_rs, float_copies, treeified, loads)
=>
{ apply buf.put_op treeified;
case (cp_rd, cp_rs)
#
([],[]) => ();
_ => buf.put_op (tcf::MOVE_INT_REGISTERS (int_bitsize, cp_rd, cp_rs));
esac;
case float_copies
#
[] => ();
_ => buf.put_op (tcf::MOVE_FLOAT_REGISTERS (flt_bitsize, map #1 float_copies, map #2 float_copies));
esac;
apply buf.put_op loads;
};
gather ( tcf::INT_EXPRESSION (tcf::CODETEMP_INFO (type, rd)) ! formal_args,
actual_arg ! actual_args,
cp_rd, cp_rs,
float_copies,
treeified,
loads
)
=>
case (def_for_int_codetemp actual_arg)
#
tcf::CODETEMP_INFO (_, rs)
=>
gather (formal_args, actual_args, rd ! cp_rd, rs ! cp_rs, float_copies, treeified, loads);
e => if (is_inlined actual_arg)
#
gather (formal_args, actual_args, cp_rd, cp_rs, float_copies, tcf::LOAD_INT_REGISTER (type, rd, e) ! treeified, loads);
else gather (formal_args, actual_args, cp_rd, cp_rs, float_copies, treeified, tcf::LOAD_INT_REGISTER (type, rd, e) ! loads);
fi;
esac;
gather ( tcf::INT_EXPRESSION (tcf::LOAD (type, ea, r)) ! formal_args,
actual_arg ! actual_args,
cp_rd, cp_rs,
float_copies,
treeified,
loads
)
=>
# Always store them early!
#
gather ( formal_args, actual_args,
cp_rd, cp_rs,
float_copies,
tcf::STORE_INT (type, ea, def_for_int_codetemp actual_arg, r) ! treeified,
loads
);
gather ( tcf::FLOAT_EXPRESSION (tcf::CODETEMP_INFO_FLOAT (type, fd)) ! formal_args,
actual_arg ! actual_args,
cp_rd, cp_rs,
float_copies,
treeified,
loads
)
=>
case (def_for_float_codetemp actual_arg)
#
tcf::CODETEMP_INFO_FLOAT (_, fs)
=>
gather (formal_args, actual_args, cp_rd, cp_rs, (fd, fs) ! float_copies, treeified, loads);
e =>
if (is_inlined actual_arg)
gather (formal_args, actual_args, cp_rd, cp_rs, float_copies, tcf::LOAD_FLOAT_REGISTER (type, fd, e) ! treeified, loads);
else
gather (formal_args, actual_args, cp_rd, cp_rs, float_copies, treeified, tcf::LOAD_FLOAT_REGISTER (type, fd, e) ! loads);
fi;
esac;
gather _
=>
error "set_up_args_for_fn_call/gather";
end;
end;
#############################################################################
# Scale-and-add -- return a + i*k for k = 1,4,8.
# These are (were?) important because the Intel32
# effective address logic is hardwired to compute them,
# and independently because we use them a lot to load
# byte, word and float values from heap records.
#
fun add_ix1 (a, ncf::INT 0) => a; # ncf::INT is untagged.
add_ix1 (a, ncf::INT k) => tcf::ADD (int_bitsize, a, int k); # ncf::INT is untagged.
add_ix1 (a, i ) => tcf::ADD (int_bitsize, a, untag_signed i);
end;
#
fun add_ix4 (a, ncf::INT 0) => a; # ncf::INT is untagged.
add_ix4 (a, ncf::INT i) => tcf::ADD (int_bitsize, a, int (i*4)); # ncf::INT is untagged.
add_ix4 (a, i ) => tcf::ADD (int_bitsize, a, tcf::LEFT_SHIFT (int_bitsize, untag_signed i, two));
end;
#
fun add_ix8 (a, ncf::INT 0) => a; # ncf::INT is untagged.
add_ix8 (a, ncf::INT i) => tcf::ADD (int_bitsize, a, int (i*8)); # ncf::INT is untagged.
add_ix8 (a, i ) => tcf::ADD (int_bitsize, a, tcf::LEFT_SHIFT (int_bitsize, strip_tagged_int_tag (def_for_int_codetemp i), int 2));
end;
###################################################################
# Zero-extend and sign-extend:
#
fun zero_extend_32 (size, value)
=
tcf::ZERO_EXTEND (32, size, value);
# tcf::RIGHT_SHIFT_U (32, tcf::LEFT_SHIFT (32, value, int (32 - size)), int (32 - size))
#
fun sign_extend_32 (size, value)
=
tcf::SIGN_EXTEND (32, size, value);
# tcf::RIGHT_SHIFT (32, tcf::LEFT_SHIFT (32, value, int (32 - size)), int (32 - size))
#
fun log_boxed_update_to_heap_changelog (updated_address, hap_offset)
=
# Add to the heap changelog the address
# where a boxed update has occurred.
#
# The heap changelog is basically a
# a list of CONS cells, to which we
# are prepending a new cell.
#
# We generate code equivalent to:
#
# heap_allocation_pointer[0] = updated_address;
# heap_allocation_pointer[4] = heap_changelog_pointer;
#
# heap_changelog_pointer = heap_allocation_pointer;
#
{ buf.put_op (tcf::STORE_INT (ptr_bitsize, tcf::ADD (pri::address_width, pri::heap_allocation_pointer, int hap_offset),
updated_address, rgn::heap_changelog)); #
buf.put_op (tcf::STORE_INT (ptr_bitsize, tcf::ADD (pri::address_width, pri::heap_allocation_pointer, int (hap_offset+4)), # 64-bit issue -- '4' is 'wordbytes'.
pri::heap_changelog_pointer use_virtual_framepointer, rgn::heap_changelog));
buf.put_op (set_rreg (pri::heap_changelog_pointer use_virtual_framepointer, tcf::ADD (pri::address_width, pri::heap_allocation_pointer, int hap_offset)));
};
#########################################################
# Nextcode-to-Treecode COMPARE OP translation.
#
# Nextcode compare operators are un/signed agnostic;
# in Nextcode un/signed type information is carried
# by the operands. In Treecode operands carry no
# unsigned information; instead binary operators are
# explicitly divided into signed and unsigned flavors.
# Here we implement the translation:
#########################################################
#
fun to_tcf_unsigned_compare op
=
case op
ncf::p::GT => tcf::GTU; ncf::p::GE => tcf::GEU;
ncf::p::LT => tcf::LTU; ncf::p::LE => tcf::LEU;
ncf::p::EQL => tcf::EQ; ncf::p::NEQ => tcf::NE;
esac;
#
fun to_tcf_signed_compare op
=
case op
ncf::p::GT => tcf::GT; ncf::p::GE => tcf::GE;
ncf::p::LT => tcf::LT; ncf::p::LE => tcf::LE;
ncf::p::NEQ => tcf::NE; ncf::p::EQL => tcf::EQ;
esac;
#
fun float64cmp (op, v, w)
=
tcf::FCMP (64, fcond, def_for_float_codetemp v, def_for_float_codetemp w)
where
fcond = case op
ncf::p::f::EQ => tcf::FEQ; # XXX BUGGO FIXME We should make these name sets identical (do we need both?)
ncf::p::f::ULG => tcf::FNEU; # (I presume this is just a relic of when SML/NJ and MLRISC were separate codebases.)
ncf::p::f::UN => tcf::FUO;
ncf::p::f::LEG => tcf::FGLE;
ncf::p::f::GT => tcf::FGT;
ncf::p::f::GE => tcf::FGE;
ncf::p::f::UGT => tcf::FGTU;
ncf::p::f::UGE => tcf::FGEU;
ncf::p::f::LT => tcf::FLT;
ncf::p::f::LE => tcf::FLE;
ncf::p::f::ULT => tcf::FLTU;
ncf::p::f::ULE => tcf::FLEU;
ncf::p::f::LG => tcf::FNE;
ncf::p::f::UE => tcf::FEQU;
esac ;
end;
#
fun go_to_label label
=
tcf::GOTO (tcf::LABEL label, []);
# Trapping int overflows is expensive,
# so we do it only if requested:
#
add_or_trap = *coc::trap_int_overflow ?? tcf::ADD_OR_TRAP :: tcf::ADD;
sub_or_trap = *coc::trap_int_overflow ?? tcf::SUB_OR_TRAP :: tcf::SUB;
#
muls_or_trap = *coc::trap_int_overflow ?? tcf::MULS_OR_TRAP :: tcf::MULS;
divs_or_trap = *coc::trap_int_overflow ?? tcf::DIVS_OR_TRAP :: tcf::DIVS;
##############################################################
# Here begins the recursive function set.
##############################################################
#
fun translate_nextcode_function_to_treecode (fun_label, fun_kind, fun_id, arg_codetemps, args, arg_types, fun_body)
=
{ generate_function_prolog (fun_label, fun_kind, fun_id, arg_codetemps, args, arg_types, fun_body);
#
advanced_heap_ptr := 0;
#
translate_nextcode_ops_to_treecode (fun_body, 0);
} # fun translate_nextcode_function_to_treecode
also
fun generate_function_prolog (fun_label, fun_kind, fun_id, arg_codetemps, args, arg_types, fun_body)
=
# Here we generate the function prolog,
# which in particular copies arguments from
# their fixed arg-passing registers into fresh
# local codetemps. Our tasks here include:
#
# 1. Record types for each codetemp definition.
# This is used to determine the arg-passing
# convention for public functions.
#
# 2. Count the number of uses of each codetemp,
# on a scale of "zero, one, many".
# This is used in the forward propagation logic.
#
# 3. Set need_base_pointer TRUE iff the base_pointer is needed.
# It is needed iff
# a. There is a reference to ncf::LABEL
# b. It uses ncf::JUMPTABLE -- the jumptable requires base_pointer.
#
# 4. Generate the heapcleaner tests for PUBLIC and PRIVATE functions.
#
# 5. If we're doing any floating point allocation (i.e.,
# any heap allocation which needs to be 64-bit aligned)
# we must align heap_allocation_pointer.
#
{
####################################################################
# Survey pass.
#
# We start by doing a pass over the code to:
#
# 1. See if any doubleword allocations are done. # E.g. 64-bit float allocations on 32-bit machine.
# If so, we set needs_doubleword_alignment := TRUE to remind
# us to later make sure heap_allocation_pointer is correctly
# aligned for doubleword allocations. # 64-bit issue: Not(?) needed in 64-bit code.
#
# 2. See if base_pointer is needed.
# If so, we set need_base_pointer := TRUE.
#
# 3. Compute for each codetemp whether it is 'use'd zero, one or many times.
####################################################################
needs_doubleword_alignment = REF FALSE; # Begin by assuming no doubleword allocations.
need_base_pointer = REF FALSE; # Begin by assuming no need for base_pointer.
stipulate
fun count_use codetemp # Here is where we count "zero, one, many."
=
case (get_codetemp_use_frequency codetemp)
#
NO_USES => set_codetemp_use_frequency (codetemp, ONE_USE);
ONE_USE => set_codetemp_use_frequency (codetemp, MULTIPLE_USES);
MULTIPLE_USES => ();
#
_ => error "count_use";
esac;
#
fun check_value (ncf::CODETEMP codetemp) => count_use codetemp;
check_value (ncf::LABEL _) => need_base_pointer := TRUE;
check_value _ => ();
end;
#
fun check_values [] => ();
check_values (ncf::CODETEMP v ! rest) => { count_use v; check_values rest; };
check_values (ncf::LABEL _ ! rest) => { need_base_pointer := TRUE; check_values rest; };
check_values (_ ! rest) => { check_values rest; };
end;
#
fun check_record_values []
=>
();
check_record_values ((ncf::CODETEMP v, _) ! rest)
=>
{ count_use v;
#
check_record_values rest;
};
check_record_values ((ncf::LABEL v, _) ! rest)
=>
{ need_base_pointer := TRUE;
#
check_record_values rest;
};
check_record_values (_ ! rest)
=>
check_record_values rest;
end;
herein
#
fun note_doubleword_allocations_and_base_pointer_uses_and_uses_per_codetemp fun_body
=
loop fun_body
where
# This one is very simple: We mostly just loop
# over all ops in the function body updating
#
# needs_doubleword_alignment
# need_base_pointer
# codetemp_use_frequency
#
# in the obvious manner. The one weirdness is
# that we abuse the codetemp use frequency counts
# to keep float-reads from moving past float-writes:
#
fun loop (ncf::DEFINE_RECORD r)
=>
{ case r.kind
#
ncf::rk::FLOAT64_FATE_FN => needs_doubleword_alignment := TRUE;
ncf::rk::FLOAT64_BLOCK => needs_doubleword_alignment := TRUE;
#
_ => ();
esac;
check_record_values r.fields;
set_ncftype_for_codetemp (r.to_temp, ncf::bogus_pointer_type);
loop r.next;
};
loop (ncf::GET_FIELD_I { record, to_temp, type, next, ... })
=>
{ check_value record;
#
set_ncftype_for_codetemp (to_temp, type);
loop next;
};
loop (ncf::GET_ADDRESS_OF_FIELD_I { record, to_temp, next, ... })
=>
{ check_value record;
#
set_ncftype_for_codetemp (to_temp, ncf::bogus_pointer_type);
loop next;
};
loop (ncf::JUMPTABLE { i, nexts, ... })
=>
{ need_base_pointer := TRUE;
#
check_value i;
apply loop nexts;
};
loop (ncf::STORE_TO_RAM { args, next, ... })
=>
{ check_values args;
#
loop next;
};
loop (ncf::FETCH_FROM_RAM { op, args, to_temp, type, next })
=>
{ check_values args;
# A float-read cannot move past a float-write. # Why is this a float problem but not an int problem? -- 2011-08-19 CrT
# For now read operations cannot be treeified.
# This is hacked by making it (falsely) used
# more than once. # XXX SUCKO FIXME There is a suggestion we need barriers in such cases...?
case op
#
( ncf::p::GET_VECSLOT_NUMERIC_CONTENTS { kind_and_size => ncf::p::FLOAT _ }
| ncf::p::GET_FROM_NONHEAP_RAM { kind_and_size => ncf::p::FLOAT _ }
)
=>
set_codetemp_use_frequency (to_temp, MULTIPLE_USES);
_ => ();
esac;
set_ncftype_for_codetemp (to_temp, type);
loop next;
};
loop (ncf::ARITH { args, to_temp, type, next, ... })
=>
{ check_values args;
#
set_ncftype_for_codetemp (to_temp, type);
loop next;
};
loop (ncf::RAW_C_CALL { args, to_ttemps, next, ... })
=>
{ check_values args;
#
apply set_ncftype_for_codetemp to_ttemps;
loop next;
};
loop (ncf::PURE { op, args, to_temp, type, next })
=>
{ case op ncf::p::WRAP_FLOAT64 => needs_doubleword_alignment := TRUE;
_ => ();
esac;
check_values args;
set_ncftype_for_codetemp (to_temp, type);
loop next;
};
loop (ncf::IF_THEN_ELSE r)
=>
{ check_values r.args;
#
loop r.then_next;
loop r.else_next;
};
loop (ncf::TAIL_CALL r)
=>
{ check_value r.fn;
check_values r.args;
};
loop _ => error "translate_nextcode_function_to_treecode";
end; # fun loop
end; # where
end; # fun note_doubleword_allocations_and_base_pointer_uses_and_uses_per_codetemp
if *coc::printit
#
print_nextcode_fun (fun_kind, fun_id, arg_codetemps, arg_types, fun_body); # Print debugging information.
fi;
# Copy args to fresh codetemps:
#
case fun_kind
#
ncf::PRIVATE_FN
=>
{ buf.put_private_label fun_label;
#
note_doubleword_allocations_and_base_pointer_uses_and_uses_per_codetemp
#
fun_body;
copy_args_to_arg_codetemps (args, arg_codetemps, arg_types);
};
ncf::PRIVATE_FN_WHICH_NEEDS_HEAPLIMIT_CHECK
=>
{ buf.put_private_label fun_label;
# heapcleaner test
put_heaplimit_check
=
if (*do_extra_lowhalf_optimizations
and *lowhalf_optimize_before_making_heapcleaner_code) ihc::put_heaplimit_check_and_push_heapcleaner_call_spec_for_optimized_private_fn;
else ihc::put_heaplimit_check_and_push_heapcleaner_call_spec_for_unoptimized_private_fn;
fi;
put_heaplimit_check
#
buf
#
{ max_possible_heapbytes_allocated_before_next_heaplimit_check => 4 * (max_possible_heapwords_allocated_before_next_heaplimit_check fun_id), # 64-bit issue: '4' is 'wordbytes'.
#
live_registers => args,
live_register_types => arg_types,
#
return => go_to_label fun_label
};
note_doubleword_allocations_and_base_pointer_uses_and_uses_per_codetemp
#
fun_body;
copy_args_to_arg_codetemps (args, arg_codetemps, arg_types);
};
_ =>
# Public function:
#
{ regformals = args;
#
my (linkreg, regformals_tail)
=
case args
#
(tcf::INT_EXPRESSION linkreg ! regformals_tail)
=>
(linkreg, regformals_tail);
_ => error "no linkreg for public function";
esac;
entry_label
=
split_entry_block
?? get_codelabel_for_fun_id (-fun_id - 1)
:: fun_label;
if (not split_entry_block)
#
buf.put_public_label fun_label;
else
buf.put_public_label entry_label;
buf.put_bblock_note empty_block;
buf.put_private_label fun_label;
fi;
clear_hashtables ();
note_doubleword_allocations_and_base_pointer_uses_and_uses_per_codetemp
fun_body;
if *need_base_pointer
#
baseval # baseval = linkreg + (const_base_pointer_reg_offset - entry_label)
=
tcf::ADD
( pri::address_width,
linkreg,
tcf::LABEL_EXPRESSION
( tcf::SUB
( pri::address_width,
const_base_pointer_reg_offset,
tcf::LABEL entry_label
)
)
);
buf.put_op (set_rreg (pri::base_pointer use_virtual_framepointer, baseval)); # base_pointer = linkreg + (const_base_pointer_reg_offset - entry_label)
fi;
ihc::put_heaplimit_check_and_push_heapcleaner_call_spec_for_public_fn
#
buf
#
{ max_possible_heapbytes_allocated_before_next_heaplimit_check => 4 * (max_possible_heapwords_allocated_before_next_heaplimit_check fun_id), # 64-bit issue -- '4' is wordbytes
#
live_registers => regformals,
live_register_types => arg_types,
#
return => tcf::GOTO (linkreg,[])
};
copy_args_to_arg_codetemps
(
regformals_tail,
tail arg_codetemps,
tail arg_types
);
};
esac;
# If needed, align heap_allocation_pointer
# correctly for doubleword allocation. # On 32-bit machines we need doubleword alignment for 64-bit values, mainly 64-bit floats.
#
# ("Correctly" means such that we'll be 64-bit
# aligned after allocating a 32-bit tagword.)
#
if *needs_doubleword_alignment
#
buf.put_op
(tcf::LOAD_INT_REGISTER # heap_allocation_pointer
|= 4;
(
ptr_bitsize,
heap_allocation_pointer_register,
tcf::BITWISE_OR
(
ptr_bitsize,
pri::heap_allocation_pointer,
int 4 # 64-bit issue -- 4 is "bytes_per_word".
) ) ); # 64-bit issue -- this alignment is not needed (or correct) for 64-bit code.
fi;
} # fun translate_nextcode_function_to_treecode
also
fun define_and_load' (to_temp, to_temp_info, value, next, hap_offset) # Define to_temp as to_tempinfo, then generate code for to_tempinfo := value; next
=
{ set_int_def_for_codetemp' (to_temp, to_temp_info);
#
buf.put_op (tcf::LOAD_INT_REGISTER (int_bitsize, to_temp_info, value));
#
translate_nextcode_ops_to_treecode (next, hap_offset);
}
# "hc_info" == heapcleaner_info -- see Heapcleaner_Info in
src/lib/compiler/back/low/main/nextcode/per-codetemp-heapcleaner-info.api also fun define_and_load (to_temp, hc_info, value, next, hap_offset) = define_and_load' (to_temp, make_int_codetemp_info hc_info, value, next, hap_offset)
also fun define_and_load_with_ncftype (to_temp, ncftype, value, next, hap_offset) = define_and_load' (to_temp, make_int_codetemp_info_with_ncftype ncftype, value, next, hap_offset)
also fun define_and_load_with_kind_and_size (to_temp, kind_and_size, value, next, hap_offset) = define_and_load' (to_temp, make_int_codetemp_info_with_kind_and_size kind_and_size, value, next, hap_offset)
also fun define_and_load_tagged_int (to_temp, value, next, hap_offset) = define_and_load (to_temp, chi::i31_type, value, next, hap_offset)
also fun define_and_load_int1 (to_temp, value, next, hap_offset) = define_and_load (to_temp, chi::i32_type, value, next, hap_offset)
also fun define_and_load_boxed (to_temp, value, next, hap_offset) = define_and_load (to_temp, chi::ptr_type, value, next, hap_offset)
also
fun def_and_load_or_inline (to_temp, value, ncftype, next, hap_offset)
=
case (get_codetemp_use_frequency to_temp)
#
MULTIPLE_USES => define_and_load_with_ncftype (to_temp, ncftype, value, next, hap_offset); # Define to_temp as new to_temp_info and generate code for (to_temp_info := value; next
ONE_USE => { set_codetemp_use_frequency_to__one_use_and_inlined to_temp; # This flag is checked two places by is_inlined() in gather().
# # Note that we generate no tcf::LOAD_INT_REGISTER op in this case -- or any code at all;
# # We are deferring that to point-of-use to reduce register pressure.
#
set_int_def_for_codetemp (to_temp, maybe_note_type_for_heapcleaner (value, ncftype)); # Define to_temp as simply 'value'. Here and treeify_allot() are the only places where
# # a codetemp can acquire a non-CODETEMP_INFO definition.
translate_nextcode_ops_to_treecode (next, hap_offset);
};
NO_USES => translate_nextcode_ops_to_treecode (next, hap_offset);
_ => error "def_and_load_or_inline";
esac
# Generate code for
#
# to_temp := heap_allocation_pointer + offset; next
#
# where offset is the address offset of a newly allocated record.
# If codetemp is only used once, we try to propagate that to its use.
#
also
fun define_and_allot (to_temp, offset, next, hap_offset)
=
define_and_load_boxed
(
to_temp,
tcf::ADD (pri::address_width, pri::heap_allocation_pointer, int offset),
next,
hap_offset
)
# Generate code for
# to_temp := heap_allocation_pointer + offset;
# next
# Forward propagate until it is used.
#
also
fun treeify_allot (to_temp, offset, next, hap_offset)
=
case (get_codetemp_use_frequency to_temp)
#
MULTIPLE_USES => define_and_allot (to_temp, offset, next, hap_offset);
ONE_USE
=>
# We don't mark this as treeified
# because it has low register pressure:
#
{ absolute_alloc_offset = offset + *advanced_heap_ptr;
#
set_int_def_for_codetemp (to_temp, tcf::LATE_CONSTANT absolute_alloc_offset); # Define codetemp as LATE_CONSTANT. Here and def_and_load_or_inline are the only places where
# a codetemp can acquire a non-CODETEMP_INFO definition.
translate_nextcode_ops_to_treecode (next, hap_offset);
};
NO_USES => translate_nextcode_ops_to_treecode (next, hap_offset);
_ => error "treeify_allot";
esac
also
fun define_and_load_float64 (to_temp, value, next, hap_offset)
=
{ f = make_float_codetemp_info chi::f64_type;
#
set_float_def_for_codetemp (to_temp, tcf::CODETEMP_INFO_FLOAT (flt_bitsize, f));
buf.put_op (tcf::LOAD_FLOAT_REGISTER (flt_bitsize, f, value));
translate_nextcode_ops_to_treecode (next, hap_offset);
}
also
fun def_and_load_or_inline_float64 (to_temp, value, next, hap_offset) # to_temp <- e where e contains a floating-point value
=
case (get_codetemp_use_frequency to_temp)
#
NO_USES => translate_nextcode_ops_to_treecode (next, hap_offset);
ONE_USE => { set_codetemp_use_frequency_to__one_use_and_inlined to_temp;
#
set_float_def_for_codetemp (to_temp, value);
translate_nextcode_ops_to_treecode (next, hap_offset);
};
MULTIPLE_USES => define_and_load_float64 (to_temp, value, next, hap_offset);
_ => error "def_and_load_or_inline_float64";
esac
also
fun nop (to_temp, arg, value, hap_offset)
=
define_and_load_tagged_int (to_temp, def_for_int_codetemp arg, value, hap_offset)
also
fun copy (hc_info, to_temp, arg, next, hap_offset)
=
{ dst = make_int_codetemp_info hc_info;
#
set_int_def_for_codetemp' (to_temp, dst);
case (def_for_int_codetemp arg)
#
tcf::CODETEMP_INFO (_, src)
=>
buf.put_op (tcf::MOVE_INT_REGISTERS (int_bitsize, [dst], [src]));
e => buf.put_op (tcf::LOAD_INT_REGISTER (int_bitsize, dst, e));
esac;
translate_nextcode_ops_to_treecode (next, hap_offset);
}
also
fun copy_m (31, to_temp, arg, next, hap_offset) => copy (chi::i31_type, to_temp, arg, next, hap_offset);
copy_m ( _, to_temp, arg, next, hap_offset) => copy (chi::i32_type, to_temp, arg, next, hap_offset);
end
# also # Commented out because it is never used. -- 2011-08-20 CrT
# fun same_val (ncf::CODETEMP x, ncf::CODETEMP y) => x == y;
# same_val (ncf::LABEL x, ncf::LABEL y) => x == y;
# same_val (ncf::INT x, ncf::INT y) => x == y; # ncf::INT is untagged.
# #
# same_val _ => FALSE;
# end
also
fun branch (fun_id, compare, [ arg1, arg2 ], yes, no, hap_offset) # normal branches
=>
{ true_label = lbl::make_anonymous_codelabel ();
#
# "Is single assignment great or what!"
#
buf.put_op
(branch_with_probability
(
tcf::IF_GOTO (tcf::CMP (32, compare, def_for_int_codetemp arg1, def_for_int_codetemp arg2), true_label),
#
fun_id__to__branch_probability fun_id
)
);
do_next (no, hap_offset);
put_private_label (true_label, yes, hap_offset);
};
branch _ => error "branch";
end
also
fun branch_if_boxed (fun_id, arg, yes, no, hap_offset) # Branch if x is boxed (x & 1 == 0)
=
{ label = lbl::make_anonymous_codelabel ();
#
compare = tcf::CMP (32, tcf::NE, tcf::BITWISE_AND (int_bitsize, def_for_int_codetemp arg, one), zero);
buf.put_op (branch_with_probability (tcf::IF_GOTO (compare, label), fun_id__to__branch_probability fun_id));
do_next (yes, hap_offset);
put_private_label (label, no, hap_offset);
}
also
fun branch_streq (len, string1, string2, yes, no, hap_offset)
=
# Branch if string1,string2 are identical strings of length 'len'.
# Note that 'len' is fixed at compile-time.
# For speed, we compare a word at a time instead of a byte at a time.
#
# We implement the string comparison as
#
# if (string1[0] == string2[0]) goto false_label; # NB: string1[] and string2[] are arrays of words, not bytes.
# if (string1[1] == string2[1]) goto false_label;
# if (string1[2] == string2[2]) goto false_label;
# ...
# yes-stuff;
# false_label:
#
{ len' = ((len+3) / 4) * 4; # Round up to integral number of words. # 64-bit issue: both '4's are wordbytes, '3' is wordbytes-1
#
false_label = lbl::make_anonymous_codelabel ();
r1 = make_int_codetemp_info chi::i32_type;
r2 = make_int_codetemp_info chi::i32_type;
buf.put_op (tcf::LOAD_INT_REGISTER (int_bitsize, r1, tcf::LOAD (int_bitsize, def_for_int_codetemp string1, rgn::readonly)));
buf.put_op (tcf::LOAD_INT_REGISTER (int_bitsize, r2, tcf::LOAD (int_bitsize, def_for_int_codetemp string2, rgn::readonly)));
#
unroll 0
where
#
fun compare_word i
=
tcf::CMP
( 32, # 64-bit issue: '32' is wordbits.
tcf::NE,
tcf::LOAD (int_bitsize, tcf::ADD (int_bitsize, tcf::CODETEMP_INFO (int_bitsize, r1), i), rgn::readonly),
tcf::LOAD (int_bitsize, tcf::ADD (int_bitsize, tcf::CODETEMP_INFO (int_bitsize, r2), i), rgn::readonly)
);
#
fun unroll i
=
if (i != len')
#
buf.put_op (tcf::IF_GOTO (compare_word (int i), false_label));
#
unroll (i+4); # 64-bit issue: '4' is wordbytes.
fi;
end;
do_next (yes, hap_offset);
put_private_label (false_label, no, hap_offset);
}
also
fun conditional_move (op, args, codetemp, ncftype, next, hap_offset) # Conditional move.
=
# A conditional move lets us compute an expression like
#
# foo = mumble ?? bar :: zot;
#
# without any jumps or branches, via a sequence like
#
# load reg0, zot
# compare mumble
# conditional_move reg0, bat
#
# Using a branch-free alternative like conditional_move
# is a big win on modern deeply-pipelined CPU architectures
# because a mispredicted branch can cost us many cycles
# while the pipeline dumps and reloads.
#
{ fun signed (op, arg1, arg2) = tcf::CMP (32, to_tcf_signed_compare op, def_for_int_codetemp arg1, def_for_int_codetemp arg2); # 64-bit issue: The '32's all through here must be something like wordbits...?
fun unsigned (op, arg1, arg2) = tcf::CMP (32, to_tcf_unsigned_compare op, def_for_int_codetemp arg1, def_for_int_codetemp arg2);
#
fun equal (arg1, arg2) = tcf::CMP (32, tcf::EQ, def_for_int_codetemp arg1, def_for_int_codetemp arg2);
fun notequal (arg1, arg2) = tcf::CMP (32, tcf::NE, def_for_int_codetemp arg1, def_for_int_codetemp arg2);
#
fun boxed arg = tcf::CMP (32, tcf::EQ, tcf::BITWISE_AND (int_bitsize, def_for_int_codetemp arg, one), zero);
fun unboxed arg = tcf::CMP (32, tcf::NE, tcf::BITWISE_AND (int_bitsize, def_for_int_codetemp arg, one), zero);
my (compare, a, b)
=
case (op, args)
#
(ncf::p::COMPARE { op, kind_and_size=>ncf::p::INT 31 },[v, w, a, b]) => ( signed (op, v, w), a, b);
(ncf::p::COMPARE { op, kind_and_size=>ncf::p::UNT 31 },[v, w, a, b]) => (unsigned (op, v, w), a, b);
(ncf::p::COMPARE { op, kind_and_size=>ncf::p::INT 32 },[v, w, a, b]) => ( signed (op, v, w), a, b);
(ncf::p::COMPARE { op, kind_and_size=>ncf::p::UNT 32 },[v, w, a, b]) => (unsigned (op, v, w), a, b);
(ncf::p::POINTER_EQL, [v, w, a, b]) => ( equal (v, w), a, b);
(ncf::p::POINTER_NEQ, [v, w, a, b]) => (notequal (v, w), a, b);
(ncf::p::IS_BOXED, [v, a, b]) => ( boxed v, a, b);
(ncf::p::IS_UNBOXED, [v, a, b]) => (unboxed v, a, b);
(ncf::p::COMPARE_FLOATS { op, size=>64 }, [v, w, a, b]) => (float64cmp (op, v, w), a, b);
_ => error "conditional_move";
esac;
case ncftype
#
ncf::typ::FLOAT64 => define_and_load_float64 (codetemp, tcf::FCONDITIONAL_LOAD (64, compare, def_for_float_codetemp a, def_for_float_codetemp b), next, hap_offset);
_ => define_and_load_with_ncftype (codetemp, ncftype, tcf::CONDITIONAL_LOAD (32, compare, def_for_int_codetemp a, def_for_int_codetemp b), next, hap_offset);
esac;
}
also
fun arith (hc_info, op, arg1, arg2, to_temp, next, hap_offset)
=
define_and_load (to_temp, hc_info, op (int_bitsize, def_for_int_codetemp arg1, def_for_int_codetemp arg2), next, hap_offset)
also
fun arith32 (op, arg1, arg2, codetemp, next, hap_offset)
=
arith (chi::i32_type, op, arg1, arg2, codetemp, next, hap_offset)
also
fun logical (hc_info, op, arg1, arg2, to_temp, next, hap_offset)
=
define_and_load (to_temp, hc_info, op (int_bitsize, def_for_int_codetemp arg1, untag_unsigned arg2), next, hap_offset)
also
fun logical31 (op, arg1, arg2, codetemp, next, hap_offset)
=
logical (chi::i31_type, op, arg1, arg2, codetemp, next, hap_offset)
also
fun logical32 (op, arg1, arg2, codetemp, next, hap_offset)
=
logical (chi::i32_type, op, arg1, arg2, codetemp, next, hap_offset)
also
fun do_next (next, hap_offset)
=
{ save = *advanced_heap_ptr;
#
translate_nextcode_ops_to_treecode (next, hap_offset);
advanced_heap_ptr := save;
}
also
fun put_private_label (label, next, hap_offset)
=
{ buf.put_private_label label;
#
translate_nextcode_ops_to_treecode (next, hap_offset);
}
also
fun put_private_label_and_do_next (label, next, hap_offset)
=
{ buf.put_private_label label;
#
do_next (next, hap_offset);
}
also
fun make_record (field_values, to_temp, next, hap_offset) # Allocate a normal record
=
{ len = length field_values;
#
tagword = tagword_to_int (tag::make_tagword (len, tag::pairs_and_records_btag));
treeify_allot
(
to_temp,
allot_record (hc_ptr, mem_disambig to_temp, int tagword, field_values, hap_offset),
next,
hap_offset + 4 + len*4 # 64-bit issue: '4' is 'wordbytes'.
);
}
also
fun make_i32block (field_values, w, next, hap_offset) # Allocate a record with I32 components
=
{ len = length field_values;
#
tagword = tagword_to_int (tag::make_tagword (len, tag::four_byte_aligned_nonpointer_data_btag));
treeify_allot (
w,
allot_record (hc_i32, mem_disambig w, int tagword, field_values, hap_offset),
next,
hap_offset + 4 + len*4 # 64-bit issue: '4' is 'wordbytes'.
);
}
also
fun make_fblock (field_values, w, next, hap_offset) # Allocate a floating point record
=
{ len = list::length field_values;
#
tagword = tagword_to_int (tag::make_tagword (len+len, tag::eight_byte_aligned_nonpointer_data_btag));
# At initialization the allocation pointer is aligned on
# an odd-word boundary (so that allocating a 4-byte tagword
# will leave us correctly aligned for 8-byte data), and the
# heap offset set to zero.
#
# If an odd number of words have been allocated then the
# heap pointer is misaligned for this record creation.
#
hap_offset
=
unt::bitwise_and (unt::from_int hap_offset, 0u4) != 0u0 # 64-bit issue: this isn't needed or correct in 64-bit code.
?? hap_offset + 4 # 64-bit issue: '4' is presumably 'wordbytes'.
:: hap_offset;
# The components are floating point
#
treeify_allot (
w,
allot_frecord (mem_disambig w, int tagword, field_values, hap_offset),
next,
hap_offset + 4 + len*8 # 64-bit issue: '4' is 'wordbytes'.
);
}
also
fun make_vector (slot_values, w, next, hap_offset) # Allocate a vector
=
{ length_in_slots = length slot_values;
#
hdr_tagword = tagword_to_int tag::typeagnostic_ro_vector_tagword;
data_tagword = tagword_to_int (tag::make_tagword (length_in_slots, tag::ro_vector_data_btag));
data_ptr = make_int_codetemp_info chi::ptr_type;
mem = mem_disambig w;
hap_offset' = hap_offset + 4 + length_in_slots*4; # 64-bit issue: '4' is 'wordbytes'.
# The components are boxed.
# Allocate the data:
allot_record (hc_ptr, mem, int data_tagword, slot_values, hap_offset);
buf.put_op (tcf::LOAD_INT_REGISTER (ptr_bitsize, data_ptr, ea (pri::heap_allocation_pointer, hap_offset+4))); # 64-bit issue: '4' is 'wordbytes'.
# Now allot the header pair:
#
treeify_allot (
w,
allocate_vector_header (hdr_tagword, mem, data_ptr, length_in_slots, hap_offset + 4 + length_in_slots*4), # 64-bit issue: '4' is 'wordbytes'.
next,
hap_offset'+12 # 64-bit issue: '12' is '3*wordbytes'.
);
}
also
fun fselect (index, vector, to_temp, next, hap_offset) # Fetch contents of a slot in a float vector/block.
=
# Floating point select: Fetch vector[ index ];
#
def_and_load_or_inline_float64 (
to_temp,
tcf::FLOAD (flt_bitsize, add_ix8 (def_for_int_codetemp vector, ncf::INT index), rgn::float), # ncf::INT is untagged.
next,
hap_offset
)
also
fun select (index, vector, to_temp, ncftype, next, hap_offset) # Fetch contents of a slot in an int vector/block.
=
# Non-floating point select: Fetch vector[ index ];
#
def_and_load_or_inline (
to_temp,
tcf::LOAD (int_bitsize, add_ix4 (def_for_int_codetemp vector, ncf::INT index), get_ramregion_projection (vector, index)), # ncf::INT is untagged. # 64-bit issue: Need add_ix4 -> add_ix8
ncftype,
next,
hap_offset
)
also
fun funny_select (index, k, to_temp, ncftype, next, hap_offset)
=
# "Funny select; I don't know what this does."
#
# o 'index' is never used.
#
# o This fn is called only when for selects with
# ncf::GET_FIELD_I.record == ncf::INT k -- that is,
# when the field's "record" is in fact an int.
# Raising the question of when/if we would do that.
#
# o This fn is never called during compilation of the complete codebase. -- 2011-08-20 CrT
#
{ unboxed_floats = mp::unboxed_floats; # This appears to be always TRUE currently. -- 2011-08-20 CrT
#
#
fun is_float ncftype
=
if (not unboxed_floats)
#
FALSE;
else
case ncftype ncf::typ::FLOAT64 => TRUE;
_ => FALSE;
esac;
fi;
#
fun falloc_sp (to_temp, next, hap_offset)
=
{ set_float_def_for_codetemp (to_temp, tcf::CODETEMP_INFO_FLOAT (flt_bitsize, make_float_codetemp_info chi::f64_type));
#
translate_nextcode_ops_to_treecode (next, hap_offset);
};
# WARNING: the following generated code should never be executed!
# It is semantic nonsense! XXX BUGGO FIXME
#
if (is_float ncftype) falloc_sp (to_temp, next, hap_offset);
else define_and_load_int1 (to_temp, int k, next, hap_offset); # BOGUS
fi;
}
also
fun call_public_fn (fun_id, actual_args, hap_offset)
=
{ ncftypes_for_args = map ncftype_of actual_args; # "actual_args" is where the argument values currently are.
#
formal_args # "formal_args" is where callee expects to find the argument values.
=
cfa::convert_nextcode_public_fun_args_to_treecode
{
use_virtual_framepointer,
ncftype_for_fun => get_ncftype_for_codetemp fun_id,
ncftypes_for_args
};
dest = case formal_args # 'link' ...?
#
(tcf::INT_EXPRESSION dest ! _)
=>
dest;
_ => error "call_public_fn: dest";
esac;
set_up_args_for_fn_call (formal_args, actual_args); # Copy argument values from where they are to where caller expects to find them.
if track_types_for_heapcleaner
#
buf.put_bblock_note
(
make_heapcleaner_liveinliveout_note
(
hr::heapcleaner_liveout.x_to_note,
formal_args,
ncftypes_for_args
)
);
fi;
maybe_test_heap_allocation_limit hap_offset; # This is a no-op on Intel; on risc it introduces some pipelining by doing the compare early.
buf.put_op (tcf::GOTO (dest, [])); # [] is the might-branch-to labels list.
buf.put_fn_liveout_info (formal_args @ global_registers); # Remember which registers are 'live' at the GOTO (== end of bblock).
}
also
fun call_private_fn (fun_id, actual_args, hap_offset)
=
case (get__callers_info__for__fun_id fun_id)
#
nfs::PRIVATE_FN (REF (nfs::FN_PARAMETERS_IN_TREECODE_FORM formal_args))
=>
{ update_heap_allocation_pointer hap_offset;
#
set_up_args_for_fn_call (formal_args, actual_args);
buf.put_op (go_to_label (get_codelabel_for_fun_id fun_id));
};
nfs::PRIVATE_FN (r as REF (nfs::FN_IN_NEXTCODE_FORM (fun_id, fun_formal_args, ncftypes_for_args, fun_body)))
=>
{ formal_args_in_treecode_form
=
translate_function_formal_args_from_nextcode_to_treecode_form
#
ncftypes_for_args;
fun_label = get_codelabel_for_fun_id fun_id;
r := nfs::FN_PARAMETERS_IN_TREECODE_FORM formal_args_in_treecode_form;
update_heap_allocation_pointer hap_offset;
set_up_args_for_fn_call (formal_args_in_treecode_form, actual_args);
translate_nextcode_function_to_treecode
(
fun_label,
ncf::PRIVATE_FN,
fun_id,
fun_formal_args,
formal_args_in_treecode_form,
ncftypes_for_args,
fun_body
);
};
nfs::PRIVATE_FN_WHICH_NEEDS_HEAPLIMIT_CHECK (fn_info as REF (nfs::FN_IN_NEXTCODE_FORM (fun_id, fun_formal_args, ncftypes_for_args, fun_body)))
=>
{ formal_args_in_treecode_form
=
mp::fixed_arg_passing # fixed_arg_passing is apparently currently always FALSE. -- 2011-08-20 CrT
##
?? cfa::convert_fixed_nextcode_fun_args_to_treecode { ncftypes_for_args, use_virtual_framepointer }
:: translate_function_formal_args_from_nextcode_to_treecode_form ncftypes_for_args;
fun_label = get_codelabel_for_fun_id fun_id;
fn_info := nfs::FN_PARAMETERS_IN_TREECODE_FORM formal_args_in_treecode_form;
set_up_args_for_fn_call (formal_args_in_treecode_form, actual_args);
maybe_test_heap_allocation_limit hap_offset;
translate_nextcode_function_to_treecode
(
fun_label,
ncf::PRIVATE_FN_WHICH_NEEDS_HEAPLIMIT_CHECK,
fun_id,
fun_formal_args,
formal_args_in_treecode_form,
ncftypes_for_args,
fun_body
);
};
nfs::PRIVATE_FN_WHICH_NEEDS_HEAPLIMIT_CHECK (REF (nfs::FN_PARAMETERS_IN_TREECODE_FORM formal_args_in_treecode_form))
=>
{ set_up_args_for_fn_call (formal_args_in_treecode_form, actual_args);
#
maybe_test_heap_allocation_limit hap_offset;
#
buf.put_op (go_to_label (get_codelabel_for_fun_id fun_id));
};
nfs::PUBLIC_FN { parameter_types, ... }
=>
{ formal_args_in_treecode_form
=
cfa::convert_nextcode_public_fun_args_to_treecode
{
ncftype_for_fun => get_ncftype_for_codetemp fun_id,
ncftypes_for_args => parameter_types,
use_virtual_framepointer
};
set_up_args_for_fn_call (formal_args_in_treecode_form, actual_args);
maybe_test_heap_allocation_limit hap_offset;
buf.put_op (go_to_label (get_codelabel_for_fun_id fun_id));
};
esac
also
fun rawload ((ncf::p::INT 32
| ncf::p::UNT 32), i, codetemp, next, hap_offset)
# 64-bit issue: '32' is 'wordbits'.
=>
define_and_load_int1 (codetemp, tcf::LOAD (32, i, rgn::memory), next, hap_offset); # 64-bit issue: '32' is 'wordbits'.
rawload (ncf::p::INT (size as (8
| 16)), i, codetemp, next, hap_offset)
=>
define_and_load_int1 (codetemp, sign_extend_32 (size, tcf::LOAD (size, i, rgn::memory)), next, hap_offset);
rawload (ncf::p::UNT (size as (8
| 16)), i, codetemp, next, hap_offset)
=>
define_and_load_int1 (codetemp, zero_extend_32 (size, tcf::LOAD (size, i, rgn::memory)), next, hap_offset);
rawload ((ncf::p::UNT size
| ncf::p::INT size), _, _, _, _)
=>
error ("rawload: unsupported size: " + int::to_string size);
rawload (ncf::p::FLOAT 64, i, codetemp, next, hap_offset)
=>
def_and_load_or_inline_float64 (codetemp, tcf::FLOAD (64, i, rgn::memory), next, hap_offset);
rawload (ncf::p::FLOAT 32, i, codetemp, next, hap_offset)
=>
def_and_load_or_inline_float64 (codetemp, tcf::FLOAT_TO_FLOAT (64, 32, tcf::FLOAD (32, i, rgn::memory)), next, hap_offset);
rawload (ncf::p::FLOAT size, _, _, _, _)
=>
error ("rawload: unsupported float size: " + int::to_string size);
end
also
fun rawstore ( ( ncf::p::UNT (size as (8
| 16 | 32))
| ncf::p::INT (size as (8 | 16 | 32))
),
i,
codetemp
)
=>
# Both address and value are 32-bit values;
# only 'size' bits of the value are being stored:
#
buf.put_op (tcf::STORE_INT (size, i, def_for_int_codetemp codetemp, rgn::memory));
rawstore ((ncf::p::UNT size
| ncf::p::INT size), _, _)
=>
error ("rawstore: unsupported int size: " + int::to_string size);
rawstore (ncf::p::FLOAT (size as (32
| 64)), i, codetemp)
=>
buf.put_op (tcf::STORE_FLOAT (size, i, def_for_float_codetemp codetemp, rgn::memory));
rawstore (ncf::p::FLOAT size, _, _)
=>
error ("rawstore: unsupported float size: " + int::to_string size);
end
# Generate code
# ncf::DEFINE_RECORD
also
fun translate_nextcode_ops_to_treecode (ncf::DEFINE_RECORD { kind => ncf::rk::FLOAT64_FATE_FN, fields, to_temp, next }, hap_offset) => make_fblock (fields, to_temp, next, hap_offset);
translate_nextcode_ops_to_treecode (ncf::DEFINE_RECORD { kind => ncf::rk::FLOAT64_BLOCK, fields, to_temp, next }, hap_offset) => make_fblock (fields, to_temp, next, hap_offset);
translate_nextcode_ops_to_treecode (ncf::DEFINE_RECORD { kind => ncf::rk::VECTOR, fields, to_temp, next }, hap_offset) => make_vector (fields, to_temp, next, hap_offset);
translate_nextcode_ops_to_treecode (ncf::DEFINE_RECORD { kind => ncf::rk::INT1_BLOCK, fields, to_temp, next }, hap_offset) => make_i32block (fields, to_temp, next, hap_offset);
translate_nextcode_ops_to_treecode (ncf::DEFINE_RECORD { kind => _, fields, to_temp, next }, hap_offset) => make_record (fields, to_temp, next, hap_offset);
###############################
# ncf::GET_FIELD_I: # NB: ncf::INT is untagged.
translate_nextcode_ops_to_treecode (ncf::GET_FIELD_I { i, record => ncf::INT k, to_temp, type, next }, hap_offset) => funny_select (i, k, to_temp, type, next, hap_offset);
translate_nextcode_ops_to_treecode (ncf::GET_FIELD_I { i, record, to_temp, type => ncf::typ::FLOAT64, next }, hap_offset) => fselect (i, record, to_temp, next, hap_offset);
translate_nextcode_ops_to_treecode (ncf::GET_FIELD_I { i, record, to_temp, type, next }, hap_offset) => select (i, record, to_temp, type, next, hap_offset);
###############################
# ncf::GET_ADDRESS_OF_FIELD_I:
translate_nextcode_ops_to_treecode (ncf::GET_ADDRESS_OF_FIELD_I { i, record, to_temp, next }, hap_offset)
=>
define_and_load_boxed (to_temp, add_ix4 (def_for_int_codetemp record, ncf::INT i), next, hap_offset); # ncf::INT is untagged. # 64-bit issue: Need add_ix4 -> add_ix8 on 64-bit architectures.
###############################
# ncf::TAIL_CALL:
translate_nextcode_ops_to_treecode (ncf::TAIL_CALL { fn => ncf::INT k, args }, hap_offset) => update_heap_allocation_pointer hap_offset; # ncf::INT is untagged.
translate_nextcode_ops_to_treecode (ncf::TAIL_CALL { fn => ncf::CODETEMP f, args }, hap_offset) => call_public_fn (f, args, hap_offset);
translate_nextcode_ops_to_treecode (ncf::TAIL_CALL { fn => ncf::LABEL f, args }, hap_offset) => call_private_fn (f, args, hap_offset);
###############################
# ncf::JUMPTABLE:
translate_nextcode_ops_to_treecode (ncf::JUMPTABLE { i => ncf::INT _, ... }, hap_offset)
=>
error "JUMPTABLE"; # Jumptables keying on a constant should have been optimized out in
src/lib/compiler/back/top/improve-nextcode/clean-nextcode-g.pkg translate_nextcode_ops_to_treecode (ncf::JUMPTABLE { i, nexts, ... }, hap_offset)
=>
{ label = lbl::make_anonymous_codelabel ();
labels = map (\\ _ = lbl::make_anonymous_codelabel()) nexts;
tmp_r = make_int_codetemp_info chi::i32_type;
tmp = tcf::CODETEMP_INFO (int_bitsize, tmp_r);
buf.put_op (tcf::LOAD_INT_REGISTER (int_bitsize, tmp_r, make_code_for_label_address (label, 0)));
buf.put_op
(tcf::GOTO
(tcf::ADD
( pri::address_width,
tmp,
tcf::LOAD (ptr_bitsize, add_ix4 (tmp, i), rgn::readonly) # 64-bit issue: Need add_ix4 -> add_ix8 on 64-bit architectures.
),
labels
) );
buf.put_pseudo_op pb::DATA_READ_ONLY;
buf.put_pseudo_op (pb::EXT (cpo::JUMPTABLE { base=>label, targets=>labels } ));
buf.put_pseudo_op pb::TEXT;
pl::apply
(\\ (label, next) = put_private_label_and_do_next (label, next, hap_offset))
(labels, nexts);
};
###############################
# ncf::PURE:
translate_nextcode_ops_to_treecode
(
ncf::PURE { op => ncf::p::CONVERT_FLOAT { from=>ncf::p::INT 31, to=>ncf::p::FLOAT 64 }, # 64-bit issue...
args => [ arg ],
to_temp,
next,
...
},
hap_offset
)
=> def_and_load_or_inline_float64 (to_temp, tcf::INT_TO_FLOAT (flt_bitsize, int_bitsize, untag_signed arg), next, hap_offset);
translate_nextcode_ops_to_treecode
( ncf::PURE { op => ncf::p::CONVERT_FLOAT { from=>ncf::p::INT 32, to=>ncf::p::FLOAT 64 }, # 64-bit issue...
args => [ arg ],
to_temp,
next,
...
},
hap_offset
)
=>
def_and_load_or_inline_float64 (to_temp, tcf::INT_TO_FLOAT (flt_bitsize, int_bitsize, def_for_int_codetemp arg), next, hap_offset);
translate_nextcode_ops_to_treecode
( ncf::PURE { op => ncf::p::PURE_ARITH { op, kind_and_size => ncf::p::FLOAT 64 },
args => [ arg ],
to_temp,
next,
...
},
hap_offset
)
=>
{ r = def_for_float_codetemp arg;
#
case op
#
ncf::p::NEGATE => def_and_load_or_inline_float64 (to_temp, tcf::FNEG (flt_bitsize, r), next, hap_offset);
ncf::p::ABS => def_and_load_or_inline_float64 (to_temp, tcf::FABS (flt_bitsize, r), next, hap_offset);
ncf::p::FSQRT => def_and_load_or_inline_float64 (to_temp, tcf::FSQRT (flt_bitsize, r), next, hap_offset);
#
ncf::p::FSIN => define_and_load_float64 (to_temp, tcf::FEXT (flt_bitsize, trx::FSINE r), next, hap_offset);
ncf::p::FCOS => define_and_load_float64 (to_temp, tcf::FEXT (flt_bitsize, trx::FCOSINE r), next, hap_offset);
ncf::p::FTAN => define_and_load_float64 (to_temp, tcf::FEXT (flt_bitsize, trx::FTANGENT r), next, hap_offset);
#
_ => error "unexpected baseop in pure unary float64";
esac;
};
translate_nextcode_ops_to_treecode
(
ncf::PURE { op => ncf::p::PURE_ARITH { op, kind_and_size=>ncf::p::FLOAT 64 },
args => [ arg1, arg2 ],
to_temp,
next,
...
},
hap_offset
)
=>
{ arg1 = def_for_float_codetemp arg1;
arg2 = def_for_float_codetemp arg2;
value = case op
#
ncf::p::ADD => tcf::FADD (flt_bitsize, arg1, arg2);
ncf::p::MULTIPLY => tcf::FMUL (flt_bitsize, arg1, arg2);
ncf::p::SUBTRACT => tcf::FSUB (flt_bitsize, arg1, arg2);
ncf::p::DIVIDE => tcf::FDIV (flt_bitsize, arg1, arg2);
#
_ => error "unexpected baseop in pure binary float64";
esac;
def_and_load_or_inline_float64 (to_temp, value, next, hap_offset);
};
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::PURE_ARITH { op=>ncf::p::BITWISE_OR, kind_and_size }, args => [arg1, arg2], to_temp, next, ... }, hap_offset)
=>
define_and_load_with_kind_and_size (to_temp, kind_and_size, tcf::BITWISE_OR (int_bitsize, def_for_int_codetemp arg1, def_for_int_codetemp arg2), next, hap_offset);
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::PURE_ARITH { op=>ncf::p::BITWISE_AND, kind_and_size }, args => [arg1, arg2], to_temp, next, ... }, hap_offset)
=>
define_and_load_with_kind_and_size (to_temp, kind_and_size, tcf::BITWISE_AND (int_bitsize, def_for_int_codetemp arg1, def_for_int_codetemp arg2), next, hap_offset);
translate_nextcode_ops_to_treecode
(
ncf::PURE { op => ncf::p::PURE_ARITH { op, kind_and_size },
args => [arg1, arg2],
to_temp,
type,
next
},
hap_offset
)
=>
case kind_and_size
#
ncf::p::INT 31
=>
case op
ncf::p::BITWISE_XOR => define_and_load_tagged_int (to_temp, tagged_intxor ( arg1, arg2), next, hap_offset);
ncf::p::LSHIFT => define_and_load_tagged_int (to_temp, tagged_intlshift ( arg1, arg2), next, hap_offset);
ncf::p::RSHIFT => define_and_load_tagged_int (to_temp, tagged_intrshift (tcf::RIGHT_SHIFT, arg1, arg2), next, hap_offset);
ncf::p::ADD => define_and_load_tagged_int (to_temp, tagged_intadd ( tcf::ADD, arg1, arg2), next, hap_offset);
ncf::p::SUBTRACT => define_and_load_tagged_int (to_temp, tagged_intsub ( tcf::SUB, arg1, arg2), next, hap_offset);
ncf::p::MULTIPLY => define_and_load_tagged_int (to_temp, tagged_intmul (TRUE, tcf::MULS, arg1, arg2), next, hap_offset);
_ => error "translate_nextcode_ops_to_treecode: ncf::PURE ncf::INT 31";
esac;
ncf::p::INT 32
=>
case op
ncf::p::BITWISE_XOR => arith32 (tcf::BITWISE_XOR, arg1, arg2, to_temp, next, hap_offset);
ncf::p::LSHIFT => logical32 (tcf::LEFT_SHIFT, arg1, arg2, to_temp, next, hap_offset);
ncf::p::RSHIFT => logical32 (tcf::RIGHT_SHIFT, arg1, arg2, to_temp, next, hap_offset);
_ => error "translate_nextcode_ops_to_treecode: ncf::PURE ncf::INT 32";
esac;
ncf::p::UNT 31
=>
case op
ncf::p::ADD => define_and_load_tagged_int (to_temp, tagged_intadd ( tcf::ADD, arg1, arg2), next, hap_offset);
ncf::p::SUBTRACT => define_and_load_tagged_int (to_temp, tagged_intsub ( tcf::SUB, arg1, arg2), next, hap_offset);
ncf::p::MULTIPLY => define_and_load_tagged_int (to_temp, tagged_intmul (FALSE, tcf::MULU, arg1, arg2), next, hap_offset);
ncf::p::DIVIDE => # This is not really a pure
# operation -- oh well:
#
{ update_heap_allocation_pointer hap_offset;
define_and_load_tagged_int (to_temp, tagged_intdiv (FALSE, tcf::d::ROUND_TO_ZERO, arg1, arg2), next, 0);
};
ncf::p::REM => # Neither is this -- oh well
#
{ update_heap_allocation_pointer hap_offset;
define_and_load_tagged_int (to_temp, tagged_intrem (FALSE, tcf::d::ROUND_TO_ZERO, arg1, arg2), next, 0);
};
ncf::p::BITWISE_XOR => define_and_load_tagged_int (to_temp, tagged_intxor ( arg1, arg2), next, hap_offset);
ncf::p::LSHIFT => define_and_load_tagged_int (to_temp, tagged_intlshift ( arg1, arg2), next, hap_offset);
ncf::p::RSHIFT => define_and_load_tagged_int (to_temp, tagged_intrshift (tcf::RIGHT_SHIFT, arg1, arg2), next, hap_offset);
ncf::p::RSHIFTL => define_and_load_tagged_int (to_temp, tagged_intrshift (tcf::RIGHT_SHIFT_U, arg1, arg2), next, hap_offset);
#
_ => error "translate_nextcode_ops_to_treecode: ncf::PURE UINT 31";
esac;
ncf::p::UNT 32
=>
case op
ncf::p::ADD => arith32 (tcf::ADD, arg1, arg2, to_temp, next, hap_offset);
ncf::p::SUBTRACT => arith32 (tcf::SUB, arg1, arg2, to_temp, next, hap_offset);
ncf::p::MULTIPLY => arith32 (tcf::MULU, arg1, arg2, to_temp, next, hap_offset);
ncf::p::DIVIDE => { update_heap_allocation_pointer hap_offset;
arith32 (tcf::DIVU, arg1, arg2, to_temp, next, 0);
};
ncf::p::REM => { update_heap_allocation_pointer hap_offset;
arith32 (tcf::REMU, arg1, arg2, to_temp, next, 0);
};
ncf::p::BITWISE_XOR => arith32 (tcf::BITWISE_XOR, arg1, arg2, to_temp, next, hap_offset);
ncf::p::LSHIFT => logical32 (tcf::LEFT_SHIFT, arg1, arg2, to_temp, next, hap_offset);
ncf::p::RSHIFT => logical32 (tcf::RIGHT_SHIFT, arg1, arg2, to_temp, next, hap_offset);
ncf::p::RSHIFTL => logical32 (tcf::RIGHT_SHIFT_U, arg1, arg2, to_temp, next, hap_offset);
_ => error "translate_nextcode_ops_to_treecode: ncf::PURE UINT 32";
esac;
_ => error "unexpected numkind in pure binary arithop";
esac;
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::PURE_ARITH { op=>ncf::p::BITWISE_NOT, kind_and_size }, args => [ arg ], to_temp, next, ... }, hap_offset)
=>
case kind_and_size
#
(ncf::p::UNT 32
| ncf::p::INT 32)
=>
define_and_load_int1 (to_temp, tcf::BITWISE_XOR (int_bitsize, def_for_int_codetemp arg, unt 0uxFFFFFFFF), next, hap_offset);
(ncf::p::UNT 31
| ncf::p::INT 31)
=>
define_and_load_tagged_int (to_temp, tcf::SUB (int_bitsize, zero, def_for_int_codetemp arg), next, hap_offset);
_ => error "unexpected numkind in pure bitwise_not arithop";
esac;
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::PURE_ARITH { op=>ncf::p::NEGATE, kind_and_size }, args => [ arg ], to_temp, next, ... }, hap_offset)
=>
case kind_and_size
#
( ncf::p::UNT 32
| ncf::p::INT 32
)
=>
define_and_load_int1 (to_temp, tcf::SUB (int_bitsize, zero, def_for_int_codetemp arg), next, hap_offset);
( ncf::p::UNT 31
| ncf::p::INT 31
)
=>
define_and_load_tagged_int (to_temp, tcf::SUB (int_bitsize, int 2, def_for_int_codetemp arg), next, hap_offset);
_ => error "unexpected numkind in pure ~ baseop";
esac;
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::COPY ft, args => [ arg ], to_temp, next, ... }, hap_offset)
=>
case ft
#
(31, 32) => define_and_load_int1 (to_temp, tcf::RIGHT_SHIFT_U (int_bitsize, def_for_int_codetemp arg, one), next, hap_offset);
( 8, 32) => define_and_load_int1 (to_temp, tcf::RIGHT_SHIFT_U (int_bitsize, def_for_int_codetemp arg, one), next, hap_offset);
( 8, 31) => copy (chi::i31_type, to_temp, arg, next, hap_offset);
(n, m) => if (n == m) copy_m (m, to_temp, arg, next, hap_offset);
else error "translate_nextcode_ops_to_treecode: ncf::PURE: copy";
fi;
esac;
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::COPY_TO_INTEGER _, ... }, hap_offset)
=>
error "translate_nextcode_ops_to_treecode: ncf::PURE: copy_inf";
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::STRETCH ft, args => [ arg ], to_temp, next, ... }, hap_offset)
=>
case ft
(8, 31) => define_and_load_tagged_int (to_temp, tcf::RIGHT_SHIFT (int_bitsize, tcf::LEFT_SHIFT (int_bitsize, def_for_int_codetemp arg, int 23), int 23), next, hap_offset);
(8, 32) => define_and_load_int1 (to_temp, tcf::RIGHT_SHIFT (int_bitsize, tcf::LEFT_SHIFT (int_bitsize, def_for_int_codetemp arg, int 23), int 24), next, hap_offset);
(31, 32) => define_and_load_int1 (to_temp, tcf::RIGHT_SHIFT (int_bitsize, def_for_int_codetemp arg, one), next, hap_offset);
(n, m) => if (n == m) copy_m (m, to_temp, arg, next, hap_offset);
else error "translate_nextcode_ops_to_treecode: ncf::PURE: extend";
fi;
esac;
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::STRETCH_TO_INTEGER _, ... }, hap_offset)
=>
error "translate_nextcode_ops_to_treecode: ncf::PURE: extend_inf";
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::CHOP ft, args => [ arg ], to_temp, next, ... }, hap_offset)
=>
case ft
#
(32, 31) => define_and_load_tagged_int (to_temp, tcf::BITWISE_OR (int_bitsize, tcf::LEFT_SHIFT (int_bitsize, def_for_int_codetemp arg, one), one), next, hap_offset);
(31, 8) => define_and_load_int1 (to_temp, tcf::BITWISE_AND (int_bitsize, def_for_int_codetemp arg, int 0x1ff), next, hap_offset);
(32, 8) => define_and_load_int1 (to_temp, tag_unsigned (tcf::BITWISE_AND (int_bitsize, def_for_int_codetemp arg, int 0xff)), next, hap_offset);
(n, m) => if (n == m) copy_m (m, to_temp, arg, next, hap_offset);
else error "translate_nextcode_ops_to_treecode: ncf::PURE: trunc";
fi;
esac;
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::CHOP_INTEGER _, ... }, hap_offset)
=>
error "translate_nextcode_ops_to_treecode: ncf::PURE: trunc_inf";
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::HEAPCHUNK_LENGTH_IN_WORDS, args => [ arg ], to_temp, next, ... }, hap_offset)
=>
define_and_load_tagged_int (to_temp, get_heapchunk_length_as_tagged_int arg, next, hap_offset);
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::VECTOR_LENGTH_IN_SLOTS, args => [ arg ], to_temp, type, next }, hap_offset)
=>
select (1, arg, to_temp, type, next, hap_offset);
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::RO_VECTOR_GET, args => [ arg, ncf::INT i ], to_temp, next, ... }, hap_offset) # ncf::INT is untagged.
=>
{ # Get data pointer:
#
mem = get_dataptr_ramregion arg;
a = hc_ptr (tcf::LOAD (int_bitsize, def_for_int_codetemp arg, mem));
mem' = get_rw_vector_ramregion mem;
define_and_load_boxed (to_temp, tcf::LOAD (int_bitsize, add_ix4 (a, ncf::INT i), mem'), next, hap_offset); # ncf::INT is untagged.
};
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::RO_VECTOR_GET, args => [ arg1, arg2 ], to_temp, next, ... }, hap_offset)
=>
{ # Get data pointer:
#
mem = get_dataptr_ramregion arg1;
a = hc_ptr (tcf::LOAD (int_bitsize, def_for_int_codetemp arg1, mem));
mem' = get_rw_vector_ramregion mem;
define_and_load_boxed (to_temp, tcf::LOAD (int_bitsize, add_ix4 (a, arg2), mem'), next, hap_offset);
};
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::PURE_GET_VECSLOT_NUMERIC_CONTENTS { kind_and_size=>ncf::p::INT 8 }, args => [ vector, index ], to_temp, next, ... }, hap_offset)
=>
{ # Get data pointer:
#
mem = get_dataptr_ramregion vector;
a = hc_ptr (tcf::LOAD (int_bitsize, def_for_int_codetemp vector, mem));
mem' = get_rw_vector_ramregion mem;
define_and_load_tagged_int (to_temp, tag_unsigned (tcf::LOAD (8, add_ix1 (a, index), mem')), next, hap_offset);
};
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::GET_BATAG_FROM_TAGWORD, args => [ arg ], to_temp, next, ... }, hap_offset)
=>
define_and_load_tagged_int (to_temp, tag_unsigned (tcf::BITWISE_AND (int_bitsize, get_heapchunk_tagword arg, int (tag::pow_tag_width - 1))), next, hap_offset);
#
# tag_width is 7 -- five bits of b-tag and two bits of a-tag.
# pow_tag_width is 2**tag_width == 1 << tag_width == 0b10000000
# pow_tag_width-1 is 2**tag_width == 1 << tag_width == 0b1111111 -- the mask we need to AND off just the b-tag and a-tag bits.
# 'arg' needs to be Int1 (not Tagged_Int) for this to work properly.
translate_nextcode_ops_to_treecode (ncf::PURE { op => ncf::p::MAKE_WEAK_POINTER_OR_SUSPENSION, args => [ctag, arg], to_temp, next, ... }, hap_offset) # ctag specifies weak pointer vs suspension.
=>
{ tagword = case ctag
ncf::INT ctag => int (tagword_to_int (tag::make_tagword (ctag, tag::weak_pointer_or_suspension_btag))); # ncf::INT is untagged.
_ => tcf::BITWISE_OR
( int_bitsize,
tcf::LEFT_SHIFT (int_bitsize, untag_signed(ctag), int tag::tag_width),
&