## translate-treecode-to-machcode-intel32-g.pkg
#
# CONTEXT:
#
# The Mythryl compiler code representations used are, in order:
#
# 1) Raw Syntax is the initial frontend code representation.
# 2) Deep Syntax is the second and final frontend code representation.
# 3) Lambdacode (polymorphically typed lambda calculus) is the first backend code representation, used only transitionally.
# 4) Anormcode (A-Normal format, which preserves expression tree structure) is the second backend code representation, and the first used for optimization.
# 5) Nextcode ("continuation-passing style", a single-assignment basic-block-graph form where call and return are essentially the same) is the third and chief backend tophalf code representation.
# 6) Treecode is the backend tophalf/lowhalf transitional code representation. It is typically slightly specialized for each target architecture, e.g. Intel32 (x86).
# 7) Machcode abstracts the target architecture machine instructions. It gets specialized for each target architecture.
# 8) Execode is absolute executable binary machine instructions for the target architecture.
#
# For general context, see
#
# src/A.COMPILER-PASSES.OVERVIEW
#
#
#
# This package implements translation from
# mostly-architecture-independent treecode form,
# specifically treecode_form_intel32, to
# entirely-architecture-dependent abstract x86 machine code.
#
# There is nothing particularly subtle here; we just
# grind through all the Treecode_Form cases and for
# each one construct a semantically equivalent sequence
# of x86 machine instructions.
#
# We use the Sethi-Ullman approach to linearize float
# expression-trees nearly optimally.
#
# This file is where we actually generate conditional branches
# to trap arithmetic overflow, when requested/appropriate.
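#
# (For illustration -- a sketch, not verbatim output of this file --
# an ADD_OR_TRAP typically comes out roughly as
#
#     addl   src, dst
#     jo     overflow_trap_label      # Taken iff the ADDL set the OF flag.
# )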
#
# A lot of Intel32 machine instructions are restricted to
# specific registers (for example, for divides the dividend
# must be in edx:eax), so we do a lot of copying into such
# registers and then copying the result out to a temporary, to unpin;
# we hope the register allocator will eliminate most of these
# move instructions.
#
# We do fold in a few low-level optimizations as we do
# the translation, mostly assembly-language tricks-of-the-trade
# type stuff like:
# o Fast set-to-zero using XOR when in registers.
# o Changing multiplies and divides to shifts where possible.
# o Swapping args of commutative binary ops when it is legal and a win.
# o Dropping explicit compare ops if preceding arithmetic
# already set the needed condition flags.
# o If architecture is not PENTIUM (i.e., PentiumPRO or better)
# generate cmovcc instructions for jump-free conditionals.
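#
# For illustration (sketches, not verbatim output), the sort of code
# these tricks produce looks like:
#
#     xorl   %reg, %reg        # Set %reg to zero -- shorter than movl $0, %reg.
#     shll   $3,   %reg        # Multiply %reg by 8 via a left shift.
#     sarl   $2,   %reg        # Divide %reg by 4, rounding toward negative infinity.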
#
#
# In more detail:
#
# The stock architecture-agnostic Treecode_Form is defined in:
#
#
#     src/lib/compiler/back/low/treecode/treecode-form.api
#
# Our mostly-architecture-independent Treecode_Form
# variant treecode_form_intel32 is defined in:
#
#
#     src/lib/compiler/back/low/main/intel32/backend-lowhalf-intel32-g.pkg
#
# The intel32 architecture is described for the backend in
#
# src/lib/compiler/back/low/intel32/intel32.architecture-description
#
# which then gets processed to produce various files,
# in particular the two defining our entirely-architecture-dependent
# abstract machine code:
#
#
#     src/lib/compiler/back/low/intel32/code/machcode-intel32.codemade.api
#     src/lib/compiler/back/low/intel32/code/machcode-intel32-g.codemade.pkg
#
# but also
#
#
#     src/lib/compiler/back/low/intel32/code/registerkinds-intel32.codemade.pkg
#     src/lib/compiler/back/low/intel32/emit/translate-machcode-to-asmcode-intel32-g.codemade.pkg
#
# Runtime invocation of our 'translate_treecode_to_machcode' entrypoint is from
#
#
#     src/lib/compiler/back/low/main/main/translate-nextcode-to-treecode-g.pkg
#
# A good place to begin reading in this file is:
#
# fun do_void_expression'
# Compiled by:
#
#     src/lib/compiler/back/low/intel32/backend-intel32.lib
#
# This is a revised version that takes into account
# the extended intel32 instruction set, and has better handling of
# non-standard types. I've factored out the integer/floating-point
# comparison code and added improvers for conditional moves.
# The latter generates SETcc and CMOVcc instructions not
# present on PENTIUM -- Pentium Pro and later only.
#
# To avoid problems, I have tried to incorporate as many
# of Lal's original magic incantations as possible.
#
# Changes include:
#
# 1. REMU/REMS are now supported
#
# 2. CONDITIONAL_LOAD is supported by generating SETcc and/or CMOVcc;
# this may require at least a Pentium II to work.
#
# 3. Division by a constant has been accelerated.
# Division by a power of 2 generates SHRL or SARL.
#
# 4. Better addressing mode selection has been implemented.
# This should improve array indexing.
#
# 5. Generate testl/testb instead of andl whenever appropriate.
# This is recommended by the Intel Optimization Guide and seems to improve
# boxity tests.
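#
# For illustration (a sketch, not verbatim output; the label name is
# made up): a tag-bit test that would otherwise be
#
#     andl   $1, %eax          # Clobbers %eax with the masked result.
#     jne    is_unboxed
#
# can instead be
#
#     testl  $1, %eax          # Sets the flags only; %eax is left intact.
#     jne    is_unboxed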
#
# More changes for floating point:
# A new mode is implemented which generates pseudo 3-address instructions
# for floating point. These instructions are register allocated the
# normal way, with the virtual registers mapped onto a set of pseudo
# %fp registers. These registers are then mapped onto the %st registers
# with a new postprocessing phase.
#
# -- Allen Leung
#DO set_control "compiler::trap_int_overflow" "TRUE";
stipulate
package lbl = codelabel;                  # codelabel             is from   src/lib/compiler/back/low/code/codelabel.pkg
package lem = lowhalf_error_message;      # lowhalf_error_message is from   src/lib/compiler/back/low/control/lowhalf-error-message.pkg
package lnt = lowhalf_notes;              # lowhalf_notes         is from   src/lib/compiler/back/low/code/lowhalf-notes.pkg
package rkj = registerkinds_junk;         # registerkinds_junk    is from   src/lib/compiler/back/low/code/registerkinds-junk.pkg
package tcp = treecode_pith;              # treecode_pith         is from   src/lib/compiler/back/low/treecode/treecode-pith.pkg
package u32 = one_word_unt;               # one_word_unt          is from   src/lib/std/one-word-unt.pkg
#
rewrite_ramreg = TRUE; # Should we rewrite ramregs?
enable_fast_fpmode = TRUE; # Set this to FALSE to disable "fast floating point" mode (== allocation of floating point registers on the hardware floating point stack).
herein
# We are invoked from:
#
#     src/lib/compiler/back/low/main/intel32/backend-lowhalf-intel32-g.pkg
#
generic package translate_treecode_to_machcode_intel32_g (
#
# machcode_intel32_g is from   src/lib/compiler/back/low/intel32/code/machcode-intel32-g.codemade.pkg
#
package mcf: Machcode_Intel32; # Machcode_Intel32 is from   src/lib/compiler/back/low/intel32/code/machcode-intel32.codemade.api
#
# treecode_hashing_equality_and_display_g is from   src/lib/compiler/back/low/treecode/treecode-hashing-equality-and-display-g.pkg
#
package tcj: Treecode_Hashing_Equality_And_Display # Treecode_Hashing_Equality_And_Display is from   src/lib/compiler/back/low/treecode/treecode-hashing-equality-and-display.api
where
tcf == mcf::tcf; # "tcf" == "treecode_form".
#
# treecode_extension_compiler_intel32_g is from   src/lib/compiler/back/low/main/intel32/treecode-extension-compiler-intel32-g.pkg
#
package txc: Treecode_Extension_Compiler # Treecode_Extension_Compiler is from   src/lib/compiler/back/low/treecode/treecode-extension-compiler.api
where mcf == mcf # "mcf" == "machcode_form" (abstract machine code).
also tcf == mcf::tcf; # "tcf" == "treecode_form".
#
# treecode_codebuffer_g is from   src/lib/compiler/back/low/treecode/treecode-codebuffer-g.pkg
#
package tcs: Treecode_Codebuffer # Treecode_Codebuffer is from   src/lib/compiler/back/low/treecode/treecode-codebuffer.api
where
tcf == txc::tcf; # "tcf" == "treecode_form".
Architecture = PENTIUM
| PENTIUM_PRO | PENTIUM_II | PENTIUM_III;
architecture: Ref( Architecture );
convert_int_to_float_in_registers
:
{ type: mcf::tcf::Int_Bitsize, # "rgk" == "registerkinds".
src: mcf::Operand, # Source operand, guaranteed to be non-memory!
ref_notes: Ref( note::Notes ) # Notes on cccomponents. # "cccomponent" == "callgraph connected component" (our nextcode unit of compilation).
}
->
{ ops: List( mcf::Machine_Op ), # The machine instructions.
temp_mem: mcf::Operand, # Temporary for CONVERT_INT_TO_FLOAT
cleanup: List( mcf::Machine_Op ) # Cleanup code
};
fast_floating_point: Ref( Bool );
#
# When this flag is set we allot
# floating point registers directly
# on the floating point stack.
#
)
: (weak)
api {
include api Translate_Treecode_To_Machcode; # Translate_Treecode_To_Machcode is from   src/lib/compiler/back/low/treecode/translate-treecode-to-machcode.api
rewrite_ramreg: Bool;
}
{
# Export to client packages:
#
package tcs = txc::tcs; # "tcs" == "treecode_stream".
package mcf = mcf; # "mcf" == "machcode_form".
package mcg = txc::mcg; # "mcg" == "machcode_controlflow_graph".
stipulate
package rgk = mcf::rgk; # "rgk" == "registerkinds".  registerkinds_intel32 is from   src/lib/compiler/back/low/intel32/code/registerkinds-intel32.codemade.pkg
package mcf = mcf;
package tcf = mcf::tcf; # "tcf" == "treecode_form".
package crm # "crm" == "compile_register_moves".
=
compile_register_moves_g ( # compile_register_moves_g is from   src/lib/compiler/back/low/code/compile-register-moves-g.pkg
mcf
);
herein
Codebuffer
=
tcs::Treecode_Codebuffer
(
mcf::Machine_Op,
rgk::Codetemplists,
mcg::Machcode_Controlflow_Graph
);
Treecode_Codebuffer
=
tcs::Treecode_Codebuffer
(
tcf::Void_Expression,
List( tcf::Expression ),
mcg::Machcode_Controlflow_Graph
);
Kind = FLOAT
| INTEGER;
package tct # Exported to client packages.
= # "tct" == "treecode_transforms".
treecode_transforms_g ( # treecode_transforms_g is from   src/lib/compiler/back/low/treecode/treecode-transforms-g.pkg
#
package tcf = tcf; # "tcf" == "treecode_form".
package rgk = rgk; # "rgk" == "registerkinds".
#
int_bitsize = 32; # 64-bit issue.
natural_widths = [32]; # 64-bit issue.
Rep = SE
| ZE | NEITHER;
rep = NEITHER;
);
#
fun error msg
=
lem::error("translate_treecode_to_machcode_intel32_g", msg);
# Should we perform automatic ramreg translation?
# If this is on, we can avoid doing the rewrite_pseudo phase entirely.
#
rewrite_ramreg = rewrite_ramreg;
# The following hardcodes the assumption that ramregs have register ids 8-31:
#
fun is_ramreg r # "ramregs" are fake registers living in ram, needed on x86 because it is so register-starved.
=
rewrite_ramreg and
{ r = rkj::intrakind_register_id_of r;
r >= 8 and r < 32;
};
#
fun is_framreg r # "framreg" is "floating-point ram register".
=
if (enable_fast_fpmode and *fast_floating_point)
#
r = rkj::intrakind_register_id_of r;
r >= 8 and r < 32;
else
TRUE;
fi;
is_any_framreg
=
list::exists
(\\ r = { r = rkj::intrakind_register_id_of r;
#
r >= 8 and r < 32;
}
);
st0 = rgk::st 0; # Top of floating-point stack -- used to return float results.
st7 = rgk::st 7; # Last globally allocated float register -- float registers 0-7 are globally allocated, 8-32 are locally allocated.
# On Intel32 every op comes in triplicate, one version each
# for 8-, 16- and 32-bit operations:  increment, decrement,
# add, subtract, not, negate, shift-left, logical right-shift,
# arithmetic right-shift, or, and, xor.
opcodes8 = { inc=>mcf::INCB, dec=>mcf::DECB, add=>mcf::ADDB, sub=>mcf::SUBB, notx=>mcf::NOTB, neg=>mcf::NEGB, shl=>mcf::SHLB, shr=>mcf::SHRB, sar=>mcf::SARB, orx=>mcf::ORB, andx=>mcf::ANDB, xor=>mcf::XORB };
opcodes16 = { inc=>mcf::INCW, dec=>mcf::DECW, add=>mcf::ADDW, sub=>mcf::SUBW, notx=>mcf::NOTW, neg=>mcf::NEGW, shl=>mcf::SHLW, shr=>mcf::SHRW, sar=>mcf::SARW, orx=>mcf::ORW, andx=>mcf::ANDW, xor=>mcf::XORW };
opcodes32 = { inc=>mcf::INCL, dec=>mcf::DECL, add=>mcf::ADDL, sub=>mcf::SUBL, notx=>mcf::NOTL, neg=>mcf::NEGL, shl=>mcf::SHLL, shr=>mcf::SHRL, sar=>mcf::SARL, orx=>mcf::ORL, andx=>mcf::ANDL, xor=>mcf::XORL };
# Our main entrypoint. We are called (only) from:
#
#
#     src/lib/compiler/back/low/main/main/translate-nextcode-to-treecode-g.pkg
#
fun make_treecode_to_machcode_codebuffer
(
buf
#
# 'buf' is our interface to
#
#
#     src/lib/compiler/back/low/mcg/make-machcode-codebuffer-g.pkg
#
# which constructs a machine-code graph driven by our 'put' commands:
# basically we do a lot of
#
# buf.put_op
#
# calls to construct the graph and then one
#
# resultgraph = buf.get_completed_cccomponent
#
# call to get the resulting machcode controlflow graph.
)
: Treecode_Codebuffer
=
{
put_base_op = buf.put_op o mcf::BASE_OP;
exception EA;
# Here we track the codelabel and machine
# instruction for our branch_on_overflow traps.
# We create these as-needed -- one per cccomponent:
#
branch_on_overflow_instruction_and_label
=
REF (NULL: Null_Or ((mcf::Machine_Op, lbl::Codelabel)) );
# Flag recording whether we generated any floating point code.
#
floating_point_used = REF FALSE;
# Effective address of an integer register
#
fun ea_of_int_reg r = if (is_ramreg r) mcf::RAMREG r; else mcf::DIRECT r; fi;
fun ea_of_float_reg r = if (is_framreg r) mcf::FDIRECT r; else mcf::FPR r; fi;
#
fun put_branch_on_overflow ()
=
buf.put_op branch_on_overflow
where
branch_on_overflow
=
case *branch_on_overflow_instruction_and_label
#
THE (branch_on_overflow, _) => branch_on_overflow; # Re-use existing branch instruction.
NULL =>
{ # This is the first overflow trap in this cccomponent.
# Generate label for overflow traps to jump to:
#
label = lbl::make_codelabel_generator "trap" (); # Create, use and discard a codelabel generator.
# Generate branch to that label which is conditional
# on the OVERFLOW bit being set in the condition register:
#
branch_on_overflow
=
mcf::NOTE { op => mcf::jcc { cond => mcf::OO, operand => mcf::IMMED_LABEL (tcf::LABEL label) }, # Branch on integer overflow.
note => lnt::BRANCH_PROBABILITY probability::unlikely # We hope overflows are rare!
};
# Save both label and branch instruction for re-use:
#
branch_on_overflow_instruction_and_label
:=
THE (branch_on_overflow, label);
branch_on_overflow;
};
esac;
end;
make_int_codetemp_info = rgk::make_int_codetemp_info; # These are codetemps, of unlimited number. We map them to
make_float_codetemp_info = rgk::make_float_codetemp_info; # hardware registers later -- see   src/lib/compiler/back/low/regor/solve-register-allocation-problems-by-iterated-coalescing-g.pkg
#
fun fsize 32 => mcf::FP32;
fsize 64 => mcf::FP64;
fsize 80 => mcf::FP80;
fsize _ => error "fsize";
end;
# Mark an expression with a list of annotations
# and then emit it:
#
fun annotate_and_emit_expression' (op, []) => buf.put_op op;
annotate_and_emit_expression' (op, note ! notes) => annotate_and_emit_expression' (mcf::NOTE { op, note }, notes);
end;
# Annotate an expression and emit it
#
fun annotate_and_emit_expression (i, notes)
=
annotate_and_emit_expression' (mcf::BASE_OP i, notes);
put_ops = apply buf.put_op;
# Emit parallel copies for integers.
# Translate parallel copies that involve memregs
# into individual copies.
#
fun copy_ints ([], [], notes)
=>
();
copy_ints (dst, src, notes)
=>
put_ops
(crm::compile_int_register_moves
{ move_instruction, ea => ea_of_int_reg }
{ tmp => THE (mcf::DIRECT (make_int_codetemp_info ())),
dst,
src
}
)
where
fun move_instruction
{ dst as mcf::RAMREG rd,
src as mcf::RAMREG rs
}
=>
if (rkj::codetemps_are_same_color (rd, rs))
[];
else
tmp_r = mcf::DIRECT (make_int_codetemp_info ());
[ mcf::move { mv_op=>mcf::MOVL, src, dst=>tmp_r },
mcf::move { mv_op=>mcf::MOVL, src=>tmp_r, dst }
];
fi;
move_instruction
{ dst=>mcf::DIRECT rd,
src=>mcf::DIRECT rs
}
=>
if (rkj::codetemps_are_same_color (rd, rs))
[];
else [mcf::COPY { kind =>rkj::INT_REGISTER, size_in_bits=>32, dst => [rd], src => [rs], tmp => NULL } ];
fi;
move_instruction { dst, src }
=>
[mcf::move { mv_op=>mcf::MOVL, src, dst } ];
end;
end;
end;
itow = unt::from_int; # Conversions.
wtoi = unt::to_int;
#
fun to_int1 i
=
tcf::mi::to_int1 (32, i);
w32toi32 = one_word_unt::to_multiword_int_x;
i32tow32 = one_word_unt::from_multiword_int;
fun w_to_int1 w
=
one_word_int::from_multiword_int (one_word_unt::to_multiword_int_x w); # One day, this is going to bite us when precision (large_int)>32 # XXX BUGGO FIXME 64-bit issue
eax = mcf::DIRECT (rgk::eax); # Some useful registers.
ecx = mcf::DIRECT (rgk::ecx);
edx = mcf::DIRECT (rgk::edx);
#
fun immed_label lab
=
mcf::IMMED_LABEL (tcf::LABEL lab);
fun expression_is_zero (tcf::LITERAL z) => z == 0; # Is the expression zero?
expression_is_zero (tcf::RNOTE (e, a)) => expression_is_zero e;
expression_is_zero _ => FALSE;
end;
# Does the expression affect the condition-register zero flag?
# WARNING: we assume these things are not optimized out!
#
fun expression_affects_zero_flag (tcf::BITWISE_AND _) => TRUE;
expression_affects_zero_flag (tcf::BITWISE_OR _) => TRUE;
expression_affects_zero_flag (tcf::BITWISE_XOR _) => TRUE;
expression_affects_zero_flag (tcf::RIGHT_SHIFT _) => TRUE;
expression_affects_zero_flag (tcf::RIGHT_SHIFT_U _) => TRUE;
expression_affects_zero_flag (tcf::LEFT_SHIFT _) => TRUE;
expression_affects_zero_flag (tcf::SUB _) => TRUE;
expression_affects_zero_flag (tcf::ADD_OR_TRAP _) => TRUE;
expression_affects_zero_flag (tcf::SUB_OR_TRAP _) => TRUE;
expression_affects_zero_flag (tcf::RNOTE (e, _)) => expression_affects_zero_flag e;
expression_affects_zero_flag _ => FALSE;
end;
#
fun expression_affects_zero_flag2 (tcf::BITWISE_AND _) => TRUE;
expression_affects_zero_flag2 (tcf::BITWISE_OR _) => TRUE;
expression_affects_zero_flag2 (tcf::BITWISE_XOR _) => TRUE;
expression_affects_zero_flag2 (tcf::RIGHT_SHIFT _) => TRUE;
expression_affects_zero_flag2 (tcf::RIGHT_SHIFT_U _) => TRUE;
expression_affects_zero_flag2 (tcf::LEFT_SHIFT _) => TRUE;
expression_affects_zero_flag2 (tcf::ADD (32, _, _)) => TRUE; # Can't use leal! # Probable 64-bit issue -- presumably 32 is bits-per-word.
expression_affects_zero_flag2 (tcf::SUB _) => TRUE;
expression_affects_zero_flag2 (tcf::ADD_OR_TRAP _) => TRUE;
expression_affects_zero_flag2 (tcf::SUB_OR_TRAP _) => TRUE;
expression_affects_zero_flag2 (tcf::RNOTE (e, _)) => expression_affects_zero_flag2 e;
expression_affects_zero_flag2 _ => FALSE;
end;
# Emit parallel copies for floating point -- normal version:
#
fun copy_floats'(fty, [], [], _)
=>
();
copy_floats'(fty, dst as [_], src as [_], notes)
=>
annotate_and_emit_expression' (mcf::COPY { kind => rkj::FLOAT_REGISTER, size_in_bits=>fty, dst, src, tmp=>NULL }, notes);
copy_floats'(fty, dst, src, notes)
=>
annotate_and_emit_expression' (mcf::COPY { kind => rkj::FLOAT_REGISTER, size_in_bits=>fty, dst, src, tmp=>THE (mcf::FDIRECT (make_float_codetemp_info())) }, notes);
end;
# Emit parallel copies for floating point -- fast version.
# Translates parallel copies that involve memregs into
# individual copies.
#
fun copy_floats''(fty, [], [], _)
=>
();
copy_floats''(fty, dst, src, notes)
=>
if (TRUE or is_any_framreg dst or is_any_framreg src)
fsize = fsize fty;
#
fun move_instruction { dst, src }
=
[ mcf::fmove { fsize, src, dst } ];
put_ops (
crm::compile_int_register_moves
{ move_instruction, ea=>ea_of_float_reg }
{ tmp=>case dst
[_] => NULL;
_ => THE (mcf::FPR (make_int_codetemp_info ()));
esac,
dst, src
}
);
else
annotate_and_emit_expression'
( mcf::COPY
{ kind => rkj::FLOAT_REGISTER,
size_in_bits => fty,
dst,
src,
tmp=>case dst
[_] => NULL;
_ => THE (mcf::FPR (make_float_codetemp_info ()));
esac
},
notes
);
fi;
end;
#
fun copy_floats x
=
if (enable_fast_fpmode and *fast_floating_point)
copy_floats'' x;
else copy_floats' x;
fi;
# Translate Treecode condition code
# to intel32 condition code:
#
fun cond tcf::LT => mcf::LT; cond tcf::LTU => mcf::BB;
cond tcf::LE => mcf::LE; cond tcf::LEU => mcf::BE;
cond tcf::EQ => mcf::EQ; cond tcf::NE => mcf::NE;
cond tcf::GE => mcf::GE; cond tcf::GEU => mcf::AE;
cond tcf::GT => mcf::GT; cond tcf::GTU => mcf::AA;
#
cond cc => error (cat ["cond(", tcp::cond_to_string cc, ")"]);
end;
#
fun zero dst
=
put_base_op (mcf::BINARY { bin_op=>mcf::XORL, src=>dst, dst } );
#
fun move'(src as mcf::DIRECT s, dst as mcf::DIRECT d, notes) # Move and annotate.
=>
if (not (rkj::codetemps_are_same_color (s, d)))
#
annotate_and_emit_expression' (mcf::COPY { kind => rkj::INT_REGISTER, size_in_bits=>32, dst => [d], src => [s], tmp => NULL }, notes);
fi;
move'(mcf::IMMED 0, dst as mcf::DIRECT d, notes)
=>
annotate_and_emit_expression (mcf::BINARY { bin_op=>mcf::XORL, src=>dst, dst }, notes); # XOR register with itself to clear it.
move'(src, dst, notes)
=>
annotate_and_emit_expression (mcf::MOVE { mv_op=>mcf::MOVL, src, dst }, notes);
end;
#
fun move (src, dst) # Move only!
=
move'(src, dst, []);
readonly = mcf::rgn::readonly;
#
fun address (ea, ramregion) # Compute an effective address.
=
{
# Keep building bigger and bigger effective-address expressions.
# The input is a list of trees
# b -- base
# i -- index
# s -- scale
# d -- immed displacement
#
fun do_ea ([], b, i, s, d)
=>
make_addressing_mode (b, i, s, d);
do_ea (t ! trees, b, i, s, d)
=>
case t
tcf::LITERAL n => do_eaimmed (trees, to_int1 n, b, i, s, d);
tcf::LATE_CONSTANT _ => do_ealabel (trees, t, b, i, s, d);
tcf::LABEL _ => do_ealabel (trees, t, b, i, s, d);
tcf::LABEL_EXPRESSION le => do_ealabel (trees, le, b, i, s, d);
tcf::ADD (32, t1, t2 as tcf::CODETEMP_INFO(_, r))
=>
if (is_ramreg r) do_ea (t2 ! t1 ! trees, b, i, s, d);
else do_ea (t1 ! t2 ! trees, b, i, s, d);
fi;
tcf::ADD (32, t1, t2)
=>
do_ea (t1 ! t2 ! trees, b, i, s, d);
tcf::SUB (32, t1, tcf::LITERAL n)
=>
do_ea (t1 ! tcf::LITERAL (tcf::mi::neg (32, n)) ! trees, b, i, s, d);
tcf::LEFT_SHIFT (32, t1, tcf::LITERAL n)
=>
{ n = tcf::mi::to_int (32, n);
#
case n
0 => displace (trees, t1, b, i, s, d);
1 => indexed (trees, t1, t, 1, b, i, s, d);
2 => indexed (trees, t1, t, 2, b, i, s, d);
3 => indexed (trees, t1, t, 3, b, i, s, d);
_ => displace (trees, t, b, i, s, d);
esac;
};
t => displace (trees, t, b, i, s, d);
esac;
end
also
fun do_eaimmed (trees, 0, b, i, s, d) # Add an immediate constant.
=>
do_ea (trees, b, i, s, d);
do_eaimmed (trees, n, b, i, s, mcf::IMMED m)
=>
do_ea (trees, b, i, s, mcf::IMMED (n+m));
do_eaimmed (trees, n, b, i, s, mcf::IMMED_LABEL le)
=>
do_ea (trees, b, i, s,
mcf::IMMED_LABEL (tcf::ADD (32, le, tcf::LITERAL (tcf::mi::from_int1 (32, n)))));
do_eaimmed (trees, n, b, i, s, _)
=>
error "do_eaimmed";
end
also
fun do_ealabel (trees, le, b, i, s, mcf::IMMED 0) # Add a label expression.
=>
do_ea (trees, b, i, s, mcf::IMMED_LABEL le);
do_ealabel (trees, le, b, i, s, mcf::IMMED m)
=>
do_ea (
trees,
b,
i,
s,
mcf::IMMED_LABEL (tcf::ADD (32, le, tcf::LITERAL (tcf::mi::from_int1 (32, m))))
except
OVERFLOW = error "do_ealabel: constant too large"
);
do_ealabel (trees, le, b, i, s, mcf::IMMED_LABEL le')
=>
do_ea (trees, b, i, s, mcf::IMMED_LABEL (tcf::ADD (32, le, le')));
do_ealabel (trees, le, b, i, s, _)
=>
error "doEALabel";
end
also
fun make_addressing_mode (NULL, NULL, _, disp)
=>
disp;
make_addressing_mode (THE base, NULL, _, disp)
=>
mcf::DISPLACE { base, disp, ramregion };
make_addressing_mode (base, THE index, scale, disp)
=>
mcf::INDEXED { base, index, scale, disp, ramregion };
end
also
fun expr_not_esp tree # Generate code for tree and ensure that it is not in %esp.
=
{ r = expr tree;
#
if (rkj::codetemps_are_same_color (r, rgk::esp))
#
tmp = make_int_codetemp_info ();
move (mcf::DIRECT r, mcf::DIRECT tmp);
tmp;
else
r;
fi;
}
also # Add a base register.
fun displace (trees, t, NULL, i, s, d)
=>
do_ea (trees, THE (expr t), i, s, d); # no base yet
displace (trees, t, b as THE base, NULL, _, d) # no index
=>
{ i = expr t; # Make t the index, but make sure that it is not %esp!
#
if (rkj::codetemps_are_same_color (i, rgk::esp) )
# Swap base and index.
if (rkj::codetemps_are_same_color (base, rgk::esp) )
#
do_ea (trees, THE i, b, 0, d);
else
index = make_int_codetemp_info (); # Base and index = %esp!
move (mcf::DIRECT i, mcf::DIRECT index);
do_ea (trees, b, THE index, 0, d);
fi;
else
do_ea (trees, b, THE i, 0, d);
fi;
};
displace (trees, t, THE base, i, s, d) # Base and index.
=>
{ b = expr (tcf::ADD (32, tcf::CODETEMP_INFO (32, base), t));
do_ea (trees, THE b, i, s, d);
};
end
# Add an indexed register
also
fun indexed (trees, t, t0, scale, b, NULL, _, d) # no index yet
=>
do_ea (trees, b, THE (expr_not_esp t), scale, d);
indexed (trees, _, t0, _, NULL, i, s, d) # no base
=>
do_ea (trees, THE (expr t0), i, s, d);
indexed (trees, _, t0, _, THE base, i, s, d) # Base and index
=>
{ b = expr (tcf::ADD (32, t0, tcf::CODETEMP_INFO (32, base)));
do_ea (trees, THE b, i, s, d);
};
end;
case (do_ea([ea], NULL, NULL, 0, mcf::IMMED 0))
#
mcf::IMMED _ => raise exception EA;
mcf::IMMED_LABEL le => mcf::LABEL_EA le;
ea => ea;
esac;
} # fun address
# Convert a tcf expression
# to an mcf operand:
#
also
fun operand ( tcf::LITERAL i ) => mcf::IMMED (to_int1 (i));
#
operand (x as tcf::LATE_CONSTANT _ ) => mcf::IMMED_LABEL x;
operand (x as tcf::LABEL _ ) => mcf::IMMED_LABEL x;
operand ( tcf::LABEL_EXPRESSION le ) => mcf::IMMED_LABEL le;
#
operand ( tcf::CODETEMP_INFO (_, r) ) => ea_of_int_reg r;
operand ( tcf::LOAD (32, ea, ramregion) ) => address (ea, ramregion);
#
operand (t ) => mcf::DIRECT (expr t);
end
also
fun move_to_reg (operand)
=
{ dst = mcf::DIRECT (make_int_codetemp_info ());
move (operand, dst); dst;
}
also
fun reduce_operand (mcf::DIRECT r)
=>
r;
reduce_operand operand
=>
{ dst = make_int_codetemp_info ();
move (operand, mcf::DIRECT dst);
dst;
};
end
# Ensure that the operand is
# either an immed or register:
#
also
fun immed_or_reg (operand as mcf::DISPLACE _) => move_to_reg operand;
immed_or_reg (operand as mcf::INDEXED _) => move_to_reg operand;
immed_or_reg (operand as mcf::RAMREG _) => move_to_reg operand;
immed_or_reg (operand as mcf::LABEL_EA _) => move_to_reg operand;
immed_or_reg operand => operand;
end
also
fun is_immediate (mcf::IMMED _) => TRUE;
is_immediate (mcf::IMMED_LABEL _) => TRUE;
is_immediate _ => FALSE;
end
also
fun reg_or_mem operand
=
if (is_immediate operand) move_to_reg operand;
else operand;
fi
also
fun is_mem_operand operand
=
case operand
#
mcf::DISPLACE _ => TRUE;
mcf::INDEXED _ => TRUE;
mcf::RAMREG _ => TRUE;
mcf::LABEL_EA _ => TRUE;
mcf::FDIRECT f => TRUE;
#
_ => FALSE;
esac
also
fun do_expression (expression, rd: rkj::Codetemp_Info, notes) # "rd" == "destination int register".
=
# Compute an integer expression and leave the
# result in the destination register rd.
#
{ rd_operand = ea_of_int_reg rd;
#
fun same_as_dest_reg (mcf::DIRECT r) => rkj::codetemps_are_same_color (r, rd);
same_as_dest_reg (mcf::RAMREG r) => rkj::codetemps_are_same_color (r, rd);
#
same_as_dest_reg _ => FALSE;
end;
# Emit a binary operator. If the destination is
# a ramreg, do something smarter.
#
fun gen_binary (bin_op, operand1, operand2)
=
if ( is_ramreg rd
and (is_mem_operand operand1 or is_mem_operand operand2)
or same_as_dest_reg operand2
)
tmp_r = make_int_codetemp_info ();
tmp = mcf::DIRECT tmp_r;
move (operand1, tmp);
annotate_and_emit_expression (mcf::BINARY { bin_op, src=>operand2, dst=>tmp }, notes);
move (tmp, rd_operand);
else
move (operand1, rd_operand);
annotate_and_emit_expression (mcf::BINARY { bin_op, src=>operand2, dst=>rd_operand }, notes);
fi;
# Generate a binary operator; it may commute:
#
fun binary_comm (bin_op, e1, e2)
=
gen_binary (bin_op, operand1, operand2)
where
my (operand1, operand2)
=
case (operand e1, operand e2)
#
(x as mcf::IMMED _, y) => (y, x);
(x as mcf::IMMED_LABEL _, y) => (y, x);
(x, y as mcf::DIRECT _ ) => (y, x);
(x, y) => (x, y);
esac;
end;
# Generate a binary operator; non-commutative:
#
fun binary (bin_op, e1, e2)
=
gen_binary (bin_op, operand e1, operand e2);
# Generate a unary operator:
#
fun unary (un_op, e)
=
{ operand = operand e;
#
if (is_ramreg rd and is_mem_operand operand)
#
tmp = mcf::DIRECT (make_int_codetemp_info ());
#
move (operand, tmp);
move (tmp, rd_operand);
else
move (operand, rd_operand);
fi;
annotate_and_emit_expression (mcf::UNARY { un_op, operand=>rd_operand }, notes);
};
# Generate shifts. The shift
# amount must be a constant or in %ecx
#
fun shift (opcode, e1, e2)
=
{ operand1 = operand e1;
operand2 = operand e2;
case operand2
#
mcf::IMMED _
=>
gen_binary (opcode, operand1, operand2);
_ =>
if (same_as_dest_reg operand2)
#
tmp_r = make_int_codetemp_info ();
tmp = mcf::DIRECT tmp_r;
move (operand1, tmp);
move (operand2, ecx);
annotate_and_emit_expression (mcf::BINARY { bin_op=>opcode, src=>ecx, dst=>tmp }, notes);
move (tmp, rd_operand);
else
move (operand1, rd_operand);
move (operand2, ecx);
annotate_and_emit_expression (mcf::BINARY { bin_op=>opcode, src=>ecx, dst=>rd_operand }, notes);
fi;
esac;
};
# Division or remainder -- same instruction on Intel32.
#
# Intel32 requires that the dividend be in the %edx:%eax regpair.
#
# Intel32 leaves
# the quotient in EAX,
# the remainder in EDX.
#
# Our 'result_reg' argument tells
# us which of the two to use.
#
# If 'overflow' is TRUE we append a branch_on_overflow instruction.
# If 'signed' is TRUE we do signed division, otherwise unsigned:
#
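# For illustration (a sketch, not verbatim output), the signed case
# comes out roughly as:
#
#     movl   <e1>, %eax        # 32-bit dividend into %eax.
#     cdq                      # Sign-extend into %edx, giving the 64-bit dividend in %edx:%eax.
#     idivl  <e2>              # Quotient -> %eax, remainder -> %edx.
#     movl   %eax, <rd>        # (or %edx when the caller wants the remainder)
#
# The unsigned case zeroes %edx and uses divl instead, and a
# branch_on_overflow is appended only when 'overflow' is TRUE.
#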
fun divrem (signed, overflow, e1, e2, result_reg)
=
{ my (operand1, operand2 )
= (operand e1, operand e2);
# First we copy our 32-bit dividend into EAX and
# then extend it to a 64-bit value in EDX:EAX:
#
move (operand1, eax);
#
mult_div_op
=
if signed
put_base_op mcf::CDQ; # Sign-extend eax into edx.
mcf::IDIVL1;
else
zero edx;
mcf::DIVL1;
fi;
# Do the actual un/signed divide instruction:
#
annotate_and_emit_expression (mcf::MULTDIV { mult_div_op, src=>reg_or_mem operand2 }, notes);
# Save either quotient or remainder,
# per caller request:
#
move (result_reg, rd_operand); # Move either quotient or remainder to rd_operand (result-to-use).
if overflow put_branch_on_overflow(); fi;
};
fun divinf0 (overflow, e1, e2) # Division with rounding to negative infinity
= # Intel hardware divide rounds to zero, so we have to fake it here.
{
o1 = operand e1;
o2 = operand e2;
l = lbl::make_anonymous_codelabel ();
move (o1, eax); # Move 32-bit dividend to EAX.
put_base_op mcf::CDQ; # Sign-extend to yield 64-bit dividend in EDX:EAX.
annotate_and_emit_expression # Do actual divide.
( mcf::MULTDIV { mult_div_op => mcf::IDIVL1, src => reg_or_mem o2 },
notes
);
if overflow put_branch_on_overflow(); fi;
apply put_base_op # Fake round-to-negative-infinity given rounded-to-zero result.
[ mcf::CMPL { lsrc => edx, rsrc => mcf::IMMED 0 },
mcf::JCC { cond => mcf::EQ, operand => immed_label l },
mcf::BINARY { bin_op => mcf::XORL,
src => reg_or_mem o2,
dst => edx
},
mcf::JCC { cond => mcf::GE, operand => immed_label l },
mcf::UNARY { un_op => mcf::DECL, operand => eax }
];
buf.put_private_label l;
move (eax, rd_operand);
};
# Analyze for power-of-two-ness
#
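# Worked example of the test below:
#
#     i = 8:   i = 0b1000,  i-1 = 0b0111,  (i-1) & i == 0      => 8 is a power of two (log2 = 3).
#     i = 6:   i = 0b0110,  i-1 = 0b0101,  (i-1) & i == 0b0100 => 6 is not a power of two.
#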
fun power_of_two_check i' # i>0 is a power of two if ((i-1) & i) == 0
= # Put another way, adding 1 to a number will flip all existing 1 bits to zero
{ # if-and-only-if they form an unbroken sequence starting at bit zero.
i = to_int1 i';
{ my (isneg, a, w)
=
if (i >= 0) (FALSE, i, tcf::mi::to_unt1 (32, i'));
else (TRUE, -i, tcf::mi::to_unt1 (32, tcf::mi::neg (32, i')));
fi;
fun log2 (0u1, p) => p; # Obviously a 'case' or other table-lookup would do nicely here.
log2 ( w, p) => log2 (u32::(>>) (w, 0u1), p + 1);
end;
if (w > 0u1 and u32::bitwise_and (w - 0u1, w) == 0u0)
#
(i, THE (isneg, a, tcf::LITERAL (tcf::mi::from_int1 (32, log2 (w, 0)))));
else
(i, NULL);
fi;
}
except
_ = (i, NULL);
};
# Division by a power of two when rounding to neginf is the
# same as an arithmetic right shift.
#
# (Usually we round to zero because that's what Intel hardware does,
# but we could still use this if we could deduce a number must be nonnegative.)
#
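# Worked example:  -7 div 2, rounding toward negative infinity, should give -4.
#
#   o Hardware idivl rounds toward zero:  quotient -3, remainder -1.
#     divinf0 then notices the remainder is nonzero and of opposite sign
#     to the divisor, so it decrements the quotient:  -3 - 1 = -4.
#
#   o For a power-of-two divisor we can skip all that, since an arithmetic
#     right shift already rounds toward negative infinity:  -7 >> 1 = -4.
#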
fun divinf (overflow, e1, e2 as tcf::LITERAL n')
=>
case (power_of_two_check n')
#
(_, NULL)
=>
divinf0 (overflow, e1, e2);
(_, THE (FALSE, _, p))
=>
shift (mcf::SARL, tcf::CODETEMP_INFO (32, expr e1), p);
(_, THE (TRUE, _, p))
=>
{ reg = expr e1;
put_base_op (mcf::UNARY { un_op => mcf::NEGL, operand => mcf::DIRECT reg } );
shift (mcf::SARL, tcf::CODETEMP_INFO (32, reg), p);
};
esac;
divinf (overflow, e1, e2)
=>
divinf0 (overflow, e1, e2);
end;
#
fun reminf0 (e1, e2) # Remainder when rounding to negative infinity.
= # Intel hardware divide rounds to zero, so we have to fake it here.
{ o1 = operand e1;
o2 = operand e2;
l = lbl::make_anonymous_codelabel ();
move (o1, eax);
put_base_op mcf::CDQ;
annotate_and_emit_expression (mcf::MULTDIV { mult_div_op => mcf::IDIVL1, src => reg_or_mem o2 },
notes);
# Now we fake round-to-negative-infinity given rounded-to-zero result.
#
apply put_base_op [ mcf::CMPL { lsrc => edx, rsrc => mcf::IMMED 0 },
mcf::JCC { cond => mcf::EQ, operand => immed_label l }
];
#
move (edx, eax);
#
apply put_base_op [ mcf::BINARY { bin_op => mcf::XORL, src => reg_or_mem o2, dst => eax },
mcf::JCC { cond => mcf::GE, operand => immed_label l },
mcf::BINARY { bin_op => mcf::ADDL, src => reg_or_mem o2, dst => edx }
];
buf.put_private_label l;
move (edx, rd_operand);
};
# n mod (power-of-2) corresponds to a bitmask (AND).
# If the power is negative, then we must first negate
# the argument and then again negate the result.
#
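# Worked example (remainder with rounding toward negative infinity):
#
#     -7 mod  4  =  1:   -7 & (4-1)  =  ...11111001 & 0b011  =  1
#     -7 mod -4  = -3:   negate to 7,  7 & 0b011 = 3,  negate again => -3
#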
fun reminf (e1, e2 as tcf::LITERAL n')
=>
case (power_of_two_check n')
#
(_, NULL)
=>
reminf0 (e1, e2);
(_, THE (FALSE, a, _))
=>
binary_comm (mcf::ANDL, e1,
tcf::LITERAL (tcf::mi::from_int1 (32, a - 1)));
(_, THE (TRUE, a, _))
=>
{ r1 = expr e1;
o1 = mcf::DIRECT r1;
put_base_op (mcf::UNARY { un_op => mcf::NEGL, operand => o1 } );
put_base_op (mcf::BINARY { bin_op => mcf::ANDL,
src => mcf::IMMED (a - 1),
dst => o1
}
);
unary (mcf::NEGL, tcf::CODETEMP_INFO (32, r1));
};
esac;
reminf (e1, e2)
=>
reminf0 (e1, e2);
end;
# Improve the special case for division:
#
fun divide (signed, overflow, e1, e2 as tcf::LITERAL n')
=>
case (power_of_two_check n')
#
(n, THE (isneg, a, p))
=>
if (not signed)
#
shift (mcf::SHRL, e1, p);
else
label = lbl::make_anonymous_codelabel ();
reg1 = expr e1;
operand1 = mcf::DIRECT reg1;
if isneg put_base_op (mcf::UNARY { un_op => mcf::NEGL, operand => operand1 } );
elif (expression_affects_zero_flag e1) ();
else put_base_op (mcf::CMPL { lsrc => operand1, rsrc => mcf::IMMED 0 } );
fi;
put_base_op (mcf::JCC { cond => mcf::GE, operand => immed_label label } );
put_base_op
if (a == 2) mcf::UNARY { un_op => mcf::INCL,
operand => operand1
};
else
mcf::BINARY { bin_op => mcf::ADDL,
src => mcf::IMMED (a - 1),
dst => operand1
};
fi;
buf.put_private_label label;
shift (mcf::SARL, tcf::CODETEMP_INFO (32, reg1), p);
fi;
(n, NULL)
=>
divrem (signed, overflow and (n == -1 or n == 0), e1, e2, eax);
esac;
divide (signed, overflow, e1, e2)
=>
divrem (signed, overflow, e1, e2, eax);
end;
# rem never causes overflow
#
fun rem (signed, e1, e2 as tcf::LITERAL n')
=>
case (power_of_two_check n')
#
(n, THE (isneg, a, _))
=>
if signed
#
# The following logic should work uniformly
# for both isneg and not isneg. It only uses
# the absolute value (a) of the divisor.
# Here is the formula:
# let p be a power of two and a = abs (p):
#
# x % p = x - ((x < 0 ? x + a - 1: x) & (-a))
#
# (That's what GCC seems to do.)
#
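# Worked example with a = 4 (remainder rounding toward zero):
#
#     x =  7:    7 & -4        =  4;    7 -   4   =  3
#     x = -7:   (-7 + 3) & -4  = -4;   -7 - (-4)  = -3
#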
r1 = expr e1;
o1 = mcf::DIRECT r1;
#
rt = make_int_codetemp_info ();
#
tmp = mcf::DIRECT rt;
l = lbl::make_anonymous_codelabel ();
move (o1, tmp);
if (not (expression_affects_zero_flag e1))
#
put_base_op (mcf::CMPL { lsrc => o1,
rsrc => mcf::IMMED 0
}
);
fi;
put_base_op (mcf::JCC { cond => mcf::GE, operand => immed_label l } );
put_base_op (mcf::BINARY { bin_op => mcf::ADDL,
src => mcf::IMMED (a - 1),
dst => tmp
}
);
buf.put_private_label l;
put_base_op (mcf::BINARY { bin_op => mcf::ANDL,
src => mcf::IMMED (-a),
dst => tmp
}
);
binary (mcf::SUBL, tcf::CODETEMP_INFO (32, r1), tcf::CODETEMP_INFO (32, rt));
elif isneg
# This is really strange...
divrem (FALSE, FALSE, e1, e2, edx);
else
binary_comm (mcf::ANDL, e1,
tcf::LITERAL (tcf::mi::from_int1 (32, n - 1)));
fi;
(_, NULL)
=>
divrem (signed, FALSE, e1, e2, edx);
esac;
rem (signed, e1, e2)
=>
divrem (signed, FALSE, e1, e2, edx);
end;
# Make sure the destination is a register:
#
fun dst_must_be_reg f
=
if (not (is_ramreg rd))
#
f (rd, rd_operand);
else
tmp_r = make_int_codetemp_info ();
tmp = mcf::DIRECT (tmp_r);
f (tmp_r, tmp);
move (tmp, rd_operand);
fi;
# unsigned integer multiplication
#
fun u_multiply0 (e1, e2)
=
# note e2 can never be (mcf::DIRECT edx)
{ move (operand e1, eax);
annotate_and_emit_expression (mcf::MULTDIV { mult_div_op=>mcf::MULL1,
src=>reg_or_mem (operand e2) }, notes);
move (eax, rd_operand);
};
#
fun u_multiply (e1, e2 as tcf::LITERAL n')
=>
case (power_of_two_check n')
#
(_, THE (FALSE, _, p))
=>
shift (mcf::SHLL, e1, p);
_ =>
u_multiply0 (e1, e2);
esac;
u_multiply (e1 as tcf::LITERAL _, e2) => u_multiply (e2, e1);
u_multiply (e1, e2) => u_multiply0 (e1, e2);
end;
# signed integer multiplication:
# The only allowed forms that also set the
# OF and CF flags are:
#
# (dst) (src1) (src2)
# imul r32, r32/m32, imm8
# (dst) (src)
# imul r32, imm8
# imul r32, imm32
# imul r32, r32/m32
# Note: destination must be a register!
#
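# For illustration (AT&T syntax, operands reversed relative to the
# Intel-syntax table above -- a sketch, not verbatim output):
#
#     imull  $10, %ebx, %ecx       # Three-operand form:  %ecx := %ebx * 10   (the MUL3 case below).
#     imull  %ebx, %ecx            # Two-operand form:    %ecx := %ecx * %ebx (the BINARY IMULL case).
#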
fun multiply (e1, e2)
=
dst_must_be_reg
(\\ (rd, rd_operand)
=
do_it (operand e1, operand e2)
where
fun do_it (i1 as mcf::IMMED _, i2 as mcf::IMMED _)
=>
{ move (i1, rd_operand);
annotate_and_emit_expression (mcf::BINARY { bin_op=>mcf::IMULL, dst=>rd_operand, src=>i2 }, notes);
};
do_it (rm, i2 as mcf::IMMED _)
=>
do_it (i2, rm);
do_it (imm as mcf::IMMED (i), rm)
=>
annotate_and_emit_expression (mcf::MUL3 { dst=>rd, src1=>rm, src2=>i }, notes);
do_it (r1 as mcf::DIRECT _, r2 as mcf::DIRECT _)
=>
{ move (r1, rd_operand);
annotate_and_emit_expression (mcf::BINARY { bin_op=>mcf::IMULL, dst=>rd_operand, src=>r2 }, notes);
};
do_it (r1 as mcf::DIRECT _, rm)
=>
{ move (r1, rd_operand);
annotate_and_emit_expression (mcf::BINARY { bin_op=>mcf::IMULL, dst=>rd_operand, src=>rm }, notes);
};
do_it (rm, r as mcf::DIRECT _)
=>
do_it (r, rm);
do_it (rm1, rm2)
=>
if (same_as_dest_reg rm2)
tmp_r = make_int_codetemp_info ();
tmp = mcf::DIRECT tmp_r;
move (rm1, tmp);
annotate_and_emit_expression (mcf::BINARY { bin_op=>mcf::IMULL, dst=>tmp, src=>rm2 }, notes);
move (tmp, rd_operand);
else
move (rm1, rd_operand);
annotate_and_emit_expression (mcf::BINARY { bin_op=>mcf::IMULL, dst=>rd_operand, src=>rm2 }, notes);
fi;
end;
end
); # fn
#
fun multiply_notrap (e1, e2 as tcf::LITERAL n')
=>
case (power_of_two_check n')
#
(_, THE (isneg, _, p))
=>
{
r1 = expr e1;
o1 = mcf::DIRECT r1;
if isneg put_base_op (mcf::UNARY { un_op => mcf::NEGL, operand => o1 } ); fi;
shift (mcf::SHLL, tcf::CODETEMP_INFO (32, r1), p);
};
_ => multiply (e1, e2);
esac;
multiply_notrap (e1 as tcf::LITERAL _, e2) => multiply_notrap (e2, e1);
multiply_notrap (e1, e2) => multiply (e1, e2);
end;
fun gen_load (mv_op, ea, ramregion) # Emit a load instruction; make sure that the destination is a register:
=
dst_must_be_reg
(\\ (_, dst)
=
annotate_and_emit_expression
(
mcf::MOVE { mv_op, src=>address (ea, ramregion), dst },
notes
) );
fun load8 (ea, ramregion) = gen_load (mcf::MOVZBL, ea, ramregion); # Zero-extended loads.
fun load16 (ea, ramregion) = gen_load (mcf::MOVZWL, ea, ramregion);
fun load8s (ea, ramregion) = gen_load (mcf::MOVSBL, ea, ramregion); # Sign-extended loads.
fun load16s (ea, ramregion) = gen_load (mcf::MOVSWL, ea, ramregion);
fun load32 (ea, ramregion) = gen_load (mcf::MOVL, ea, ramregion);
# Generate setcc instruction:
# semantics: MOVE_INT (rd, CONDITIONAL_LOAD (_, tcf::CMP (type, cc, t1, t2), yes, no))
# Bug: if eax is either t1 or t2 then a problem will occur!!!
# Note that we have to use eax as the destination of the
# setcc because it only works on the registers
# %al, %bl, %cl, %dl and %[abcd]h. The last four registers
# are inaccessible in 32 bit mode.
#
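# For illustration (a sketch, not verbatim output), the common
# yes=1/no=0 case below comes out roughly as:
#
#     cmpl   <t2>, <t1>
#     setCC  %al               # Set the low byte of %eax from the condition.
#     andl   $255, %eax        # Clear the upper 24 bits.
#     movl   %eax, <rd>
#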
fun setcc (type, cc, t1, t2, yes, no)
=
{ my (cc, yes, no)
=
if (yes > no) (cc, yes, no);
else (tcp::negate_cond cc, no, yes);
fi;
# Clear the destination first because
# SETcc only sets the low order byte:
#
case (yes, no, cc)
#
(1, 0, tcf::LT)
=>
{ tmp = mcf::DIRECT (expr (tcf::SUB (32, t1, t2)));
move (tmp, rd_operand);
put_base_op (mcf::BINARY { bin_op=>mcf::SHRL, src=>mcf::IMMED 31, dst=>rd_operand } );
};
(1, 0, tcf::GT)
=>
{ tmp = mcf::DIRECT (expr (tcf::SUB (32, t1, t2)));
put_base_op (mcf::UNARY { un_op=>mcf::NOTL, operand=>tmp } );
move (tmp, rd_operand);
put_base_op (mcf::BINARY { bin_op=>mcf::SHRL, src=>mcf::IMMED 31, dst=>rd_operand } );
};
(1, 0, _) # normal case
=>
{ cc = cmp (TRUE, type, cc, t1, t2, []);
annotate_and_emit_expression (mcf::SET { cond => cond cc, operand=>eax }, notes);
put_base_op (mcf::BINARY { bin_op=>mcf::ANDL, src=>mcf::IMMED 255, dst=>eax } );
move (eax, rd_operand);
};
(c1, c2, _)
=>
# general case;
# from the Intel optimization guide p3-5
#
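# (The trick below: with %eax already zeroed, the setcc leaves 0 or 1
# in all of %eax, so a single lea can then compute no + (yes-no)*%eax;
# e.g. for yes-no == 5 it emits something like  leal c2(%eax,%eax,4), tmp.
# Differences that don't fit the 1/2/3/4/5/8/9 scale patterns fall
# through to the setcc/decl/andl sequence instead.)
#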
{ zero eax;
cc = cmp (TRUE, type, cc, t1, t2, []);
#
fun c19 (base, scale)
=
{
address = mcf::INDEXED { base,
index=>rgk::eax,
scale,
disp=>mcf::IMMED c2,
ramregion=>readonly };
tmp_r = make_int_codetemp_info ();
tmp = mcf::DIRECT tmp_r;
put_base_op (mcf::SET { cond=>cond cc, operand=>eax } );
annotate_and_emit_expression (mcf::LEA { r32=>tmp_r, address }, notes);
move (tmp, rd_operand);
};
case (c1-c2)
#
1 => c19 (NULL, 0);
2 => c19 (NULL, 1);
3 => c19 (THE rgk::eax, 1);
4 => c19 (NULL, 2);
5 => c19 (THE rgk::eax, 2);
8 => c19 (NULL, 3);
9 => c19 (THE rgk::eax, 3);
dd =>
{ put_base_op (mcf::SET { cond=>cond (tcp::negate_cond cc), operand=>eax } );
put_base_op (mcf::UNARY { un_op=>mcf::DECL, operand=>eax } );
put_base_op (mcf::BINARY { bin_op=>mcf::ANDL, src=>mcf::IMMED dd, dst=>eax } );
if (c2 == 0)
#
move (eax, rd_operand);
else
tmp_r = make_int_codetemp_info ();
tmp = mcf::DIRECT tmp_r;
annotate_and_emit_expression (mcf::LEA { address=>
mcf::DISPLACE {
base=>rgk::eax,
disp=>mcf::IMMED c2,
ramregion=>readonly },
r32=>tmp_r }, notes);
move (tmp, rd_operand);
fi;
};
esac;
};
esac;
}; # fun setcc
# Generate cmovcc instruction
# (Pentium Pro and later only).
#
fun cmovcc (type, cc, t1, t2, yes, no)
=
dst_must_be_reg gen_cmov
where
fun gen_cmov (dst_r, _)
=
{ do_expression (no, dst_r, []); # FALSE branch
#
cc = cmp (TRUE, type, cc, t1, t2, []); # Compare
annotate_and_emit_expression
(
mcf::CMOV { cond => cond cc,
src => reg_or_mem (operand yes),
dst => dst_r
},
notes
);
};
end;
#
fun unknown_expression expression
=
do_expression (tct::compile_int_expression expression, rd, notes);
# Add n to rd:
#
fun add_n n
=
{ n = operand n;
src = if (is_ramreg rd) immed_or_reg n;
else n;
fi;
annotate_and_emit_expression
(
mcf::BINARY { bin_op => mcf::ADDL,
src,
dst => rd_operand
},
notes
);
};
#
fun addition (e1, e2) # Generate addition.
=
case e1
tcf::CODETEMP_INFO(_, rs) => if (rkj::codetemps_are_same_color (rs, rd)) add_n e2;
else addition1 (e1, e2);
fi;
_ => addition1 (e1, e2);
esac
also
fun addition1 (e1, e2)
=
case e2
#
tcf::CODETEMP_INFO(_, rs) => if (rkj::codetemps_are_same_color (rs, rd)) add_n e1;
else addition2 (e1, e2);
fi;
_ => addition2 (e1, e2);
esac
also
fun addition2 (e1, e2)
=
dst_must_be_reg
(\\ (dst_r, _)
=
annotate_and_emit_expression
(
mcf::LEA { r32=>dst_r, address=>address (expression, readonly) },
notes
)
)
except
EA = binary_comm (mcf::ADDL, e1, e2);
case expression
#
tcf::CODETEMP_INFO(_, rs)
=>
if (is_ramreg rs and is_ramreg rd)
#
tmp = mcf::DIRECT (make_int_codetemp_info ());
move'(mcf::RAMREG rs, tmp, notes);
move'(tmp, rd_operand, []);
else
move'(ea_of_int_reg rs, rd_operand, notes);
fi;
tcf::LITERAL z
=>
{
n = to_int1 z;
if (n != 0)
#
move'(mcf::IMMED (n), rd_operand, notes);
else
# As per Fermin's request, special speedup for rd := 0.
# Currently we don't bother with the size.
#
if (is_ramreg rd)
#
move'(mcf::IMMED 0, rd_operand, notes);
else
annotate_and_emit_expression
(
mcf::BINARY { bin_op => mcf::XORL,
src => rd_operand,
dst => rd_operand
},
notes
);
fi;
fi;
};
(tcf::LATE_CONSTANT _
| tcf::LABEL _)
=>
move'(mcf::IMMED_LABEL expression, rd_operand, notes);
tcf::LABEL_EXPRESSION le
=>
move'(mcf::IMMED_LABEL le, rd_operand, notes);
# 32-bit addition
#
tcf::ADD (32, e1, e2 as tcf::LITERAL n)
=>
{
n = to_int1 n;
case n
1 => unary (mcf::INCL, e1);
-1 => unary (mcf::DECL, e1);
_ => addition (e1, e2);
esac;
};
tcf::ADD (32, e1 as tcf::LITERAL n, e2)
=>
{
n = to_int1 n;
case n
1 => unary (mcf::INCL, e2);
-1 => unary (mcf::DECL, e2);
_ => addition (e1, e2);
esac;
};
tcf::ADD (32, e1, e2)
=>
addition (e1, e2);
# 32-bit addition but set the flag!
# This is a stupid hack for now. XXX BUGGO FIXME
#
tcf::ADD (0, e, e1 as tcf::LITERAL n)
=>
{ n = tcf::mi::to_int (32, n);
#
if (n == 1) unary (mcf::INCL, e);
elif (n == -1) unary (mcf::DECL, e);
else binary_comm (mcf::ADDL, e, e1);
fi;
};
tcf::ADD (0, e1 as tcf::LITERAL n, e)
=>
{ n = tcf::mi::to_int (32, n);
#
if (n == 1) unary (mcf::INCL, e);
elif (n == -1) unary (mcf::DECL, e);
else binary_comm (mcf::ADDL, e1, e);
fi;
};
tcf::ADD (0, e1, e2)
=>
binary_comm (mcf::ADDL, e1, e2);
# 32-bit subtraction:
#
tcf::SUB (32, e1, e2 as tcf::LITERAL n)
=>
{ n = to_int1 n;
case n
#
0 => do_expression (e1, rd, notes);
1 => unary (mcf::DECL, e1);
-1 => unary (mcf::INCL, e1);
_ => binary (mcf::SUBL, e1, e2);
esac;
};
tcf::SUB (32, e1 as tcf::LITERAL n, e2)
=>
if (n == 0) unary (mcf::NEGL, e2);
else binary (mcf::SUBL, e1, e2);
fi;
tcf::SUB (32, e1, e2) => binary (mcf::SUBL, e1, e2);
tcf::MULU (32, x, y) => u_multiply (x, y);
tcf::DIVU (32, x, y) => divide (FALSE, FALSE, x, y);
tcf::REMU (32, x, y) => rem (FALSE, x, y);
tcf::MULS ( 32, x, y) => multiply_notrap (x, y);
tcf::DIVS (tcf::d::ROUND_TO_ZERO, 32, x, y) => divide (TRUE, FALSE, x, y); # d:: is a special rounding mode just for divide instructions.
tcf::DIVS (tcf::d::ROUND_TO_NEGINF, 32, x, y) => divinf (FALSE, x, y); # ROUND_TO_NEGINF is quite slow on Intel -- we must fake it in software.
tcf::REMS (tcf::d::ROUND_TO_ZERO, 32, x, y) => rem (TRUE, x, y);
tcf::REMS (tcf::d::ROUND_TO_NEGINF, 32, x, y) => reminf (x, y); # ROUND_TO_NEGINF is quite slow on Intel -- we must fake it in software.
tcf::ADD_OR_TRAP (32, x, y) => { binary_comm (mcf::ADDL, x, y); put_branch_on_overflow (); };
tcf::SUB_OR_TRAP (32, x, y) => { binary (mcf::SUBL, x, y); put_branch_on_overflow (); };
tcf::MULS_OR_TRAP (32, x, y) => { multiply ( x, y); put_branch_on_overflow (); };
tcf::DIVS_OR_TRAP (tcf::d::ROUND_TO_ZERO, 32, x, y) => divide (TRUE, TRUE, x, y);
tcf::DIVS_OR_TRAP (tcf::d::ROUND_TO_NEGINF, 32, x, y) => divinf (TRUE, x, y);
tcf::BITWISE_AND (32, x, y) => binary_comm (mcf::ANDL, x, y);
tcf::BITWISE_OR (32, x, y) => binary_comm (mcf::ORL, x, y);
tcf::BITWISE_XOR (32, x, y) => binary_comm (mcf::XORL, x, y);
tcf::BITWISE_NOT (32, x) => unary (mcf::NOTL, x);
tcf::RIGHT_SHIFT (32, x, y) => shift (mcf::SARL, x, y);
tcf::RIGHT_SHIFT_U (32, x, y) => shift (mcf::SHRL, x, y);
tcf::LEFT_SHIFT (32, x, y) => shift (mcf::SHLL, x, y);
tcf::LOAD (8, ea, ramregion) => load8 (ea, ramregion);
tcf::LOAD (16, ea, ramregion) => load16 (ea, ramregion);
tcf::LOAD (32, ea, ramregion) => load32 (ea, ramregion);
tcf::SIGN_EXTEND (32, 8, tcf::LOAD ( 8, ea, ramregion)) => load8s (ea, ramregion);
tcf::SIGN_EXTEND (32, 16, tcf::LOAD (16, ea, ramregion)) => load16s (ea, ramregion);
#
tcf::ZERO_EXTEND (32, 8, tcf::LOAD ( 8, ea, ramregion)) => load8 (ea, ramregion);
tcf::ZERO_EXTEND (32, 16, tcf::LOAD (16, ea, ramregion)) => load16 (ea, ramregion);
tcf::CONDITIONAL_LOAD (32, tcf::CMP (type, cc, t1, t2), y as tcf::LITERAL yes, n as tcf::LITERAL no)
=>
case *architecture # PentiumPro and higher has CMOVcc
#
PENTIUM => setcc (type, cc, t1, t2, to_int1 yes, to_int1 no);
_ => cmovcc (type, cc, t1, t2, y, n);
esac;
tcf::CONDITIONAL_LOAD (32, tcf::CMP (type, cc, t1, t2), yes, no)
=>
case *architecture # PentiumPro and higher has CMOVcc
#
PENTIUM => unknown_expression expression;
_ => cmovcc (type, cc, t1, t2, yes, no);
esac;
tcf::LET (s, e) => { do_void_expression s; do_expression (e, rd, notes);};
tcf::RNOTE (e, lnt::MARKREG f) => { f rd; do_expression (e, rd, notes);};
tcf::RNOTE (e, a) => do_expression (e, rd, a ! notes);
tcf::PRED (e, c) => do_expression (e, rd, lnt::CONTROL_DEPENDENCY_USE c ! notes);
tcf::REXT e => txc::compile_rext (reducer()) { e, rd, notes };
# Simplify and try again:
#
expression => unknown_expression expression;
esac;
} # fun do_expression
# Generate an expression and return its result register.
# If rewrite_pseudo is on, the result is guaranteed
# to be in a non-ramreg register:
#
also
fun expr (expression as tcf::CODETEMP_INFO(_, rd))
=>
if (is_ramreg rd) gen_expr expression;
else rd;
fi;
expr expression
=>
gen_expr expression;
end
also
fun gen_expr expression
=
{ rd = make_int_codetemp_info ();
do_expression (expression, rd, []);
rd;
}
# Compare an expression with zero.
# On the intel32, TEST is superior to AND for doing the same thing,
# since it doesn't need to write out the result in a register.
#
also
fun cmp_with_zero (cc as (tcf::EQ
| tcf::NE), e as tcf::BITWISE_AND (type, a, b), notes)
=>
{ case type
#
8 => test (mcf::TESTB, a, b, notes);
16 => test (mcf::TESTW, a, b, notes);
32 => test (mcf::TESTL, a, b, notes);
#
_ => do_expression (e, make_int_codetemp_info (), notes);
esac;
cc;
};
cmp_with_zero (cc, e, notes)
=>
{ e = case e # hack to disable the lea tweak XXX
#
tcf::ADD (_, a, b) => tcf::ADD (0, a, b);
e => e;
esac;
do_expression (e, make_int_codetemp_info (), notes);
cc;
};
end
# Emit a test.
# The available modes are
# r/m, r
# r/m, imm
# On selecting the right instruction: TESTL/TESTW/TESTB.
# When ANDing an operand with a constant
# that fits within 8 (or 16) bits, it is possible to use TESTB,
# (or TESTW) instead of TESTL. Because intel32 is little endian,
# this works for memory operands too. However, with TESTB, it is
# not possible to use registers other than
# AL, CL, BL, DL, and AH, CH, BH, DH. So, the best way is to
# perform register allocation first, and if the operand registers
# are one of EAX, ECX, EBX, or EDX, replace the TESTL instruction
# by TESTB.
#
also
fun test (testopcode, a, b, notes)
=
{ (maybe_commute_comparison (tcf::EQ, TRUE, a, b))
->
(_, operand1, operand2);
# translate r, r/m => r/m, r
#
my (operand1, operand2)
=
if (is_mem_operand operand2) (operand2, operand1);
else (operand1, operand2);
fi;
annotate_and_emit_expression
(
testopcode { lsrc=>operand1, rsrc=>operand2 },
notes
);
}
# %eflags <- src
also
fun move_to_eflags src
=
if (not (rkj::codetemps_are_same_color (src, rgk::eflags)))
#
move (mcf::DIRECT src, eax);
put_base_op mcf::LAHF;
fi
# dst <- %eflags
also
fun move_from_eflags dst
=
if (not (rkj::codetemps_are_same_color (dst, rgk::eflags)))
#
put_base_op mcf::SAHF;
move (eax, mcf::DIRECT dst);
fi
# Generate a condition code expression.
# The zero is for setting the condition code!
# I have no idea why this is used.
#
also
fun do_flag_expression (tcf::CMP (type, cc, t1, t2), rd, notes) # flag expressions handle zero/parity/overflow/... flag stuff.
=>
{ cmp (FALSE, type, cc, t1, t2, notes);
move_from_eflags rd;
};
do_flag_expression (tcf::CC (cond, rs), rd, notes)
=>
if (rkj::codetemps_are_same_color (rs, rgk::eflags)
or rkj::codetemps_are_same_color (rd, rgk::eflags) )
#
move_to_eflags rs;
move_from_eflags rd;
else
move'(mcf::DIRECT rs, mcf::DIRECT rd, notes);
fi;
do_flag_expression (tcf::CCNOTE (e, lnt::MARKREG f), rd, notes)
=>
{ f rd;
do_flag_expression (e, rd, notes);
};
do_flag_expression (tcf::CCNOTE (e, a), rd, notes)
=>
do_flag_expression (e, rd, a ! notes);
do_flag_expression (tcf::CCEXT e, cd, notes)
=>
txc::compile_ccext (reducer()) { e, ccd=>cd, notes };
do_flag_expression _
=>
error "do_flag_expression";
end
also
fun cc_expr e
=
error "cflag_expression"
# Generate a comparison and set the condition code;
# Return the actual cc used.
# If 'swappable' is TRUE we can reorder the operands.
#
also
fun cmp (swappable, type, cc, t1, t2, notes)
=
# == and != can always be reordered.
{
swappable = swappable
or cc == tcf::EQ
or cc == tcf::NE;
# Sometimes the comparison
# is not necessary because
# the condition-register bits
# are already set.
if (expression_is_zero t1
and expression_affects_zero_flag2 t2)
#
if swappable cmp_with_zero (tcp::swap_cond cc, t2, notes);
else gen_cmp (type, FALSE, cc, t1, t2, notes); # Can't reorder the comparison.
fi;
#
elif (expression_is_zero t2
and expression_affects_zero_flag2 t1)
#
cmp_with_zero (cc, t1, notes);
#
else
gen_cmp (type, swappable, cc, t1, t2, notes);
fi;
}
also
fun maybe_commute_comparison (cc, swappable, a, b)
=
# Given a and b which are the operands to a comparison (or test),
# return the appropriate condition code and operands.
# The available modes are:
# r/m, imm
# r/m, r
# r, r/m
{
operand1 = operand a;
operand2 = operand b;
# Try to fold in the operands whenever possible:
case ( is_immediate operand1,
is_immediate operand2
)
#
(TRUE, TRUE)
=>
(cc, move_to_reg operand1, operand2);
(TRUE, FALSE)
=>
if swappable (tcp::swap_cond cc, operand2, operand1);
else (cc, move_to_reg operand1, operand2);
fi;
(FALSE, TRUE)
=>
(cc, operand1, operand2);
(FALSE, FALSE)
=>
case (operand1, operand2)
#
(_, mcf::DIRECT _) => (cc, operand1, operand2);
(mcf::DIRECT _, _) => (cc, operand1, operand2);
(_, _) => (cc, move_to_reg operand1, operand2);
esac;
esac;
}
# Generate an actual comparison;
# return the actual cc used:
#
also
fun gen_cmp (type, swappable, cc, a, b, notes)
=
{ (maybe_commute_comparison (cc, swappable, a, b))
->
(cc, operand1, operand2);
annotate_and_emit_expression (mcf::CMPL { lsrc=>operand1, rsrc=>operand2 }, notes);
cc;
}
# Generate code for jumps:
#
also
fun do_goto (label_expression as tcf::LABEL (codelabel: lbl::Codelabel), _, notes) # Simple and common case -- jump to single known destination.
=>
annotate_and_emit_expression (mcf::JMP (mcf::IMMED_LABEL label_expression, [codelabel]), notes);
do_goto (tcf::LABEL_EXPRESSION label_expression, possible_destinations: List(lbl::Codelabel), notes) # possible_destinations will be empty if not known.
=>
annotate_and_emit_expression (mcf::JMP (mcf::IMMED_LABEL label_expression, possible_destinations), notes);
do_goto (ea, labs, notes) # Arbitrary computed goto. "ea" == "effective address".
=>
annotate_and_emit_expression (mcf::JMP (operand ea, labs), notes);
end
# Convert tcf::Expression to registerset:
#
also
fun tcfexpression_to_registerset expression
=
g (expression, rgk::empty_codetemplists)
where
add_ccreg = rkj::cls::add_codetemp_to_appropriate_kindlist;
#
fun g ([], acc) => acc;
g (tcf::INT_EXPRESSION (tcf::CODETEMP_INFO(_, r)) ! regs, acc) => g (regs, rgk::add_codetemp_info_to_appropriate_kindlist (r, acc));
g (tcf::FLOAT_EXPRESSION (tcf::CODETEMP_INFO_FLOAT(_, f)) ! regs, acc) => g (regs, rgk::add_codetemp_info_to_appropriate_kindlist (f, acc));
#
g (tcf::FLAG_EXPRESSION (tcf::CC (_, cc)) ! regs, acc) => g (regs, add_ccreg (cc, acc)); # flag expressions handle zero/parity/overflow/... flag stuff.
g (tcf::FLAG_EXPRESSION (tcf::FCC(_, cc)) ! regs, acc) => g (regs, add_ccreg (cc, acc));
g(_ ! regs, acc) => g (regs, acc);
end;
end
# Generate code for calls:
#
also
fun do_call (ea, flow, def, uses, ramregion, cuts_to, notes, pops)
=
annotate_and_emit_expression
(
mcf::CALL
{
operand => operand ea,
#
defs => tcfexpression_to_registerset def,
uses => tcfexpression_to_registerset uses,
#
return => return (rgk::empty_codetemplists, notes),
#
cuts_to,
ramregion,
pops
},
notes
)
where
fun return (set, [])
=>
set;
return (set, a ! notes)
=>
case (lnt::return_arg.peek a)
#
THE r => return (rkj::cls::add_codetemp_to_appropriate_kindlist (r, set), notes);
NULL => return (set, notes);
esac;
end;
end
# Generate code for integer stores; first move data to %eax
# This is mainly because we can't allot to registers like
# ah, dl, dx etc.
#
also
fun gen_store (mv_op, ea, d, ramregion, notes)
=
{ src = case (immed_or_reg (operand d))
#
src as mcf::DIRECT r
=>
if (rkj::codetemps_are_same_color (r, rgk::eax))
#
src;
else
move (src, eax);
eax;
fi;
src => src;
esac;
annotate_and_emit_expression (mcf::MOVE { mv_op, src, dst=>address (ea, ramregion) }, notes);
}
# Generate code for 8-bit integer stores
# movb has to use %eax as source. Stupid intel32!
#
also
fun store8 (ea, d, ramregion, notes)
=
gen_store (mcf::MOVB, ea, d, ramregion, notes)
also
fun store16 (ea, d, ramregion, notes)
=
annotate_and_emit_expression
(
mcf::MOVE
{
mv_op => mcf::MOVW,
src => immed_or_reg (operand d),
dst => address (ea, ramregion)
},
notes
)
also
fun store32 (ea, d, ramregion, notes)
=
move'
( immed_or_reg (operand d),
address (ea, ramregion),
notes
)
# Generate code for branching:
#
also
fun branch (tcf::CMP (type, cc, t1, t2), lab, notes)
=>
# Allow reordering of operands:
#
{ cc = cmp (TRUE, type, cc, t1, t2, []);
#
annotate_and_emit_expression
(
mcf::JCC { cond => cond cc, operand => immed_label lab },
notes
);
};
branch (tcf::FCMP (fty, fcc, t1, t2), lab, notes)
=>
fbranch (fty, fcc, t1, t2, lab, notes);
branch (flag_expression, lab, notes)
=>
{ do_flag_expression (flag_expression, rgk::eflags, []);
#
annotate_and_emit_expression
(
mcf::JCC
{
cond => cond (tct::cond_of flag_expression),
operand => immed_label lab
},
notes
);
};
end
# Generate code for floating point
# compare and branch:
#
also
fun fbranch (fty, fcc, t1, t2, lab, notes)
=
{ fun j cc
=
annotate_and_emit_expression
(
mcf::JCC { cond => cc, operand => immed_label lab },
notes
);
#
fbranching (fty, fcc, t1, t2, j);
}
also
fun fbranching (fty, fcc, t1, t2, j)
=
{ fun ignore_order (tcf::CODETEMP_INFO_FLOAT _) => TRUE;
ignore_order (tcf::FLOAD _) => TRUE;
ignore_order (tcf::FNOTE (e, _)) => ignore_order e;
ignore_order _ => FALSE;
end;
#
fun compare'() # Sethi-Ullman style
=
{ if ( ignore_order t1
or ignore_order t2
)
reduce_float_expression (fty, t2, []);
reduce_float_expression (fty, t1, []);
else
reduce_float_expression (fty, t1, []);
reduce_float_expression (fty, t2, []);
put_base_op (mcf::FXCH { operand=>rgk::st (1) });
fi;
put_base_op mcf::FUCOMPP;
fcc;
};
#
fun compare''()
=
# Direct style
# Try to make lsrc the memory operand
#
{ lsrc = foperand (fty, t1);
rsrc = foperand (fty, t2);
fsize = fsize fty;
#
fun cmp (lsrc, rsrc, fcc)
=
{ i = *architecture != PENTIUM;
put_base_op (mcf::FCMP { i, fsize, lsrc, rsrc } );
fcc;
};
case (lsrc, rsrc)
#
(mcf::FPR _, mcf::FPR _)
=>
cmp (lsrc, rsrc, fcc);
(mcf::FPR _, mem)
=>
cmp (mem, lsrc, tcp::swap_fcond fcc);
(mem, mcf::FPR _)
=>
cmp (lsrc, rsrc, fcc);
(lsrc, rsrc) # Can't be both memory!
=>
{ ftmp_r = make_float_codetemp_info();
ftmp = mcf::FPR ftmp_r;
put_base_op (mcf::FMOVE { fsize, src=>rsrc, dst=>ftmp } );
cmp (lsrc, ftmp, fcc);
};
esac;
};
#
fun compare ()
=
if (enable_fast_fpmode and *fast_floating_point)
#
compare''();
else compare' ();
fi;
#
fun andil i = put_base_op (mcf::BINARY { bin_op=>mcf::ANDL, src=>mcf::IMMED (i), dst=>eax } );
fun testil i = put_base_op (mcf::TESTL { lsrc=>eax, rsrc=>mcf::IMMED (i) } );
fun xoril i = put_base_op (mcf::BINARY { bin_op=>mcf::XORL, src=>mcf::IMMED (i), dst=>eax } );
fun cmpil i = put_base_op (mcf::CMPL { rsrc=>mcf::IMMED (i), lsrc=>eax } );
fun sahf () = put_base_op (mcf::SAHF);
#
fun branch fcc
=
case fcc
#
tcf::FEQ => { andil 0x4400; xoril 0x4000; j (mcf::EQ);};
tcf::FNEU => { andil 0x4400; xoril 0x4000; j (mcf::NE);};
tcf::FUO => { sahf(); j (mcf::PP);};
tcf::FGLE => { sahf(); j (mcf::NP);};
tcf::FGT => { testil 0x4500; j (mcf::EQ);};
tcf::FLEU => { testil 0x4500; j (mcf::NE);};
tcf::FGE => { testil 0x500; j (mcf::EQ);};
tcf::FLTU => { testil 0x500; j (mcf::NE);};
tcf::FLT => { andil 0x4500; cmpil 0x100; j (mcf::EQ);};
tcf::FGEU => { andil 0x4500; cmpil 0x100; j (mcf::NE);};
tcf::FLE => { andil 0x4100; cmpil 0x100; j (mcf::EQ);
cmpil 0x4000; j (mcf::EQ);
};
tcf::FGTU => { sahf(); j (mcf::PP); testil 0x4100; j (mcf::EQ);};
tcf::FNE => { testil 0x4400; j (mcf::EQ);};
tcf::FEQU => { testil 0x4400; j (mcf::NE);};
#
_ => error (cat [
"fbranch(", tcp::fcond_to_string fcc, ")"
]);
esac;
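# The magic masks above are x87 status-word bits as they appear in %ax
# after FNSTSW:  C0 == 0x0100, C2 == 0x0400, C3 == 0x4000.
# A compare leaves C3=C2=C0=0 for x > y, sets C0 for x < y, sets C3 for
# x == y, and sets all three for an unordered (NaN) result -- so e.g.
# FGT tests 0x4500 and branches when all three bits are clear.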
#                 P  Z  C
#     x < y       0  0  1
#     x > y       0  0  0
#     x = y       0  1  0
#     unordered   1  1  1
#
# When the result is unordered, all three flags (P, Z, C) are set.
#
fun fast_branch fcc
=
case fcc
#
tcf::FEQ => ordered_only (mcf::EQ);
tcf::FNEU => { j (mcf::PP); j (mcf::NE);};
tcf::FUO => j (mcf::PP);
tcf::FGLE => j (mcf::NP);
tcf::FGT => ordered_only (mcf::AA);
tcf::FLEU => j (mcf::BE);
tcf::FGE => ordered_only (mcf::AE);
tcf::FLTU => j (mcf::BB);
tcf::FLT => ordered_only (mcf::BB);
tcf::FGEU => { j (mcf::PP); j (mcf::AE);};
tcf::FLE => ordered_only (mcf::BE);
tcf::FGTU => { j (mcf::PP); j (mcf::AA);};
tcf::FNE => ordered_only (mcf::NE);
tcf::FEQU => j (mcf::EQ);
#
_ => error (cat [
"fbranch(", tcp::fcond_to_string fcc, ")"
]);
esac
also
fun ordered_only fcc
=
{ label = lbl::make_anonymous_codelabel ();
#
put_base_op (mcf::JCC { cond => mcf::PP, operand => immed_label label } );
#
j fcc;
#
buf.put_private_label label;
};
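# (I.e., for predicates that must come out FALSE on NaN operands --
# FEQ, FGT, FGE, FLT, FLE, FNE -- we first jump past the real test
# when PF signals "unordered", and only then test the condition proper.)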
fcc = compare ();
if ( *architecture != PENTIUM
and (enable_fast_fpmode and *fast_floating_point)
)
fast_branch fcc;
else
put_base_op mcf::FNSTSW;
branch fcc;
fi;
}
# ========================================================
# Floating point code generation starts here.
# Some generic fp routines first.
# ========================================================
# Can this tree be folded into the src operand of a floating point
# operation?
#
also
fun foldable_float_expression (tcf::CODETEMP_INFO_FLOAT _) => TRUE;
foldable_float_expression (tcf::FLOAD _) => TRUE;
foldable_float_expression (tcf::INT_TO_FLOAT(_, (16 | 32), _)) => TRUE;
foldable_float_expression (tcf::FLOAT_TO_FLOAT(_, _, t)) => foldable_float_expression t;
foldable_float_expression (tcf::FNOTE (t, _)) => foldable_float_expression t;
foldable_float_expression _ => FALSE;
end
# Move integer expression e of the given bitsize 'type' into a memory location.
# Returns a quadruple:
# (INTEGER, result bitsize, effective address of memory location, cleanup code)
#
also
fun convert_int_to_float (type, e)
=
{ operand = operand e;
if (is_mem_operand operand and (type == 16 or type == 32))
#
(INTEGER, type, operand, []);
else
(convert_int_to_float_in_registers { type, src=>operand, ref_notes => buf.get_notes() })
->
{ ops, temp_mem, cleanup };
put_ops ops;
(INTEGER, 32, temp_mem, cleanup);
fi;
}
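# (I.e. a 16- or 32-bit integer that is already a memory operand can
# feed the x87 FILD directly; anything else gets written out to a
# temporary memory slot by convert_int_to_float_in_registers, and the
# returned cleanup code is emitted by the caller once the loaded value
# has been consumed.)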
##########################################################
# Sethi-Ullman based floating point code
# generation as implemented by Lal George
##########################################################
also
fun fld (32, operand) => mcf::FLDS operand;
fld (64, operand) => mcf::FLDL operand;
fld (80, operand) => mcf::FLDT operand;
fld _ => error "fld";
end
also
fun fild (16, operand) => mcf::FILD operand;
fild (32, operand) => mcf::FILDL operand;
fild (64, operand) => mcf::FILDLL operand;
#
fild _ => error "fild";
end
also
fun fxld (INTEGER, type, operand) => fild (type, operand);
fxld (FLOAT, fty, operand) => fld (fty, operand);
end
also
fun fstp (32, operand) => mcf::FSTPS operand;
fstp (64, operand) => mcf::FSTPL operand;
fstp (80, operand) => mcf::FSTPT operand;
#
fstp _ => error "fstp";
end
# Generate code for floating point stores:
#
also
fun fstore'(fty, ea, d, ramregion, notes)
=
{ case d
#
tcf::CODETEMP_INFO_FLOAT (fty, fs) => put_base_op (fld (fty, mcf::FDIRECT fs));
_ => reduce_float_expression (fty, d, []);
esac;
annotate_and_emit_expression (fstp (fty, address (ea, ramregion)), notes);
}
# Generate code for floating point loads:
#
also
fun fload'(fty, ea, ramregion, fd, notes)
=
{ ea = address (ea, ramregion);
annotate_and_emit_expression (fld (fty, ea), notes);
if (not (rkj::codetemps_are_same_color (fd, st0)))
#
put_base_op (fstp (fty, mcf::FDIRECT fd));
fi;
}
also
fun float_expression' e
=
{ reduce_float_expression (64, e, []);
rgk::st (0);
}
also # Compute value of expression to 'fty'-bit precision, leave result in 'to_reg'.
fun do_float_expression' (fty, tcf::CODETEMP_INFO_FLOAT(_, fs), to_reg, notes) # This is "slow" floating point -- for "fast" see: do_float_expression''
=>
if (not (rkj::codetemps_are_same_color (fs, to_reg))) # Copy is needed only when source and destination codetemps differ.
#
annotate_and_emit_expression'
(
mcf::COPY
{ kind => rkj::FLOAT_REGISTER,
size_in_bits => 64, # Ignoring all input size-in-bitss info!
dst => [to_reg],
src => [fs],
tmp => NULL
},
notes
);
fi;
do_float_expression' (_, tcf::FLOAD (fty, ea, ramregion), to_reg, notes)
=>
fload' (fty, ea, ramregion, to_reg, notes);
do_float_expression' (fty, tcf::FEXT float_expression, to_reg, notes)
=>
{ txc::compile_fext (reducer()) { e=>float_expression, fd=>to_reg, notes };
if (not (rkj::codetemps_are_same_color (to_reg, st0)))
#
put_base_op (fstp (fty, mcf::FDIRECT to_reg));
fi;
};
do_float_expression' (fty, e, to_reg, notes)
=>
{ reduce_float_expression (fty, e, []);
if (not (rkj::codetemps_are_same_color (to_reg, st0)))
#
annotate_and_emit_expression (fstp (fty, mcf::FDIRECT to_reg), notes);
fi;
};
end
# Generate floating point expression using Sethi-Ullman's scheme:
# This function evaluates a floating point expression and leaves
# the result in %ST (0) -- top of floating point stack.
#
# If you don't have a copy of the Dragon book you can refer to:
#
# http://en.wikipedia.org/wiki/Sethi%E2%80%93Ullman_algorithm
#
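# A small (hypothetical) example of the numbering done by su/su_fold
# below: for
#
#     (a + b) * (c - d)
#
# with a, b, c, d in float codetemps, the foldable right operands b and
# d get label 0, so (a + b) and (c - d) each get label 1, and the
# product gets label 2 (equal labels bump by one) -- meaning two x87
# stack slots suffice to evaluate the whole tree without spilling.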
also
fun reduce_float_expression (fty, float_expression, notes)
=
{ st = mcf::ST (rgk::st 0);
st1 = mcf::ST (rgk::st 1);
cleanup_code = REF [] : Ref( List( mcf::Machine_Op ) );
Su_Tree
= LEAF (Int, tcf::Float_Expression, Ans)
| BINARY (Int, tcf::Float_Bitsize, Fbinop, Su_Tree, Su_Tree, Ans)
| UNARY (Int, tcf::Float_Bitsize, mcf::Fun_Op, Su_Tree, Ans)
also
Fbinop
=
FADD
| FSUB | FMUL | FDIV | FIADD | FISUB | FIMUL | FIDIV
withtype Ans = note::Notes;
#
fun label (LEAF (n, _, _)) => n;
label (BINARY (n, _, _, _, _, _)) => n;
label (UNARY (n, _, _, _, _)) => n;
end;
#
fun annotate (LEAF (n, x, notes), a) => LEAF (n, x, a ! notes);
annotate (BINARY (n, t, b, x, y, notes), a) => BINARY (n, t, b, x, y, a ! notes);
annotate (UNARY (n, t, u, x, notes), a) => UNARY (n, t, u, x, a ! notes);
end;
# Generate expression tree with sethi-ullman numbers:
#
fun su (e as tcf::CODETEMP_INFO_FLOAT _) => LEAF (1, e, []);
su (e as tcf::FLOAD _) => LEAF (1, e, []);
su (e as tcf::INT_TO_FLOAT _) => LEAF (1, e, []);
su (tcf::FLOAT_TO_FLOAT(_, _, t)) => su t;
su (tcf::FNOTE (t, a)) => annotate (su t, a);
su (tcf::FABS (fty, t)) => su_unary (fty, mcf::FABS, t);
su (tcf::FNEG (fty, t)) => su_unary (fty, mcf::FCHS, t);
su (tcf::FSQRT (fty, t)) => su_unary (fty, mcf::FSQRT, t);
su (tcf::FADD (fty, t1, t2)) => su_com_binary (fty, FADD, FIADD, t1, t2);
su (tcf::FMUL (fty, t1, t2)) => su_com_binary (fty, FMUL, FIMUL, t1, t2);
su (tcf::FSUB (fty, t1, t2)) => su_binary (fty, FSUB, FISUB, t1, t2);
su (tcf::FDIV (fty, t1, t2)) => su_binary (fty, FDIV, FIDIV, t1, t2);
su _ => error "su";
end
# Try to fold in the memory operand
# or integer conversion:
#
also
fun su_fold (e as tcf::CODETEMP_INFO_FLOAT _) => (LEAF (0, e, []), FALSE);
su_fold (e as tcf::FLOAD _) => (LEAF (0, e, []), FALSE);
su_fold (e as tcf::INT_TO_FLOAT(_, (16 | 32), _)) => (LEAF (0, e, []), TRUE);
su_fold (tcf::FLOAT_TO_FLOAT(_, _, t)) => su_fold t;
su_fold (tcf::FNOTE (t, a))
=>
{ my (t, integer) = su_fold t;
(annotate (t, a), integer);
};
su_fold e
=>
(su e, FALSE);
end
# Form unary tree:
#
also
fun su_unary (fty, funary, t)
=
{ t = su t;
UNARY (label t, fty, funary, t, []);
}
# Form binary tree:
#
also
fun su_binary (fty, binop, ibinop, t1, t2)
=
{ t1 = su t1;
my (t2, integer) = su_fold t2;
n1 = label t1;
n2 = label t2;
n = if (n1==n2) n1+1;
else int::max (n1, n2);
fi;
my_op = integer ?? ibinop
:: binop;
BINARY (n, fty, my_op, t1, t2, []);
}
# Try to fold in the operand if possible.
# This only applies to commutative operations.
#
also
fun su_com_binary (fty, binop, ibinop, t1, t2)
=
{ my (t1, t2)
=
if (foldable_float_expression t2)
(t1, t2);
else (t2, t1);
fi;
su_binary (fty, binop, ibinop, t1, t2);
}
also
fun same_tree (LEAF(_, tcf::CODETEMP_INFO_FLOAT (t1, f1), []),
LEAF(_, tcf::CODETEMP_INFO_FLOAT (t2, f2), []))
=>
t1 == t2 and rkj::codetemps_are_same_color (f1, f2);
same_tree _
=>
FALSE;
end;
# Traverse tree and generate code
#
fun gencode (LEAF(_, t, notes))
=>
annotate_and_emit_expression (fxld (leaf_ea t), notes);
gencode (BINARY(_, _, binop, x, t2 as LEAF (0, y, a1), a2))
=>
{ gencode x;
(leaf_ea y) -> (_, fty, src);
#
fun gen (code)
=
annotate_and_emit_expression (code, a1 @ a2);
#
fun binary (oper32, oper64)
=
if (same_tree (x, t2))
#
gen (mcf::FBINARY { bin_op=>oper64, src=>st, dst=>st } );
else
op = if (not (is_mem_operand src))
#
oper64;
else
case fty
#
32 => oper32;
64 => oper64;
_ => error "gencode: BINARY";
esac;
fi;
gen (mcf::FBINARY { bin_op=>op, src, dst=>st } );
fi;
#
fun ibinary (oper16, oper32)
=
gen (mcf::FIBINARY { bin_op, src } )
where
bin_op = case fty
#
16 => oper16;
32 => oper32;
#
_ => error "gencode: IBINARY";
esac;
end;
case binop
#
FADD => binary (mcf::FADDS, mcf::FADDL);
FSUB => binary (mcf::FSUBS, mcf::FSUBL);
FMUL => binary (mcf::FMULS, mcf::FMULL);
FDIV => binary (mcf::FDIVS, mcf::FDIVL);
FIADD => ibinary (mcf::FIADDS, mcf::FIADDL);
FISUB => ibinary (mcf::FISUBS, mcf::FISUBL);
FIMUL => ibinary (mcf::FIMULS, mcf::FIMULL);
FIDIV => ibinary (mcf::FIDIVS, mcf::FIDIVL);
esac;
};
gencode (BINARY(_, fty, binop, t1, t2, notes))
=>
{ fun do_it (t1, t2, op, oper_p, oper_rp)
=
{ # oper_p  => st (1) := st oper st (1); [pop]
# oper_rp => st (1) := st (1) oper st; [pop]
n1 = label t1;
n2 = label t2;
if (n1 < n2 and n1 <= 7)
#
gencode t2;
gencode t1;
annotate_and_emit_expression (mcf::FBINARY { bin_op=>oper_p, src=>st, dst=>st1 }, notes);
#
elif (n2 <= n1 and n2 <= 7)
#
gencode t1;
gencode t2;
annotate_and_emit_expression (mcf::FBINARY { bin_op=>oper_rp, src=>st, dst=>st1 }, notes);
else
# Both labels > 7
fs = mcf::FDIRECT (make_float_codetemp_info());
gencode t2;
put_base_op (fstp (fty, fs));
gencode t1;
annotate_and_emit_expression (mcf::FBINARY { bin_op=>op, src=>fs, dst=>st }, notes);
fi;
};
case binop
#
FADD => do_it (t1, t2, mcf::FADDL, mcf::FADDP, mcf::FADDP);
FMUL => do_it (t1, t2, mcf::FMULL, mcf::FMULP, mcf::FMULP);
FSUB => do_it (t1, t2, mcf::FSUBL, mcf::FSUBP, mcf::FSUBRP);
FDIV => do_it (t1, t2, mcf::FDIVL, mcf::FDIVP, mcf::FDIVRP);
#
_ => error "gencode::BINARY";
esac;
};
gencode (UNARY(_, _, unary_op, su, notes))
=>
{ gencode (su);
annotate_and_emit_expression (mcf::FUNARY (unary_op), notes);
};
end
# Generate code for a leaf.
# Returns the type and an effective address
#
also
fun leaf_ea (tcf::CODETEMP_INFO_FLOAT (fty, f)) => (FLOAT, fty, mcf::FDIRECT f);
leaf_ea (tcf::FLOAD (fty, ea, ramregion)) => (FLOAT, fty, address (ea, ramregion));
leaf_ea (tcf::INT_TO_FLOAT(_, 32, t)) => int2real (32, t);
leaf_ea (tcf::INT_TO_FLOAT(_, 16, t)) => int2real (16, t);
leaf_ea (tcf::INT_TO_FLOAT(_, 8, t)) => int2real (8, t);
leaf_ea _ => error "leafEA";
end
also
fun int2real (type, e)
=
{ (convert_int_to_float (type, e))
->
(_, type, ea, cleanup);
cleanup_code := *cleanup_code @ cleanup;
(INTEGER, type, ea);
};
gencode (su float_expression);
put_ops *cleanup_code;
} # fun reduce_float_expression
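# (gencode above keeps intermediate results on the x87 stack; only when
# both subtrees of a binary node have labels greater than 7 -- the x87
# stack holds just 8 values -- is the t2 subtree spilled to a fresh
# float codetemp and then used as the memory source operand of the
# final FBINARY.)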
# ========================================================
# This section generates 3-address style floating
# point code.
# ========================================================
also
fun isize 16 => mcf::INT16;
isize 32 => mcf::INT32;
isize _ => error "isize";
end
also
fun fstore''(fty, ea, d, ramregion, notes) # "fast" floating point; for "slow" see: fstore'
=
{ floating_point_used := TRUE;
annotate_and_emit_expression
(
mcf::FMOVE { fsize => fsize fty,
dst => address (ea, ramregion),
src => foperand (fty, d)
},
notes
);
}
also
fun fload''(fty, ea, ramregion, d, notes)
=
{ floating_point_used := TRUE;
annotate_and_emit_expression (mcf::FMOVE { fsize=>fsize fty, src=>address (ea, ramregion), dst=>ea_of_float_reg d }, notes);
}
also
fun fiload''(ity, ea, d, notes)
=
{ floating_point_used := TRUE;
annotate_and_emit_expression (mcf::FILOAD { isize=>isize ity, ea, dst=>ea_of_float_reg d }, notes);
}
also
fun float_expression''(e as tcf::CODETEMP_INFO_FLOAT(_, f))
=>
if (is_framreg f) trans_float_expression e;
else f;
fi;
float_expression'' e
=>
trans_float_expression e;
end
also
fun trans_float_expression e
=
{ to_reg = make_float_codetemp_info();
do_float_expression'' (64, e, to_reg, []);
to_reg;
}
# Process a floating point operand.
# Put operand in register when possible.
# The operand should match the given fty.
#
also
fun foperand (fty, e as tcf::CODETEMP_INFO_FLOAT (fty', f))
=>
if (fty == fty') ea_of_float_reg f;
else mcf::FPR (float_expression'' e);
fi;
foperand (fty, tcf::FLOAT_TO_FLOAT(_, _, e))
=>
foperand (fty, e); # nop on the intel32
foperand (fty, e as tcf::FLOAD (fty', ea, ramregion))
=>
# Fold operand when
# the precision matches:
#
if (fty == fty') address (ea, ramregion);
else mcf::FPR (float_expression'' e);
fi;
foperand (fty, e)
=>
mcf::FPR (float_expression'' e);
end
# Process a floating point operand.
# Try to fold in a memory operand or
# conversion from an integer:
#
also
fun fioperand (tcf::CODETEMP_INFO_FLOAT (fty, f)) => (FLOAT, fty, ea_of_float_reg f, []);
fioperand (tcf::FLOAD (fty, ea, ramregion)) => (FLOAT, fty, address (ea, ramregion), []);
#
fioperand (tcf::FLOAT_TO_FLOAT(_, _, e)) => fioperand e; # Nop on intel32.
fioperand (tcf::INT_TO_FLOAT(_, type, e)) => convert_int_to_float (type, e);
#
fioperand (tcf::FNOTE (e, notes)) => fioperand (e); # XXX
fioperand (e) => (FLOAT, 64, mcf::FPR (float_expression'' e), []);
end
# Generate binary operator.
#
# Since the real binary operators
# do not take memory as destination,
# we must ensure this does not happen:
#
also
fun fbinop (target_fty, bin_op, bin_op_r, ibin_op, ibin_op_r, lsrc, rsrc, fd, notes)
=
# Put the mem operand in rsrc
{
fun is_mem_operand (tcf::CODETEMP_INFO_FLOAT(_, f)) => is_framreg f;
is_mem_operand (tcf::FLOAD _ ) => TRUE;
is_mem_operand (tcf::INT_TO_FLOAT(_, (16 | 32), _)) => TRUE;
is_mem_operand (tcf::FLOAT_TO_FLOAT(_, _, t)) => is_mem_operand t;
is_mem_operand (tcf::FNOTE (t, _) ) => is_mem_operand t;
is_mem_operand _ => FALSE;
end;
my (bin_op, ibin_op, lsrc, rsrc)
=
if (is_mem_operand lsrc)
(bin_op_r, ibin_op_r, rsrc, lsrc);
else (bin_op, ibin_op, lsrc, rsrc);
fi;
lsrc = foperand (target_fty, lsrc);
my (kind, fty, rsrc, code)
=
fioperand rsrc;
#
fun dst_must_be_freg f
=
if (target_fty == 64)
#
annotate_and_emit_expression (f(ea_of_float_reg fd), notes);
else
tmp_r = make_float_codetemp_info();
tmp = mcf::FPR tmp_r;
annotate_and_emit_expression (f tmp, notes);
put_base_op (mcf::FMOVE { fsize => fsize target_fty,
src => tmp,
dst => ea_of_float_reg fd
}
);
fi;
case kind
#
FLOAT =>
dst_must_be_freg (\\ dst
=
mcf::FBINOP { fsize => fsize fty,
bin_op, lsrc, rsrc, dst
}
);
INTEGER =>
{ dst_must_be_freg
(\\ dst =
mcf::FIBINOP { isize=>isize fty, bin_op=>ibin_op,
lsrc, rsrc, dst
}
);
put_ops code;
};
esac;
}
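# (A note on dst_must_be_freg above: for 64-bit results the
# FBINOP/FIBINOP writes to fd's operand directly; for other target
# precisions the result is first produced in a fresh float codetemp and
# then FMOVEd to fd, presumably so the FMOVE performs the rounding to
# the requested precision.)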
also
fun funop (fty, un_op, src, fd, notes)
=
{ src = foperand (fty, src);
annotate_and_emit_expression (mcf::FUNOP { fsize=>fsize fty, un_op, src, dst=>ea_of_float_reg fd }, notes);
}
also
fun do_float_expression'' (fty, expression, to_reg, notes) # Compute value of 'expression' to 'fty'-bit precision, leave result in 'to_reg'.
= # This is "fast" floating point (currently the norm) -- for "slow" (vanilla) floating point see: do_float_expression'
{ floating_point_used := TRUE;
case expression
#
tcf::CODETEMP_INFO_FLOAT(_, fs)
=>
if (not (rkj::codetemps_are_same_color (fs, to_reg) ))
#
copy_floats'' (fty, [to_reg], [fs], notes);
fi;
# Intel32 (x86) does everything as 80-bits internally.
# Binary operators:
#
tcf::FADD (_, a, b) => fbinop (fty, mcf::FADDL, mcf::FADDL, mcf::FIADDL, mcf::FIADDL, a, b, to_reg, notes);
tcf::FSUB (_, a, b) => fbinop (fty, mcf::FSUBL, mcf::FSUBRL, mcf::FISUBL, mcf::FISUBRL, a, b, to_reg, notes);
tcf::FMUL (_, a, b) => fbinop (fty, mcf::FMULL, mcf::FMULL, mcf::FIMULL, mcf::FIMULL, a, b, to_reg, notes);
tcf::FDIV (_, a, b) => fbinop (fty, mcf::FDIVL, mcf::FDIVRL, mcf::FIDIVL, mcf::FIDIVRL, a, b, to_reg, notes);
# Unary operators:
#
tcf::FNEG (_, a) => funop (fty, mcf::FCHS, a, to_reg, notes);
tcf::FABS (_, a) => funop (fty, mcf::FABS, a, to_reg, notes);
tcf::FSQRT (_, a) => funop (fty, mcf::FSQRT, a, to_reg, notes);
# Load:
#
tcf::FLOAD (fty, ea, ramregion) => fload''(fty, ea, ramregion, to_reg, notes);
# Type conversions:
#
tcf::FLOAT_TO_FLOAT(_, _, e) => do_float_expression''(fty, e, to_reg, notes);
tcf::INT_TO_FLOAT(_, type, e)
=>
{ (convert_int_to_float (type, e))
->
(_, type, ea, cleanup);
fiload'' (type, ea, to_reg, notes);
put_ops cleanup;
};
tcf::FNOTE (e, lnt::MARKREG f)
=>
{ f to_reg;
#
do_float_expression'' (fty, e, to_reg, notes);
};
tcf::FNOTE (e, a) => do_float_expression''(fty, e, to_reg, a ! notes);
tcf::FPRED (e, c) => do_float_expression''(fty, e, to_reg, lnt::CONTROL_DEPENDENCY_USE c ! notes);
tcf::FEXT float_expression
=>
txc::compile_fext (reducer()) { e=>float_expression, fd=>to_reg, notes };
_ => error("doFexpr''");
esac;
}
###################################################
# Tie the two styles of fp code generation together
###################################################
also
fun fstore (fty, ea, d, ramregion, notes)
=
if (enable_fast_fpmode and *fast_floating_point) fstore''(fty, ea, d, ramregion, notes);
else fstore' (fty, ea, d, ramregion, notes);
fi
also
fun fload (fty, ea, d, ramregion, notes)
=
if (enable_fast_fpmode and *fast_floating_point)
fload''(fty, ea, d, ramregion, notes);
else fload' (fty, ea, d, ramregion, notes);
fi
also
fun float_expression e
=
if (enable_fast_fpmode
and *fast_floating_point)
float_expression'' e;
else float_expression' e;
fi
also
fun do_float_expression (fty, e, to_reg, notes)
=
if (enable_fast_fpmode and *fast_floating_point) do_float_expression'' (fty, e, to_reg, notes);
else do_float_expression' (fty, e, to_reg, notes);
fi
##################################################################
# Speedups for x := x op y
# Special speedups:
# Generate a binary operator; the result must be in memory.
# The source must not be in memory.
##################################################################
also
fun binary_mem (bin_op, src, dst, ramregion, notes)
=
annotate_and_emit_expression
(
mcf::BINARY
{
bin_op,
src => immed_or_reg (operand src),
dst => address (dst, ramregion)
},
notes
)
also
fun unary_mem (un_op, operand, ramregion, notes)
=
annotate_and_emit_expression (mcf::UNARY { un_op, operand=>address (operand, ramregion) }, notes)
also
fun is_one (tcf::LITERAL n) => n == 1;
is_one _ => FALSE;
end
# Perform speedups based on recognizing
# x := x op y or
# x := y op x
# first.
#
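# For example (informally), a statement like
#
#     tcf::STORE_INT (32, ea, tcf::ADD (32, tcf::LOAD (32, ea, _), tcf::LITERAL 1), _)
#
# should become a single  incl <ea>  memory operation, and with a
# non-unit literal a single  addl $k, <ea>  -- assuming opcodes32 maps
# 'inc'/'add' to INCL/ADDL, which is what the names suggest -- instead
# of a separate load, add and store.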
also
fun store (type, ea, d, ramregion, notes,
{ inc, dec, add, sub, notx, neg, shl, shr, sar, orx, andx, xor },
do_store
)
=
{ fun default ()
=
do_store (ea, d, ramregion, notes);
#
fun binary1 (t, t', unary, binary, ea', x)
=
if (t == type and t' == type)
#
if (tcj::same_int_expression (ea, ea'))
#
if (is_one x) unary_mem (unary, ea, ramregion, notes);
else binary_mem (binary, x, ea, ramregion, notes);
fi;
else
default ();
fi;
else
default ();
fi;
#
fun unary (t, un_op, ea')
=
if (t == type
and tcj::same_int_expression (ea, ea')
)
unary_mem (un_op, ea, ramregion, notes);
else
default ();
fi;
#
fun binary (t, t', bin_op, ea', x)
=
if ( t == type
and t' == type
and tcj::same_int_expression (ea, ea')
)
binary_mem (bin_op, x, ea, ramregion, notes);
else
default ();
fi;
#
fun binary_com1 (t, un_op, bin_op, x, y)
=
if (t != type)
#
default ();
else
fun again ()
=
case y
#
tcf::LOAD (type', ea', _)
=>
if (type' == type
and tcj::same_int_expression (ea, ea')
)
if (is_one x) unary_mem ( un_op, ea, ramregion, notes);
else binary_mem (bin_op, x, ea, ramregion, notes);
fi;
else default();
fi;
_ => default();
esac;
case x
#
tcf::LOAD (type', ea', _)
=>
if (type' == type
and tcj::same_int_expression (ea, ea')
)
if (is_one y) unary_mem ( un_op, ea, ramregion, notes);
else binary_mem (bin_op, y, ea, ramregion, notes);
fi;
else again();
fi;
_ => again();
esac;
fi;
#
fun binary_com (t, bin_op, x, y)
=
if (t != type)
#
default();
else
fun again ()
=
case y
#
tcf::LOAD (type', ea', _)
=>
if (type' == type
and tcj::same_int_expression (ea, ea')
)
binary_mem (bin_op, x, ea, ramregion, notes);
else
default ();
fi;
_ => default ();
esac;
case x
tcf::LOAD (type', ea', _)
=>
if (type' == type
and tcj::same_int_expression (ea, ea')
)
binary_mem (bin_op, y, ea, ramregion, notes);
else
again ();
fi;
_ => again ();
esac;
fi;
case d
#
tcf::ADD (t, x, y) => binary_com1 (t, inc, add, x, y);
tcf::SUB (t, tcf::LOAD (t', ea', _), x) => binary1 (t, t', dec, sub, ea', x);
tcf::BITWISE_OR (t, x, y) => binary_com (t, orx, x, y);
tcf::BITWISE_AND (t, x, y) => binary_com (t, andx, x, y);
tcf::BITWISE_XOR (t, x, y) => binary_com (t, xor, x, y);
tcf::LEFT_SHIFT (t, tcf::LOAD (t', ea', _), x) => binary (t, t', shl, ea', x);
tcf::RIGHT_SHIFT_U (t, tcf::LOAD (t', ea', _), x) => binary (t, t', shr, ea', x);
tcf::RIGHT_SHIFT (t, tcf::LOAD (t', ea', _), x) => binary (t, t', sar, ea', x);
tcf::NEG (t, tcf::LOAD (t', ea', _)) => unary (t, neg, ea');
tcf::BITWISE_NOT (t, tcf::LOAD (t', ea', _)) => unary (t, notx, ea');
_ => default();
esac;
} # fun store
# Generate code for a statement.
#
also
fun do_void_expression' (tcf::LOAD_INT_REGISTER (_, rd, e), notes) => do_expression ( e, rd, notes); # "rd" == "destination int register".
do_void_expression' (tcf::LOAD_FLOAT_REGISTER (fty, fd, e), notes) => do_float_expression (fty, e, fd, notes); # "fd" == "destination float register".
do_void_expression' (tcf::LOAD_INT_REGISTER_FROM_FLAGS_REGISTER (ccd, e), notes) => do_flag_expression ( e, ccd, notes); # "ccd" == "destination int register".
#
do_void_expression' (tcf::MOVE_INT_REGISTERS (_, dst, src), notes) => copy_ints (dst, src, notes); # Parallel copy of N sources to N destinations.
do_void_expression' (tcf::MOVE_FLOAT_REGISTERS (fty, dst, src), notes) => copy_floats (fty, dst, src, notes); # Parallel copy of N sources to N destinations.
#
do_void_expression' ( tcf::GOTO ( destination: tcf::Int_Expression, # Typically just a tcf::LABEL.
possible_destinations: List( lbl::Codelabel ) # possible_destinations is empty if unknown.
),
notes
)
=>
do_goto (destination, possible_destinations, notes);
do_void_expression' (tcf::CALL { funct, targets, defs, uses, region, pops, ... }, notes)
=>
do_call (funct, targets, defs, uses, region, [], notes, pops);
do_void_expression' (tcf::FLOW_TO (tcf::CALL { funct, targets, defs, uses, region, pops, ... }, cut_to), notes)
=>
do_call (funct, targets, defs, uses, region, cut_to, notes, pops);
do_void_expression' (tcf::RET _, notes)
=>
annotate_and_emit_expression (mcf::RET NULL, notes);
do_void_expression' (tcf::STORE_INT ( 8, ea, d, ramregion), notes) => store ( 8, ea, d, ramregion, notes, opcodes8, store8);
do_void_expression' (tcf::STORE_INT (16, ea, d, ramregion), notes) => store ( 16, ea, d, ramregion, notes, opcodes16, store16);
do_void_expression' (tcf::STORE_INT (32, ea, d, ramregion), notes) => store ( 32, ea, d, ramregion, notes, opcodes32, store32);
do_void_expression' (tcf::STORE_FLOAT (fty, ea, d, ramregion), notes) => fstore (fty, ea, d, ramregion, notes);
do_void_expression' (tcf::IF_GOTO (cc, lab), notes) => branch (cc, lab, notes);
do_void_expression' (tcf::DEFINE l, _) => buf.put_private_label l;
do_void_expression' (tcf::LIVE s, notes) => annotate_and_emit_expression' (mcf::LIVE { regs=>tcfexpression_to_registerset s, spilled=>rgk::empty_codetemplists }, notes);
do_void_expression' (tcf::DEAD s, notes) => annotate_and_emit_expression' (mcf::DEAD { regs=>tcfexpression_to_registerset s, spilled=>rgk::empty_codetemplists }, notes);
do_void_expression' (tcf::NOTE (s, a), notes)
=>
do_void_expression' (s, a ! notes);
do_void_expression' (tcf::EXT s, notes)
=>
txc::compile_sext (reducer()) { void_expression=>s, notes };
do_void_expression' (s, _)
=>
do_void_expressions (tct::compile_void_expression s);
end
also
fun do_void_expression s # This is our external 'put_op' entrypoint, used (in particular) in
=                        # src/lib/compiler/back/low/main/main/translate-nextcode-to-treecode-g.pkg
do_void_expression' (s, [])
also
fun do_void_expressions ss
=
apply do_void_expression ss
also
fun start_new_cccomponent' _
=
{ # Must be cleared by the client.
# if rewrite_ramreg ramregs_used := 0u0; fi; # No obvious variant on "ramregs_used" exists in the codebase.
floating_point_used := FALSE;
branch_on_overflow_instruction_and_label := NULL;
buf.start_new_cccomponent 0; # The '0' is a dummy here; in some contexts the argument is used to size the codesegment buffer.
}
also
fun get_completed_cccomponent' a
=
{ case *branch_on_overflow_instruction_and_label
#
THE (_, codelabel)
=>
{ buf.put_private_label codelabel;
#
put_base_op mcf::INTO; # 64-bit issue: Intel64 architecture eliminates INTO instruction (changes that opcode into a new instruction prefix byte).
};
NULL => ();
esac;
# If floating point has been used,
# allot an extra codetemp just
# in case we didn't use any explicit
# codetemps:
#
if *floating_point_used make_float_codetemp_info (); (); fi;
buf.get_completed_cccomponent a;
}
also
fun reducer ()
=
tcs::REDUCER
{
reduce_int_expression => expr,
reduce_float_expression => float_expression,
reduce_flag_expression => cc_expr,
reduce_void_expression => do_void_expression',
operand,
reduce_operand,
address_of => \\ e = address (e, mcf::rgn::memory), # XXX
put_op => annotate_and_emit_expression',
codestream => buf,
treecode_stream => self()
}
also
fun self ()
=
{
start_new_cccomponent => start_new_cccomponent',
get_completed_cccomponent => get_completed_cccomponent',
put_op => do_void_expression,
#
put_pseudo_op => buf.put_pseudo_op,
put_private_label => buf.put_private_label,
put_public_label => buf.put_public_label,
put_comment => buf.put_comment,
put_bblock_note => buf.put_bblock_note,
get_notes => buf.get_notes,
#
put_fn_liveout_info => \\ tcf_expr = buf.put_fn_liveout_info (tcfexpression_to_registerset tcf_expr)
}
: Treecode_Codebuffer;
self ();
}; # fun translate_treecode_to_machcode
end; # stipulate
}; # generic package translate_treecode_to_machcode_intel32_g
end; # stipulate