## floating-point-code-intel32-g.pkg
# Compiled by:
#
src/lib/compiler/back/low/intel32/backend-intel32.lib# This phase takes a cluster with pseudo intel32
# fp instructions, performs liveness analysis
# to determine their live ranges, and rewrites
# the program into the correct stack based code.
#
# The Basics
# ----------
# o We assume there are 7 pseudo fp registers, %fp (0), ..., %fp (6),
# which are mapped onto the %st stack. One stack location is reserved
# for holding temporaries.
# o Important: for floating point comparisons, we actually need
# two extra stack locations in the worst case. We handle this by
# specifying that the instruction define an extra temporary fp register
# when necessary.
# o The mapping between %fp <-> %st may change from program point to
# program point. We keep track of this lazy renaming and try to minimize
# the number of FXCH that we insert.
# o At split and merge points, we may get inconsistent %fp <-> %st mappings.
# We handle this by inserting the appropriate renaming code.
# o Parallel copies (renaming) are rewritten into a sequence of FXCHs!
#
# Pseudo fp instructions Semantics
# --------------------------------------
# FMOVE src, dst dst := src
# FILOAD ea, dst dst := cvti2f (mem[ea])
# FBINOP lsrc, rsrc, dst dst := lsrc * rsrc
# FIBINOP lsrc, rsrc, dst dst := lsrc * cvti2f (rsrc)
# FUNOP src, dst dst := unaryOp src
# FCMP lsrc, rsrc fp condition code := fcmp (lsrc, rsrc)
#
# An instruction may use its source operand (s) destructively.
# We find this info using a global liveness analysis.
#
# The Translation
# ---------------
# o We keep track of the namings between %fp registers and the
# %st(..) staack locations.
# o FXCH and FLDL are inserted at the appropriate places to move operands
# to %st (0). FLDL is used if the operand is not dead. FXCH is used
# if the operand is the last use.
# o FCOPY's between pseudo %fp registers are done by software renaming
# and generate no code by itself!
# o FSTL %st (1) are also generated to pop the stack after the last use
# of an operand.
#
# Note
# ----
# 1. This module should be run after floating point register allocation.
#
# -- Allen Leung Leung (leunga@cs.nyu.edu)
#
# See also:
#
# Some notes on the new MLRISC Intel32 floating point code generator (Draft)
# Allen Leung, Lal George
# circa 2000, 17p
# http://www.smlnj.org//compiler-notes/intel32-fp.ps
### "You can't really focus yourself for years
### unless you have undivided concentration,
### which too many spectators would have destroyed."
###
### -- Andrew Wiles
stipulate
package an = note; # note is from
src/lib/src/note.pkg package ast = asm_stream; # asm_stream is from
src/lib/compiler/back/low/emit/asm-stream.pkg package cos = registerkinds_junk::cos; # "cos" == "colorset".
package fil = file__premicrothread; # file__premicrothread is from
src/lib/std/src/posix/file--premicrothread.pkg package iht = int_hashtable; # int_hashtable is from
src/lib/src/int-hashtable.pkg package im = int_red_black_map; # int_red_black_map is from
src/lib/src/int-red-black-map.pkg package lbl = codelabel; # codelabel is from
src/lib/compiler/back/low/code/codelabel.pkg package lem = lowhalf_error_message; # lowhalf_error_message is from
src/lib/compiler/back/low/control/lowhalf-error-message.pkg package lms = list_mergesort; # list_mergesort is from
src/lib/src/list-mergesort.pkg package odg = oop_digraph; # oop_digraph is from
src/lib/graph/oop-digraph.pkg package pp = standard_prettyprinter; # standard_prettyprinter is from
src/lib/prettyprint/big/src/standard-prettyprinter.pkg package rkj = registerkinds_junk; # registerkinds_junk is from
src/lib/compiler/back/low/code/registerkinds-junk.pkg package rwv = rw_vector; # rw_vector is from
src/lib/std/src/rw-vector.pkg package sos = string_outstream; # string_outstream is from
src/lib/compiler/back/low/library/string-out-stream.pkg Pp = pp::Pp;
debug = FALSE; # Set this to TRUE to debug this module
# set this to FALSE for production use.
#
debug_liveness = TRUE; # Debug liveness analysis
debug_dead = FALSE; # Debug dead code removal
sanity_check = TRUE;
herein
# We are invoked from:
#
#
src/lib/compiler/back/low/intel32/regor/regor-intel32-g.pkg generic package floating_point_code_intel32_g (
# =============================
#
package mcf: Machcode_Intel32; # Machcode_Intel32 is from
src/lib/compiler/back/low/intel32/code/machcode-intel32.codemade.api package mu: Machcode_Universals # Machcode_Universals is from
src/lib/compiler/back/low/code/machcode-universals.api where
mcf == mcf; # "mcf" == "machcode_form" (abstract machine code).
package mcg: Machcode_Controlflow_Graph # Machcode_Controlflow_Graph is from
src/lib/compiler/back/low/mcg/machcode-controlflow-graph.api where
mcf == mcf; # "mcf" == "machcode_form" (abstract machine code).
package liv: Liveness # Liveness is from
src/lib/compiler/back/low/regor/liveness.api where
mcg == mcg;
package ae: Machcode_Codebuffer_Pp # Machcode_Codebuffer_Pp is from
src/lib/compiler/back/low/emit/machcode-codebuffer-pp.api where
mcf == mcf # "mcf" == "machcode_form" (abstract machine code).
also cst::pop == mcg::pop; # "pop" == "pseudo_op".
)
: (weak) Machcode_Controlflow_Graph_Improver # Machcode_Controlflow_Graph_Improver is from
src/lib/compiler/back/low/mcg/machcode-controlflow-graph-improver.api {
# Export to client packages:
#
package mcg = mcg;
stipulate
package tcf = mcf::tcf; # "tcf" == "treecode_form".
package rgk = mcf::rgk; # "rgk" == "registerkinds".
herein
Flowgraph = mcg::Machcode_Controlflow_Graph;
An = an::Notes;
improvement_name = "Intel32 (x86) floating point rewrite";
fp_debug_mode_intel32 = lowhalf_control::make_bool ("fp_debug_mode_intel32", "intel32 fp debug mode");
fp_trace_mode_intel32 = lowhalf_control::make_bool ("fp_trace_mode_intel32", "intel32 fp trace mode");
fun error msg
=
lem::error("floating_point_code_intel32_g", msg);
fun pr msg
=
fil::write (*lowhalf_control::debug_stream, msg);
i2s = int::to_string;
#####################################
# No overflow checking is needed for
# integer arithmetic in this module
#####################################
fun registerlist_to_registerset l
=
list::fold_backward
#
rkj::cls::add_codetemp_to_appropriate_kindlist
#
rkj::cls::empty_codetemplists
#
l;
fun registerlist_to_string l
=
rkj::cls::codetemplists_to_string
#
(registerlist_to_registerset l);
exception TARGET_MOVED_TO odg::Node_Id; # Annotation to mark split edges.
#########################################################################
# Base instruction-handling routines
#########################################################################
# Annotate an instruction:
#
fun mark (op, []) => op;
mark (op, note ! notes) => mark (mcf::NOTE { op, note }, notes);
end;
# Add pop suffix to a binary operator:
#
fun pop mcf::FADDL => mcf::FADDP; pop mcf::FADDS => mcf::FADDP;
pop mcf::FSUBL => mcf::FSUBP; pop mcf::FSUBS => mcf::FSUBP;
pop mcf::FSUBRL => mcf::FSUBRP; pop mcf::FSUBRS => mcf::FSUBRP;
pop mcf::FMULL => mcf::FMULP; pop mcf::FMULS => mcf::FMULP;
pop mcf::FDIVL => mcf::FDIVP; pop mcf::FDIVS => mcf::FDIVP;
pop mcf::FDIVRL => mcf::FDIVRP; pop mcf::FDIVRS => mcf::FDIVRP;
pop _ => error "fbinop::pop";
end;
# Invert the operator:
#
fun invert mcf::FADDL => mcf::FADDL; invert mcf::FADDS => mcf::FADDS;
invert mcf::FSUBL => mcf::FSUBRL; invert mcf::FSUBS => mcf::FSUBRS;
invert mcf::FSUBRL => mcf::FSUBL; invert mcf::FSUBRS => mcf::FSUBS;
invert mcf::FMULL => mcf::FMULL; invert mcf::FMULS => mcf::FMULS;
invert mcf::FDIVL => mcf::FDIVRL; invert mcf::FDIVS => mcf::FDIVRS;
invert mcf::FDIVRL => mcf::FDIVL; invert mcf::FDIVRS => mcf::FDIVS;
invert mcf::FADDP => mcf::FADDP; invert mcf::FMULP => mcf::FMULP;
invert mcf::FSUBP => mcf::FSUBRP; invert mcf::FSUBRP => mcf::FSUBP;
invert mcf::FDIVP => mcf::FDIVRP; invert mcf::FDIVRP => mcf::FDIVP;
invert _ => error "invert";
end;
# Pseudo instructions:
#
fun fld_fn (mcf::FP32, ea) => mcf::flds ea;
fld_fn (mcf::FP64, ea) => mcf::fldl ea;
fld_fn (mcf::FP80, ea) => mcf::fldt ea;
end;
fun fild_fn (mcf::INT8, ea) => error "FILD";
fild_fn (mcf::INT16, ea) => mcf::fild ea;
fild_fn (mcf::INT1, ea) => mcf::fildl ea;
fild_fn (mcf::INT2, ea) => mcf::fildll ea;
end;
fun fstp_fn (mcf::FP32, ea) => mcf::fstps ea;
fstp_fn (mcf::FP64, ea) => mcf::fstpl ea;
fstp_fn (mcf::FP80, ea) => mcf::fstpt ea;
end;
fun fst_fn (mcf::FP32, ea) => mcf::fsts ea;
fst_fn (mcf::FP64, ea) => mcf::fstl ea;
fst_fn (mcf::FP80, ea) => error "FSTT";
end;
# -----------------------------------------------------------------------
# Prettyprint routines
# -----------------------------------------------------------------------
fun freg_to_string f
=
"%f" + i2s (rkj::intrakind_register_id_of f);
fun fregs_to_string s
=
list::fold_backward
\\ (r, "") => freg_to_string r;
(r, s) => freg_to_string r + " " + s;
end
""
s;
fun blknum_of (mcg::BBLOCK { id, ... } )
=
id;
# -----------------------------------------------------------------------
# A stack enum that mimics the intel32 floating point stack
# and keeps track of namings between %st (n) and %fp (n).
# -----------------------------------------------------------------------
package st
:
api {
Stack;
Stnum = Int; # 0 -- 7
create: Void -> Stack;
stack0: Stack;
copy: Stack -> Stack;
clear: Stack -> Void;
fp: (Stack, rkj::Interkind_Register_Id) -> Stnum;
st: (Stack, Stnum) -> rkj::Interkind_Register_Id;
set: (Stack, Stnum, rkj::Interkind_Register_Id) -> Void;
push: (Stack, rkj::Interkind_Register_Id) -> Void;
xch: (Stack, Stnum, Stnum) -> Void;
pop: Stack -> Void;
depth: Stack -> Int;
non_full: Stack -> Void;
kill: (Stack, rkj::Codetemp_Info) -> Void;
stack_to_string: Stack -> String;
equal: (Stack, Stack) -> Bool;
}
=
package {
Stnum = Int;
Stack = STACK { st: rwv::Rw_Vector( rkj::Interkind_Register_Id ), # Mapping %st -> %fp registers
fp: rwv::Rw_Vector( Stnum ), # Mapping %fp -> %st registers
sp: Ref( Int ) # Stack pointer.
};
# Create a new stack:
#
fun create ()
=
STACK { st => rwv::make_rw_vector (8,-1),
fp => rwv::make_rw_vector (7, 16),
sp => REF -1
};
stack0 = create();
# Copy a stack:
#
fun copy (STACK { st, fp, sp } )
=
{ st' = rwv::make_rw_vector (8, -1);
fp' = rwv::make_rw_vector (7, 16);
rwv::copy { from => st, into => st', at => 0 };
rwv::copy { from => fp, into => fp', at => 0 };
STACK { st=>st', fp=>fp', sp=>REF *sp };
};
# Depth of stack:
#
fun depth (STACK { sp, ... } )
=
*sp + 1;
fun non_full (STACK { sp, ... } )
=
if (*sp >= 7) error "stack overflow"; fi;
# Given %st (n), lookup the corresponding %fp (n)
#
fun st (STACK { st, sp, ... }, n)
=
rwv::get (st, *sp - n);
# Given %fp (n), lookup the corresponding %st (n)
#
fun fp (STACK { fp, sp, ... }, n)
=
*sp - rwv::get (fp, n);
fun stack_to_string stack
=
{ depth = depth stack;
#
fun f i
=
if (i >= depth ) " ]";
else "%st(" + i2s i + ")=%f" + i2s (st (stack, i)) + " " + f (i+1);
fi;
"[ " + f 0;
};
fun clear (STACK { st, fp, sp, ... } )
=
{ sp := -1;
#
rwv::map_in_place (\\ _ = -1) st;
rwv::map_in_place (\\ _ = 16) fp;
};
# Set %st (n) := %f
#
fun set (STACK { st, fp, sp, ... }, n, f)
=
{ rwv::set (st, *sp - n, f);
#
if (f >= 0) rwv::set (fp, f, *sp - n); fi;
};
# Pop one entry:
#
fun pop (STACK { sp, st, fp, ... } )
=
sp := *sp - 1;
# Push %fp (f) onto %st (0)
#
fun push (stack as STACK { sp, ... }, f)
=
{ sp := *sp + 1;
#
set (stack, 0, f);
};
# Exchange the contents of %st (m) and %st (n):
#
fun xch (stack, m, n)
=
{ f_m = st (stack, m);
f_n = st (stack, n);
set (stack, m, f_n);
set (stack, n, f_m);
};
fun kill (STACK { fp, ... }, f)
=
rwv::set (fp, rkj::intrakind_register_id_of f, 16);
fun equal (st1, st2)
=
{ m = depth st1;
n = depth st2;
fun loop i
=
i >= m
or
( st (st1, i) == st (st2, i)
and
loop (i+1)
);
m == n
and
loop 0;
};
}; # pkg st
# -----------------------------------------------------------------------
# Module to handle forward propagation.
# Forward propagation does the following:
# Given an instruction
# fmove mem, %fp (n)
# We delay the generation of the load until the first use of %fp (n),
# which we can further improve by folding the load into the operand
# of the instruction, if it is the last use of this operand.
# If %fp (n) is dead then no load is necessary.
# Of course, we have to be careful whenever we encounter other
# instruction with a write.
# -----------------------------------------------------------------------*)
/*
package ForwardPropagation :>
api
type readbuffer
my create: st::stack -> readbuffer
my load: readbuffer * rgk::register * mcf::fsize * mcf::ea -> Void
my getreg: readbuffer * Bool * rgk::register * List( mcf::instruction ) ->
mcf::operand * List( mcf::instruction )
my flush: readbuffer * List( mcf::instruction ) -> List( mcf::instruction )
end =
pkg
enum readbuffer =
READ of { stack: st::stack,
loads: rwv::Rw_Vector( Null_Or( mcf::fsize * mcf::ea ) ),
pending: Ref( Int )
}
fun create stack =
READ { stack =stack,
loads =rwv::make_rw_vector (8, NULL),
pending =REF 0
}
fun load (READ { pending, loads, ... }, fd, fsize, mem) =
(rwv::set (loads, fd, THE (fsize, mem));
pending := *pending + 1
)
/* Extract the operand for a register
* If it has a delayed load associated with it then
* we perform the load at this time.
*/
fun getreg (READ { pending, loads, stack, ... }, isLastUse, fs, code) =
case rwv::get (loads, fs) of
NULL =>
let n = st::st (stack, fs)
in if isLastUse
then (ST n, code)
else let code = mcf::FLDL (ST n) ! code
in st::push (stack, fs); (ST0, code)
end
end
| THE (fsize, mem) =>
let code = fld_fn (fsize, mem) ! code
in rwv::set (loads, fs, NULL); # Delete load
pending := *pending - 1;
st::push (stack, fs); # fs is now in place
(ST0, code)
end
/* Extract a binary operand.
* We'll try to fold this into the operand
*/
fun getopnd (READ { pending, loads, stack, ... }, isLastUse, mcf::FPR fs, code) =
(case rwv::get (loads, fs) of
NULL =>
let n = st::st (stack, fs)
in if isLastUse fs # regmap XXX
then (ST n, code)
else let code = mcf::FLDL (ST n) ! code
in st::push (stack, fs); (ST0, code)
end
end
| THE (fsize, mem) =>
(rwv::set (loads, fs, NULL); # Delete load
pending := *pending - 1;
if isLastUse fs then (mem, code)
else let code = fld_fn (fsize, mem) ! code
in st::push (stack, fs);
(ST0, code)
end
)
)
| getopnd(_, _, ea, code) = (ea, code)
fun flush (READ { pending=REF 0, ... }, code) = code
end # pkg
*/
# -----------------------------------------------------------------------
# Module to handle delayed stores.
# Delayed store does the following:
# Given an instruction
# fstore %fp (n), %mem
# We delay the generation of the store until necessary.
# This gives us an opportunity to rearrange the order of the stores
# to eliminate unnecessary fxch.
# -----------------------------------------------------------------------
/*
package DelayStore :>
api
type writebuffer
my create: st::stack -> writebuffer
my flush: writebuffer * List( mcf::instruction ) -> List( mcf::instruction )
end =
pkg
enum writebuffer =
WRITE of { front: Ref( List (mcf::ea * rgk::register) ),
back: Ref( List (mcf::ea * rgk::register) ),
stack: st::stack,
pending: Ref( Int )
}
fun create stack = WRITE { front=REF [], back=REF [],
stack=stack, pending=REF 0 }
fun flush (WRITE { pending=REF 0, ... }, code) = code
end # pkg
*/
# -----------------------------------------------------------------------
# Main routine.
#
# Algorithm:
# 1. Perform liveness analysis.
# 2. For each fp register, mark all its last use point (s).
# Registers are popped at their last uses.
# 3. Rewrite the instructions basic block by basic block.
# 4. Insert shuffle code at basic block boundaries.
# When necessary, split critical edges.
# 5. Sacrifice a goat to make sure things don't go wrong.
# -----------------------------------------------------------------------
fun run (mcg' as odg::DIGRAPH mcg)
=
{
number_of_blks = mcg.capacity ();
entry_i = list::head (mcg.entries ());
exit_i = list::head (mcg.exits ());
get_float_codetemp_infos = rgk::get_codetemp_infos_for_kind rkj::FLOAT_REGISTER; # extract the fp component of registerset
st_table = rwv::from_fn (8, \\ n = mcf::ST (rgk::st n));
fun st_fn n
=
{ if (sanity_check and (n < 0 or n >= 8))
pr("WARNING BAD %st(" + i2s n + ")\n");
fi;
rwv::get (st_table, n);
};
fun fxch_fn n
=
mcf::fxch { operand=>rgk::st n };
st0 = st_fn 0;
st1 = st_fn 1;
pop_st = mcf::fstpl st0; # Instruction to pop an entry
# Dump instructions:
#
fun dump instrs
=
{ # buf = ast::with_stream *lowhalf_control::debug_stream ae::make_codebuffer [];
text = pp::prettyprint_to_string [] {.
pp = #pp;
buf = ae::make_codebuffer pp [];
apply buf.put_op (reverse instrs);
};
print text;
};
# Create assembly-code for one machine instruction:
#
fun assemble op
=
{
# stream_buf = sos::make_stream_buf ();
# stream = sos::open_string_out stream_buf;
# buf = ast::with_stream stream ae::make_codebuffer [];
# buf.put_op op;
# s = sos::get_string stream_buf;
s = pp::prettyprint_to_string [] {.
pp = #pp;
buf = ae::make_codebuffer pp [];
buf.put_op op;
};
n = string::length_in_bytes s;
if (n == 0) s;
else string::substring (s, 0, n - 1); # Drop terminal newline?
fi;
};
# ------------------------------------------------------------------
# Perform liveness analysis on the floating point variables
# p::S. I'm glad I didn't throw away the code liveness code.
# ------------------------------------------------------------------
def_use = mu::def_use rkj::FLOAT_REGISTER; # Def/use properties
my { live_in=>live_in_table, live_out=>live_out_table }
=
liv::liveness {
def_use,
# updateRegister=rgk::updateRegistersByKind rkj::FLOAT_REGISTER,
get_codetemps_of_our_kind => get_float_codetemp_infos
} mcg';
# ------------------------------------------------------------------
# Scan the instructions compute the last uses and dead definitions
# at each program point. Ideally we can do this during the code
# rewriting phase. But that's probably too error prone for now.
# ------------------------------------------------------------------
fun compute_last_use (blknum, ops, live_out)
=
{ fun scan ([], _, last_use)
=>
last_use;
scan (i ! instrs, live, last_use)
=>
{ (def_use i) -> (d, u);
#
d = cos::make_colorset d; # Definitions
u = cos::make_colorset u; # uses
#
dead = cos::get_codetemps_in_colorset (cos::difference_of_colorsets (d, live));
live = cos::difference_of_colorsets (live, d);
last = cos::get_codetemps_in_colorset (cos::difference_of_colorsets (u, live));
live = cos::union_of_colorsets (live, u);
if (debug and debug_liveness)
#
case last
#
[] => ();
_ => print (assemble i + "\tlast use=" + fregs_to_string last + "\n");
esac;
fi;
scan (instrs, live, (last, dead) ! last_use);
};
end;
live_out_set = cos::make_colorset live_out;
if (debug and debug_liveness)
#
print("LiveOut(" + i2s blknum + ") = " +
fregs_to_string (cos::get_codetemps_in_colorset live_out_set) + "\n");
fi;
scan (*ops, live_out_set, []);
};
####################################################################
# Temporary work space
stipulate
(rgk::get_id_range_for_physical_register_kind rkj::FLOAT_REGISTER)
->
{ max_register_id, ... };
herein
n = max_register_id + 1;
end;
last_use_table = rwv::make_rw_vector (n,-1); # Table for marking last uses.
use_table = rwv::make_rw_vector (n,-1); # Table for marking uses.
# %fp register namings before and after a basic block
#
namings_in = rwv::make_rw_vector (number_of_blks, NULL);
namings_out = rwv::make_rw_vector (number_of_blks, NULL);
stamp_counter = REF -4096;
# Edges that need splitting:
#
exception NO_EDGES_TO_SPLIT;
edges_to_split = iht::make_hashtable { size_hint => 32, not_found_exception => NO_EDGES_TO_SPLIT };
add_edges_to_split = iht::set edges_to_split;
fun lookup_edges_to_split b
=
the_else (iht::find edges_to_split b, []);
# ------------------------------------------------------------------
# Code for handling namings between basic block
# ------------------------------------------------------------------
fun split_edge (title, source, target, e)
=
{ if (debug and *fp_trace_mode_intel32)
pr (title + " SPLITTING " + i2s source + "->" + i2s target + "\n");
fi;
add_edges_to_split (target, (source, target, e) ! lookup_edges_to_split target);
};
# fun compute_freq (_, _, mcg::EDGE { execution_frequency, ... } ) # Is this ever used?
# =
# *execution_frequency;
# Given a registerset, return a sorted and unique
# list of elements with all non-physical registers removed
#
fun remove_non_physical registerlist
=
loop (registerlist, [])
where
fun loop ([], sss)
=>
cos::get_codetemps_in_colorset (cos::make_colorset sss);
loop (f ! fs, sss)
=>
{ fx = rkj::intrakind_register_id_of f;
loop (fs, if (fx <= 7) f ! sss; else sss;fi);
};
end;
end;
# Given a sorted and unique list of registers,
# Return a stack with these elements
#
fun new_stack fregs
=
{ stack = st::create();
apply (\\ f = st::push (stack, rkj::intrakind_register_id_of f))
(reverse fregs);
stack;
};
# This function looks at all the entries on the stack,
# and generate code to deallocate all the dead values.
# The stack is updated.
#
fun remove_dead_values (stack, live_set, code)
=
loop (0, st::depth stack, code)
where
stamp = *stamp_counter;
stamp_counter := *stamp_counter - 1;
fun mark_live []
=>
();
mark_live (r ! rs)
=>
{ rwv::set (use_table, rkj::intrakind_register_id_of r, stamp);
mark_live rs;
};
end;
fun is_live f
=
rwv::get (use_table, f) == stamp;
fun loop (i, depth, code)
=
if (i >= depth)
code;
else
f = st::st (stack, i);
if (is_live f) # live?
loop (i+1, depth, code);
else
if (debug and *fp_trace_mode_intel32)
pr("REMOVING %f" + i2s f + " in %st(" + i2s i + ")" +
" current stack=" + st::stack_to_string stack + "\n");
fi;
if (i == 0)
st::pop stack;
loop (0, depth - 1, pop_st ! code);
else
st::xch (stack, 0, i);
st::pop stack;
loop (0, depth - 1, mcf::fstpl (st_fn i) ! code);
fi;
fi;
fi;
mark_live live_set;
end;
# ------------------------------------------------------------------
# Given two stacks, source and target, where the namings are
# permutation of each other, generate the minimal number of
# fxchs to match source with target.
#
# Important: source and target MUST be permutations of each other.
#
# Essentially, we first decompose the permutation into cycles,
# and process each cycle.
# ------------------------------------------------------------------
#
fun shuffle (source, target, code)
=
{ stamp = *stamp_counter;
stamp_counter := *stamp_counter - 1;
permutation = last_use_table; /* reuse the space */
if (debug and *fp_trace_mode_intel32)
pr("Shuffle " + st::stack_to_string source +
"->" + st::stack_to_string target + "\n");
fi;
# Compute the initial permutation
#
n = st::depth source;
#
fun compute_initial_permutation (i)
=
if (i < n)
f = st::st (source, i);
j = st::fp (target, f);
rwv::set (permutation, j, i);
compute_initial_permutation (i+1);
fi;
compute_initial_permutation 0;
# Decompose the initial permutation into cycles.
# The cycle involving 0 is treated specially.
visited = use_table;
fun is_visited i
=
rwv::get (visited, i) == stamp;
fun mark_as_visited i
=
rwv::set (visited, i, stamp);
fun decompose_cycles (i, cycle0, cycles)
=
if (i >= n)
(cycle0, cycles);
elif (is_visited i or rwv::get (permutation, i) == i) # trivial cycle
decompose_cycles (i+1, cycle0, cycles);
else
fun make_cycle (j, cycle, zero)
=
{ k = rwv::get (permutation, j);
cycle = j ! cycle;
zero = zero or j == 0;
mark_as_visited j;
if (k == i) (cycle, zero);
else make_cycle (k, cycle, zero);
fi;
};
my (cycle, zero)
=
make_cycle (i, [], FALSE);
zero
?? decompose_cycles (i+1, [cycle], cycles)
:: decompose_cycles (i+1, cycle0, cycle ! cycles);
fi;
my (cycle0, cycles)
=
decompose_cycles (0, [], []);
# Generate shuffle for a cycle that does not involve 0.
# Given a cycle (c_1, ..., c_k), we generate this code:
# fxch %st (c_1),
# fxch %st (c_2),
# ...
# fxch %st (c_k),
# fxch %st (c_1)
#
fun genxch ([], code) => code;
genxch (c ! cs, code) => genxch (cs, fxch_fn c ! code);
end;
fun gen ([], code) => error "shuffle::gen";
gen (cs as (c ! _), code) => fxch_fn c ! genxch (cs, code);
end;
# Generate shuffle for a cycle that involves 0.
# Given a cycle (c_1, ..., c_k) we first shuffle this to
# an equivalent cycle (c_1, ..., c_k) where c'_k = 0,
# then we generate this code:
# fxch %st (c'_1),
# fxch %st (c'_2),
# ...
# fxch %st (c'_{ k - 1 } ),
#
fun gen0 ([], code)
=>
error "shuffle::gen0";
gen0 (cs, code)
=>
{ fun rearrange (0 ! cs, cs') => cs@reverse cs';
rearrange (c ! cs, cs') => rearrange (cs, c ! cs');
rearrange ([], _) => error "shuffle::rearrange";
end;
cs = rearrange (cs, []);
genxch (cs, code);
};
end;
# Generate code. Must process
# the non-zero cycles first:
#
code = list::fold_backward gen code cycles;
code = list::fold_backward gen0 code cycle0;
code;
}; # fun shuffle
/*------------------------------------------------------------------
* Insert code at the end of a basic block.
* Make sure we put code in front of a transfer instruction
*------------------------------------------------------------------*/
fun insert_at_end (ops, code)
=
case ops
#
[] => code;
jmp ! rest
=>
mu::instruction_kind jmp == mu::k::JUMP
?? jmp ! code @ rest
:: code @ ops;
esac;
/*------------------------------------------------------------------
* Magic for inserting shuffle code at the end of a basic block
*------------------------------------------------------------------*/
fun shuffle_out (stack_out, ops, b, block, live_out)
=
{
live_out = remove_non_physical (live_out);
# Generate code that removes
# unnecessary values:
#
code = remove_dead_values (stack_out, live_out, []);
fun done (stack_out, ops, code)
=
{ rwv::set (namings_out, b, THE stack_out);
insert_at_end (ops, code);
};
# Generate code that shuffles values
# from source to target:
#
fun match (source, target)
=
done (target, ops, shuffle (source, target, []));
# Generate code that shuffles
# values from source to live_out:
#
fun match_live_out ()
=
case live_out
[] => done (stack_out, ops, code);
_ => match (stack_out, new_stack live_out);
esac;
# With multiple successors, decide
# which one to connect to. We choose
# the one from the block that follows
# from this one, if that exists, or
# else the edge with the highest frequency:
#
fun find ([], _, id, best)
=>
(id, best);
find((_, target, _) ! edges, highest_freq, id, best)
=>
{ (mcg.node_info target)
->
mcg::BBLOCK { execution_frequency, ... };
if (target == b+1)
#
(target, rwv::get (namings_in, target));
else
case (rwv::get (namings_in, target))
#
NULL => find (edges, highest_freq, id, best);
this as THE stack
=>
if (highest_freq < *execution_frequency) find (edges, *execution_frequency, target, this);
else find (edges, highest_freq, id, best);
fi;
esac;
fi;
};
end;
# Split all edges source->target
# except omit_this:
#
fun split_all_edges_except ([], omit_this)
=>
();
split_all_edges_except((source, target, e) ! edges, omit_this)
=>
if (target == exit_i)
error "can't split exit edge!";
else
if ( target != omit_this
and target <= b # XXX
and target != entry_i
)
split_edge("ShuffleOut", source, target, e);
fi;
split_all_edges_except (edges, omit_this);
fi;
end;
# Just one successor.
# Try to match the namings of
# the successor if it exists:
#
fun match_it next
=
{ my (succ_block, target)
=
find (next, -1.0, -1, NULL);
split_all_edges_except (next, succ_block);
case target
THE stack_in => match (stack_out, stack_in);
NULL => done (stack_out, ops, code);
esac;
};
case (mcg.out_edges b)
[] => match_live_out();
next as [(_, target, _)]
=>
target == exit_i
?? match_live_out ()
:: match_it next;
next =>
match_it next;
esac;
}; # fun shuffle_out
# ------------------------------------------------------------------
# Compute the initial fp stack namings for basic block b.
# ------------------------------------------------------------------
fun shuffle_in (b, block, live_in)
=
{
live_in_set = remove_non_physical live_in;
# With multiple predecessors, find out which one we
# should connect to. Choose the one from the block that
# falls into this one, if that exists, or else choose
# from the edge with the highest frequency.
#
fun find ([], _, best)
=>
best;
find ((source, _, _) ! edges, highest_freq, best)
=>
{ (mcg.node_info source)
->
mcg::BBLOCK { execution_frequency, ... };
case (rwv::get (namings_out, source))
#
NULL => find (edges, highest_freq, best);
this as THE stack
=>
if (source == b - 1) this; # Falls into b.
elif (highest_freq < *execution_frequency) find (edges, *execution_frequency, this);
else find (edges, highest_freq, best);
fi;
esac;
};
end;
fun split_all_done_edges []
=>
();
split_all_done_edges ((source, target, e) ! edges)
=>
{ if ( source < b
and source != entry_i
and source != exit_i
)
split_edge("ShuffleIn", source, target, e);
fi;
split_all_done_edges edges;
};
end;
# The initial stack namings are
# determined by the live set.
# No compensation code is needed.
#
fun from_live_in ()
=
{ stack_in
=
case live_in_set
[] => st::stack0;
_ => { pr("liveIn=" + registerlist_to_string live_in + "\n");
new_stack live_in_set ;
};
esac;
stack_out = st::copy stack_in;
(stack_in, stack_out, []);
};
prior = mcg.in_edges b;
my (stack_in, stack_out, code)
=
case (find (prior, -1.0, NULL))
NULL =>
{ split_all_done_edges prior;
from_live_in ();
};
THE stack_in'
=>
case prior
[_] =>
{ # One predecessor.
# Use the namings as from the previous block
# We first have to deallocate all unused values.
#
stack_out = st::copy stack_in';
# Clean the stack of unused entries:
#
code = remove_dead_values (stack_out, live_in_set, []);
(stack_in', stack_out, code);
};
prior =>
{ # More than one predecessor.
stack_in = st::copy stack_in';
code = remove_dead_values (stack_in, live_in_set, []);
stack_out = st::copy stack_in;
# If we have to generate code to deallocate
# the stack then we have split the edge:
#
case code
[] => ();
_ => split_all_done_edges (prior);
esac;
(stack_in, stack_out, []);
};
esac;
esac;
rwv::set (namings_in, b, THE stack_in );
rwv::set (namings_out, b, THE stack_out);
(stack_in, stack_out, code);
};
# ------------------------------------------------------------------
# Code for patching up critical edges.
# The trick is finding a good place to insert the critical edges.
# Let's call an edge x->y that requires compensation
# code c to be inserted an candidate edge. We write this as x->y (c)
#
# Here are the heuristics that we use to improve the final code:
#
# 1. Given two candidate edges a->x (c1) and b->x (c2) where c1=c2
# then we can merge the two copies of compensation code.
# This is quite common. This generalizes to any number of edges.
#
# 2. Given two candidate edges a->x (c1) and b->x (c2) and where
# c1 and c2 are pops, we can partially share c1 and c2.
# Currently, I think I only recognize this case when
# x has no fp registers live-in.
#
# 3. Given two candidate edges a->x (c1) and b->x (c2),
# if a->x has a higher frequency then put the compensation
# code in front of x (so that it falls through into x)
# whenever possible.
#
# As you can see, the voodoo is strong here.
#
# The routine has two main phases:
# 1. Determine the compensation code by applying the heuristics
# above.
# 2. Then insert them and rebuild the mcg by renaming all block
# ids. This is currently necessary to keep the layout order
# consistent with the order of the id.
# ------------------------------------------------------------------
fun repair_critical_edges (mcg' as odg::DIGRAPH mcg)
=
{
cleanup = [lowhalf_notes::comment.x_to_note "cleanup edge" ];
critical = [lowhalf_notes::comment.x_to_note "critical edge"];
fun annotate (gen, an)
=
apply (\\ ((_, mcg::BBLOCK { notes, ... } ), _)
=
notes := an
)
gen;
# Special case: target block has stack depth of 0.
# Just generate code that pop entries from the sources.
# To make things interesting, we try to share code among
# all the critical edges.
#
fun gen_popping_code (_, [])
=>
();
gen_popping_code (target_id, edges)
=>
{ # Edges annotated with the source stack depth
# Ordered by increasing stack height
#
edges
=
im::keyvals_list
(fold_backward
(\\ (edge as (source_id, _, _), mmm)
=
{ n = st::depth (the (rwv::get (namings_out, source_id)));
im::set (mmm, n, edge ! the_else (im::get (mmm, n), []));
}
)
im::empty
edges
);
# Generate n pops:
#
fun pops (0, code) => code;
pops (n, code) => pops (n - 1, pop_st ! code);
end;
# Create the chain of blocks:
#
fun make_chain (depth, [], chain)
=>
chain;
make_chain (depth, (d, es) ! es', chain)
=>
{ code = pops (d - depth, []);
make_chain (d, es', (es, code) ! chain);
};
end;
chain = make_chain (0, edges, []);
annotate
( mcg::split_edges # split_edges def in
src/lib/compiler/back/low/mcg/machcode-controlflow-graph-g.pkg mcg'
{ groups => chain,
jump => FALSE
},
cleanup
);
};
end;
# Generate repair code.
#
fun gen_repair_code (target_id, stack_in, edges)
=
{ live_in = iht::get live_in_table target_id;
live_in_set = remove_non_physical live_in;
if debug pr("LiveIn = " + registerlist_to_string live_in + "\n"); fi;
# Group all edges whose output stack configurations
# are the same. Each group is merged together into
# a single compensation block
#
fun partition ([], s)
=>
s;
partition((e as (src, _, _)) ! es, s)
=>
find (s, [])
where
stack_out = st::copy (the (rwv::get (namings_out, src)));
fun find ([], s)
=>
partition (es, ([e], stack_out) ! s);
find((x as (es', st')) ! s', s)
=>
if (st::equal (stack_out, st'))
partition (es, (e ! es', st') ! s' @ s);
else
find (s', x ! s);
fi;
end;
end;
end;
# Partition by the source namings:
#
sss = partition (edges, []);
# Compute frequencies
#
sss = map (\\ (edges, st)
=
(mcg::sum_edge_execution_frequencies edges, edges, st)
)
sss;
# Order by non-increasing frequencies:
#
sss = lms::sort_list
#
(\\ ((x, _, _), (y, _, _)) = x < y)
#
sss;
# Generate code:
#
fun gen (freq, edges, stack_out)
=
{ # Deallocate unused values:
#
code = remove_dead_values (stack_out, live_in_set,[]);
# Shuffle values:
#
code = shuffle (stack_out, stack_in, code);
annotate(
mcg::split_edges mcg' { groups => [(edges, code)], jump => FALSE },
critical);
};
apply gen sss;
};
# Split all edges entering target_id:
#
fun split (target_id, edges)
=
{ stack_in = the (rwv::get (namings_in, target_id));
fun log (s, t, e)
=
case (rwv::get (namings_out, s))
THE stack_out
=>
pr ("SPLIT " + i2s s + "->" + i2s t + " " +
st::stack_to_string stack_out + "->" +
st::stack_to_string stack_in + "\n"
);
NULL => error "split: stack_out";
esac;
if (debug and *fp_trace_mode_intel32) apply log edges; fi;
st::depth stack_in == 0
?? gen_popping_code (target_id, edges)
:: gen_repair_code (target_id, stack_in, edges);
};
iht::keyed_apply split edges_to_split;
mcg::note_topology_changes mcg';
mcg';
};
/*------------------------------------------------------------------
* Process all blocks which are not the entry or the exit
*------------------------------------------------------------------*/
stamp = REF 0;
fun rewrite_all_blocks (_, mcg::BBLOCK { kind=>mcg::START, ... } ) => ();
rewrite_all_blocks (_, mcg::BBLOCK { kind=>mcg::STOP, ... } ) => ();
rewrite_all_blocks (blknum, block as mcg::BBLOCK { ops, labels, notes, ... } )
=>
{
if (debug and *fp_debug_mode_intel32)
#
apply (\\ l = pr (lbl::codelabel_to_string l + ":\n"))
*labels;
fi;
live_in = iht::get live_in_table blknum;
live_out = iht::get live_out_table blknum;
st = rewrite ( *stamp, blknum, block,
ops, live_in, live_out,
notes
);
stamp := st; # Update stamp.
};
end
# ------------------------------------------------------------------
# Translate code within a basic block.
# Each instruction is given a unique stamp for identifying last
# uses.
# ------------------------------------------------------------------
also
fun rewrite (stamp, blknum, block, ops, live_in, live_out, notes)
=
{ (shuffle_in (blknum, block, live_in))
->
(stack_in, stack, code);
# Dump instructions when encountering a bug:
#
fun bug msg
=
{ pr ("-------- bug in block " + i2s blknum + " ----\n");
dump *ops;
error msg;
};
fun loop (stamp, [], [], code)
=>
(stamp, code);
loop (stamp, instruction ! rest, (last_use, dead) ! last_uses, code)
=>
{ fun mark (table, [])
=>
();
mark (table, r ! rs)
=>
{ rwv::set (table, rkj::intrakind_register_id_of r, stamp);
mark (table, rs);
};
end;
mark (last_use_table, last_use); # mark all last uses
trans (stamp, instruction, [], rest, dead, last_uses, code);
};
loop _ => error "loop";
end
# Main routine that does the actual translation.
# A few reminders:
# o The instructions are processed in normal order
# and generated in the reversed order.
# o (Local) liveness is computed at the same time.
# o For each use, we have to find out whether it is
# the last use. If so, we can kill it and reclaim
# the stack entry at the same time.
#
also
fun trans (stamp, instruction, an, rest, dead, last_uses, code)
=
{ # Call this fate when
# done with code generation:
#
fun finish_fn code
=
loop (stamp+1, rest, last_uses, code);
fun kill_the_dead (dead, code)
=
kill (dead, code)
where
fun kill ([], code)
=>
finish_fn code;
kill (f ! fs, code)
=>
{ fx = rkj::intrakind_register_id_of f;
if (debug and debug_dead )
pr("DEAD " + freg_to_string f + " in " +
st::stack_to_string stack + "\n");
fi;
# not a physical register
if (fx >= 8 )
kill (fs, code);
else
i = st::fp (stack, fx);
if (debug and debug_dead )
pr("KILLING " + freg_to_string f +
"=%st(" + i2s i + ")\n");
fi;
if (i < 0 )
kill (fs, code); # Dead already
elif (i == 0)
st::pop stack;
kill (fs, pop_st ! code);
else
st::xch (stack, 0, i); st::pop stack;
kill (fs, mcf::fstpl (st_fn i) ! code);
fi;
fi;
};
end; # fun kill
end; # where (fun kill_the_dead)
# Call this fate when
# done with floating point
# code generation. Remove all
# dead code first:
#
fun done_fn code
=
kill_the_dead (dead, code);
# Is this the last use
# of register f?
#
fun is_last_use f
=
rwv::get (last_use_table, f) == stamp;
# Is this value dead?
#
fun is_dead f
=
loop dead
where
fun loop [] => FALSE;
loop (r ! rs) => rkj::codetemps_are_same_color (f, r) or loop rs;
end;
end;
# Dump the stack before each intruction for debugging:
#
fun log ()
=
if (debug and *fp_trace_mode_intel32)
pr (st::stack_to_string stack + assemble instruction + "...\n");
fi;
# Find the location of a source register:
#
fun getfs (f)
=
{ fx = rkj::intrakind_register_id_of f;
s = st::fp (stack, fx);
(is_last_use fx, s);
};
# Generate memory to memory move:
#
fun mmmove (fsize, src, dst)
=
{ st::non_full stack;
code = fld_fn (fsize, src) ! code;
code = mark (fstp_fn (fsize, dst), an) ! code;
done_fn code;
};
# Allocate a new register in %st (0):
#
fun allot (f, code)
=
{ st::push (stack, rkj::intrakind_register_id_of f);
code;
};
# register -> register move
#
fun rrmove (fs, fd)
=
if (rkj::codetemps_are_same_color (fs, fd))
#
done_fn code;
else
my (dead, ss) = getfs fs;
if dead
#
# fs is dead.
st::set (stack, ss, rkj::intrakind_register_id_of fd); # Rename fd to fs.
done_fn code; # No code is generated.
else
# fs is not dead; push it onto %st (0);
# set fd to %st (0)
code = allot (fd, code);
done_fn (mark (mcf::fldl (st_fn ss), an) ! code);
fi;
fi;
# memory -> register move.
# Do dead code elimination here.
#
fun mrmove (fsize, src, fd)
=
if (is_dead fd )
finish_fn code; # value has been killed
else
code = allot (fd, code);
done_fn (mark (fld_fn (fsize, src), an) ! code);
fi;
# Exchange %st (n) and %st (0):
#
fun xch n
=
{ st::xch (stack, 0, n);
fxch_fn n;
};
# Push %st (n) onto the stack:
#
fun push n
=
{ st::push (stack,-2);
mcf::fldl (st_fn n);
};
# Push mem onto the stack:
#
fun pushmem src
=
{ st::push (stack,-2);
mcf::fldl (src);
};
# register -> memory move.
# Use pop version of the opcode
# if it is the last use:
#
fun rmmove (fsize, fs, dst)
=
{ fun fstp code
=
{ st::pop stack;
done_fn (mark (fstp_fn (fsize, dst), an) ! code);
};
fun fst code
=
done_fn (mark (fst_fn (fsize, dst), an) ! code);
case (getfs fs)
(TRUE, 0) => fstp code;
(TRUE, n) => fstp (xch n ! code);
(FALSE, 0) => fst (code);
(FALSE, n) => fst (xch n ! code);
esac;
};
# Floating point move:
#
fun fmove { fsize, src=>mcf::FPR fs, dst=>mcf::FPR fd } => rrmove (fs, fd);
fmove { fsize, src, dst=>mcf::FPR fd } => mrmove (fsize, src, fd);
fmove { fsize, src=>mcf::FPR fs, dst } => rmmove (fsize, fs, dst);
fmove { fsize, src, dst } => mmmove (fsize, src, dst);
end;
# Floating point integer load operator:
#
fun fiload { isize, ea, dst=>mcf::FPR fd }
=>
{ code = allot (fd, code);
code = mark (fild_fn (isize, ea), an) ! code;
done_fn code;
};
fiload { isize, ea, dst }
=>
{ code = mark (fild_fn (isize, ea), an) ! code;
code = mcf::fstpl (dst) ! code; # XXX
done_fn code;
};
end;
# Make a copy of register fs to %st (0).
#
fun moveregtotop (fs, code)
=
case (getfs fs)
(TRUE, 0) => code;
(TRUE, n) => xch n ! code;
(FALSE, n) => push n ! code;
esac;
fun movememtotop (fsize, mem, code)
=
{ st::push (stack, -2);
fld_fn (fsize, mem) ! code;
};
# Move an operand to top of stack:
#
fun movetotop (fsize, mcf::FPR fs, code) => moveregtotop (fs, code);
movetotop (fsize, mem, code) => movememtotop (fsize, mem, code);
end;
fun store_result (fsize, dst, n, code)
=
case dst
mcf::FPR fd
=>
{ st::set (stack, n, rkj::intrakind_register_id_of fd);
done_fn code;
};
mem =>
{ code = (n == 0) ?? code
:: xch n ! code;
st::pop stack;
done_fn (fstp_fn (fsize, mem) ! code);
};
esac;
# Floating point unary operator:
#
fun funop { fsize, un_op, src, dst }
=
{ code = movetotop (fsize, src, code);
code = mark (mcf::funary un_op, an) ! code;
# Moronic hack to deal with partial tangent! XXX BUGGO FIXME
#
code =
case un_op
mcf::FPTAN
=>
{ if (st::depth stack >= 7 ) error "FPTAN"; fi;
pop_st ! code; # pop the useless 1.0
};
_ => code;
esac;
store_result (fsize, dst, 0, code);
};
# Floating point binary operator.
# Note:
# binop src, dst
# means dst := dst binop src
# (lsrc := lsrc binop rsrc)
# on the intel32
#
fun fbinop { fsize, bin_op, lsrc, rsrc, dst }
=
{ # generate code and set %st (n) = fd */
# op2 := op1 - op2
fun op (bin_op, op1, op2, n, code)
=
{ code = mark (mcf::fbinary { bin_op, src=>op1, dst=>op2 }, an)
! code;
store_result (mcf::FP64, dst, n, code);
};
fun oper_r (bin_op, op1, op2, n, code)
=
op (invert bin_op, op1, op2, n, code);
fun oper_p (bin_op, op1, op2, n, code)
=
{ st::pop stack;
op (pop bin_op, op1, op2, n - 1, code);
};
fun oper_rp (bin_op, op1, op2, n, code)
=
{ st::pop stack;
oper_r (pop bin_op, op1, op2, n - 1, code);
};
# Many special cases to consider.
# Basically, try to reuse stack space as
# much as possible by taking advantage of last uses.
#
# Stack=[st (0)=3.0 st (1)=2.0]
# fsub %st (1), %st [1, 2.0]
# fsubr %st (1), %st [-1, 2.0]
# fsub %st, %st (1) [3.0, 1.0]
# fsubr %st, %st (1) [3.0,-1.0]
#
# fsubp %st, %st (1) [1]
# fsubrp %st, %st (1) [-1]
# So,
# fsub %st (n), %st (means %st - %st (n) -> %st)
# fsub %st, %st (n) (means %st - %st (n) -> %st (n))
# fsubr %st (n), %st (means %st (n) - %st -> %st)
# fsubr %st, %st (n) (means %st (n) - %st -> %st (n))
#
fun reg2 (fx, fy)
=
{ my (dx, sx) = getfs fx;
my (dy, sy) = getfs fy;
fun loop (dx, sx, dy, sy, code)
=
# op1, op2 (dst)
case (dx, sx, dy, sy)
(TRUE, 0, FALSE, n) => op (bin_op, st_fn n, st0, 0, code);
(FALSE, n, TRUE, 0) => oper_r (bin_op, st_fn n, st0, 0, code);
(TRUE, n, TRUE, 0) => oper_rp (bin_op, st0, st_fn n, n, code);
(TRUE, 0, TRUE, n) => oper_p (bin_op, st0, st_fn n, n, code);
(FALSE, 0, TRUE, n) => op (bin_op, st0, st_fn n, n, code);
(TRUE, n, FALSE, 0) => oper_r (bin_op, st0, st_fn n, n, code);
(TRUE, sx, dy, sy)
=>
loop (TRUE, 0, dy, sy, xch sx ! code);
(dx, sx, TRUE, sy)
=>
loop (dx, sx, TRUE, 0, xch sy ! code);
(FALSE, sx, FALSE, sy)
=>
loop (TRUE, 0, FALSE, sy+1, push sx ! code);
esac;
if (sx == sy ) # Same register.
code = case (dx, sx)
(TRUE, 0) => code;
(TRUE, n) => xch n ! code;
(FALSE, n) => push n ! code;
esac;
op (bin_op, st0, st0, 0, code);
else
loop (dx, sx, dy, sy, code);
fi;
};
# reg/mem operands
#
fun regmem (bin_op, fx, mem)
=
case (getfs fx)
(TRUE, 0) => op (bin_op, mem, st0, 0, code);
(TRUE, n) => op (bin_op, mem, st0, 0, xch n ! code);
(FALSE, n) => op (bin_op, mem, st0, 0, push n ! code);
esac;
# Two memory operands. Optimize the case when
# the two operands are identical.
#
fun mem2 (lsrc, rsrc)
=
{ st::push (stack,-2);
code = fld_fn (fsize, lsrc) ! code;
rsrc = mu::eq_operand (lsrc, rsrc)
?? st0
:: rsrc;
op (bin_op, rsrc, st0, 0, code);
};
fun process (mcf::FPR fx, mcf::FPR fy) => reg2 (fx, fy);
process (mcf::FPR fx, mem) => regmem (bin_op, fx, mem);
process (mem, mcf::FPR fy) => regmem (invert bin_op, fy, mem);
process (lsrc, rsrc) => mem2 (lsrc, rsrc);
end;
process (lsrc, rsrc);
};
# Floating point binary operator with integer conversion:
#
fun fibinop { isize, bin_op, lsrc, rsrc, dst }
=
{ fun op (bin_op, src, code)
=
{ code = mark (mcf::fibinary { bin_op, src }, an)
! code;
store_result (mcf::FP64, dst, 0, code);
};
fun regmem (bin_op, fx, mem)
=
case (getfs fx)
(TRUE, 0) => op (bin_op, mem, code);
(TRUE, n) => op (bin_op, mem, xch n ! code);
(FALSE, n) => op (bin_op, mem, push n ! code);
esac;
case (lsrc, rsrc)
(mcf::FPR fx, mem) => regmem (bin_op, fx, mem);
(lsrc, rsrc) => op (bin_op, rsrc, pushmem lsrc ! code);
esac;
};
# Floating point comparison
# We have to make sure there are enough registers.
# The trick is that tmp is always a physical register.
# So we can always use it as temporary space if we
# have run out.
#
fun fcmp { i, fsize, lsrc, rsrc }
=
{ fun fucompp code
=
{ st::pop stack; st::pop stack;
i ?? pop_st ! mark (mcf::fucomip (st_fn 1), an) ! code
:: mark (mcf::fucompp, an) ! code;
};
fun fucomp n
=
{ st::pop stack;
mark
( (i ?? mcf::fucomip :: mcf::fucomp) (st_fn n),
an
);
};
fun fucom n
=
mark ((i ?? mcf::fucomi :: mcf::fucom) (st_fn n), an);
fun genmemcmp ()
=
{ code = movememtotop (fsize, rsrc, code);
code = movememtotop (fsize, lsrc, code);
finish_fn (fucompp (code));
};
fun genmemregcmp (lsrc, fy)
=
case (getfs fy)
(FALSE, n)
=>
{ code = movememtotop (fsize, lsrc, code);
finish_fn (fucomp (n+1) ! code);
};
(TRUE, n)
=>
{ code = n == 0 ?? code
:: xch n ! code;
code = movememtotop (fsize, lsrc, code);
finish_fn (fucompp code);
};
esac;
fun genregmemcmp (fx, rsrc)
=
{ code = case (getfs fx)
(TRUE, n)
=>
{ code = n == 0 ?? code
:: xch n ! code;
code = movememtotop (fsize, rsrc, code);
xch 1 ! code;
};
(FALSE, n)
=>
{ code = movememtotop (fsize, rsrc, code);
push (n+1) ! code;
};
esac;
finish_fn (fucompp code);
};
# Deal with the special case
# where both sources are
# in the same register
#
fun regsame (dx, sx)
=
finish_fn (cmp ! code)
where
my (code, cmp)
=
case (dx, sx)
(TRUE, 0) => (code, fucomp 0); # pop once!
(FALSE, 0) => (code, fucom 0); # Don't pop!
(TRUE, n) => (xch n ! code, fucomp 0);
(FALSE, n) => (xch n ! code, fucom 0);
esac;
end;
fun reg2 (fx, fy)
=
# Special case is when things are already in place.
# Note: should also generate FUCOM and FUCOMP!!! XXX BUGGO FIXME
#
{ my (dx, sx) = getfs fx;
my (dy, sy) = getfs fy;
fun fstp n
=
{ st::xch (stack, n, 0);
st::pop stack;
mcf::fstpl (st_fn n);
};
if (sx == sy)
regsame (dx, sx); # Same register!
else
# First, move sx to %st (0):
#
my (sy, code)
=
if (sx == 0) # There already.
( sy,
code
);
else
( sy == 0 ?? sx :: sy,
xch sx ! code
);
fi;
# Generate the appropriate comparison op
#
my (sy, code, pop_y)
=
case (dx, dy, sy)
(TRUE, TRUE, 0) => (-1, fucompp code, FALSE);
(TRUE, _, _) => (sy - 1, fucomp sy ! code, dy);
(FALSE, _, _) => (sy, fucom sy ! code, dy);
esac;
# Pop fy if it is dead and hasn't already
# been popped.
#
code = pop_y ?? fstp sy ! code
:: code;
finish_fn code;
fi;
};
case (lsrc, rsrc)
(mcf::FPR x, mcf::FPR y) => reg2 (x, y);
(mcf::FPR x, mem) => genregmemcmp (x, mem);
(mem, mcf::FPR y) => genmemregcmp (mem, y);
_ => genmemcmp ();
esac;
};
fun pr_copy (dst, src)
=
paired_lists::apply
(\\ (fd, fs)
=
pr (freg_to_string (fd) + "<-" + freg_to_string fs + " ")
)
(dst, src);
# Parallel copy magic.
#
# For each src register, we find out
#
# 1. Whether it is the last use, and if so,
# 2. whether it is used more than once.
#
# If a source is a last and unique use,
# then we can simply rename it to
# the appropriate destination register:
#
fun fcopy (mcf::COPY { dst, src, tmp, ... } )
=>
{
fun loop ([], [], copies, renames)
=>
(copies, renames);
loop (fd ! fds, fs ! fss, copies, renames)
=>
{ fsx = rkj::intrakind_register_id_of fs;
if (is_last_use fsx)
if (rwv::get (use_table, fsx) != stamp)
# Unused.
rwv::set (use_table, fsx, stamp);
loop
( fds,
fss,
copies,
rkj::codetemps_are_same_color (fd, fs)
?? renames
:: (fd, fs) ! renames
);
else
loop (fds, fss, (fd, fs) ! copies, renames);
fi;
else
loop (fds, fss, (fd, fs) ! copies, renames);
fi;
};
loop _
=>
error "fcopy::loop";
end;
# Generate code for the copies:
#
fun gen_copy ([], code)
=>
code;
gen_copy((fd, fs) ! copies, code)
=>
{ ss = st::fp (stack, rkj::intrakind_register_id_of fs);
st::push (stack, rkj::intrakind_register_id_of fd);
code = mcf::fldl (st_fn ss) ! code;
gen_copy (copies, code);
};
end;
# Perform the renaming.
# It must be done in parallel!
#
fun renaming renames
=
{ ss = map (\\ (_, fs) = st::fp (stack, rkj::intrakind_register_id_of fs))
renames;
paired_lists::apply
(\\ ((fd, _), ss)
=
st::set (stack, ss, rkj::intrakind_register_id_of fd)
)
(renames, ss);
};
# if debug then
# (paired_lists::apply (\\ (fd, fs) =>
# pr (fregToString (regmap fd) + "<-" +
# fregToString (regmap fs) + " ")
# ) (dst, src);
# pr "\n")
# else ()
my (copies, renames)
=
loop (dst, src, [], []);
code = gen_copy (copies, code);
renaming renames;
case tmp
THE (mcf::FPR f)
=>
{ if (debug and debug_dead )
pr("KILLING tmp " + freg_to_string f + "\n");
fi;
st::kill (stack, f);
};
_ => ();
esac;
done_fn code;
};
fcopy _ => error "fcopy";
end;
fun call (instruction, return)
=
{
code = mark (mcf::BASE_OP instruction, an) ! code;
return_set = rkj::sortuniq_colored_codetemps (get_float_codetemp_infos return);
case return_set
#
[] => ();
[r] => st::push (stack, rkj::intrakind_register_id_of r);
_ => error "can't return more than one fp argument (yet)";
esac;
kill_the_dead (list::filter is_dead return_set, code);
};
fun intel32trans instruction
=
case instruction
mcf::FMOVE x => { log(); fmove x;};
mcf::FBINOP x => { log(); fbinop x;};
mcf::FIBINOP x => { log(); fibinop x;};
mcf::FUNOP x => { log(); funop x;};
mcf::FILOAD x => { log(); fiload x;};
mcf::FCMP x => { log(); fcmp x;};
# Handle calling convention:
#
mcf::CALL { return, ... }
=>
{ log();
call (instruction, return);
};
# Catch instructions that absolutely
# should not have been generated
# at this point:
#
( mcf::FLD1
| mcf::FLDL2E | mcf::FLDLG2 | mcf::FLDLN2 | mcf::FLDPI
| mcf::FLDZ | mcf::FLDL _ | mcf::FLDS _ | mcf::FLDT _
| mcf::FILD _ | mcf::FILDL _ | mcf::FILDLL _
| mcf::FENV _ | mcf::FBINARY _ | mcf::FIBINARY _ | mcf::FUNARY _
| mcf::FUCOMPP | mcf::FUCOM _ | mcf::FUCOMP _ | mcf::FCOMPP | mcf::FXCH _
| mcf::FCOMI _ | mcf::FCOMIP _ | mcf::FUCOMI _ | mcf::FUCOMIP _
| mcf::FSTPL _ | mcf::FSTPS _ | mcf::FSTPT _ | mcf::FSTL _ | mcf::FSTS _
) =>
bug ("Illegal FP instructions");
# Leave other instructions untouched:
#
other_instruction
=>
finish_fn (mark (mcf::BASE_OP other_instruction, an) ! code);
esac;
case instruction
#
mcf::NOTE { note, op }
=>
trans (stamp, op, note ! an, rest, dead, last_uses, code);
mcf::COPY { kind => rkj::FLOAT_REGISTER, ... }
=>
{ log();
fcopy instruction;
};
mcf::LIVE _
=>
done_fn (mark (instruction, an) ! code);
mcf::BASE_OP instruction
=>
intel32trans instruction;
_ => finish_fn (mark (instruction, an) ! code);
esac;
}; # fun trans
# Check the translation result
# to see if it matches the
# original code:
#
fun check_translation (stack_in, stack_out, ops)
=
{ n = REF (st::depth stack_in);
fun push () = n := *n + 1;
fun pop () = n := *n - 1;
fun scan (mcf::BASE_OP (mcf::FBINARY { bin_op, ... } ))
=>
case bin_op
( mcf::FADDP
| mcf::FSUBP | mcf::FSUBRP | mcf::FMULP
| mcf::FDIVP | mcf::FDIVRP) => pop();
_ => ();
esac;
scan (mcf::BASE_OP (mcf::FIBINARY { bin_op, ... } )) => ();
scan (mcf::BASE_OP (mcf::FUNARY mcf::FPTAN)) => push();
scan (mcf::BASE_OP (mcf::FUNARY _)) => ();
scan (mcf::BASE_OP (mcf::FLDL (mcf::ST n))) => push();
scan (mcf::BASE_OP (mcf::FLDL mem)) => push();
scan (mcf::BASE_OP (mcf::FLDS mem)) => push();
scan (mcf::BASE_OP (mcf::FLDT mem)) => push();
scan (mcf::BASE_OP (mcf::FSTL (mcf::ST n))) => ();
scan (mcf::BASE_OP (mcf::FSTPL (mcf::ST n))) => pop();
scan (mcf::BASE_OP (mcf::FSTL mem)) => ();
scan (mcf::BASE_OP (mcf::FSTS mem)) => ();
scan (mcf::BASE_OP (mcf::FSTPL mem)) => pop();
scan (mcf::BASE_OP (mcf::FSTPS mem)) => pop();
scan (mcf::BASE_OP (mcf::FSTPT mem)) => pop();
scan (mcf::BASE_OP (mcf::FXCH { operand=>i, ... } )) => ();
scan (mcf::BASE_OP (mcf::FUCOM _)) => ();
scan (mcf::BASE_OP (mcf::FUCOMP _)) => pop();
scan (mcf::BASE_OP (mcf::FUCOMPP)) => { pop(); pop();};
scan (mcf::BASE_OP (mcf::FILD mem)) => push();
scan (mcf::BASE_OP (mcf::FILDL mem)) => push();
scan (mcf::BASE_OP (mcf::FILDLL mem)) => push();
scan (mcf::BASE_OP (mcf::CALL { return, ... } ))
=>
{ n := 0; # Clear the stack
# Simulate the pushing of arguments:
#
{ return_set = rkj::sortuniq_colored_codetemps (get_float_codetemp_infos return);
apply (\\ _ = push()) return_set;
};
};
scan _ => ();
end;
apply scan (reverse ops);
n = *n;
m = st::depth stack_out;
if (n != m)
dump ops;
bug("Bad translation n=" + i2s n + " expected=" + i2s m + "\n");
fi;
};
# Dump the initial code:
#
if (debug and *fp_debug_mode_intel32)
pr("-------- block " + i2s blknum + " ----" +
registerlist_to_string live_in + " " +
st::stack_to_string stack_in + "\n");
dump *ops;
pr("next=");
apply (\\ b => pr (i2s b + " "); end ) (mcg.next blknum);
pr "\n";
fi;
# Compute the last uses:
#
last_use = compute_last_use (blknum, ops, live_out);
# Rewrite the code:
#
my (stamp, ops')
=
loop (stamp, reverse *ops, last_use, code);
# Insert shuffle code at the end if necessary:
#
ops' = shuffle_out (stack, ops', blknum, block, live_out);
# Dump translation:
#
if (debug and *fp_debug_mode_intel32)
#
pr("-------- translation " + i2s blknum + "----" +
registerlist_to_string live_in + " " +
st::stack_to_string stack_in + "\n");
dump ops';
pr("-------- done " + i2s blknum + "----" +
registerlist_to_string live_out + " " +
st::stack_to_string stack + "\n");
fi;
# Check if things are okay:
#
if (debug and sanity_check)
#
check_translation (stack_in, stack, ops');
fi;
ops := ops'; # Update the basic-block machine-instruction list.
stamp;
}; # fun rewrite
# Translate all blocks:
#
stamp := rgk::codetemp_id_if_above;
mcg.forall_nodes rewrite_all_blocks;
# If we found critical edges
# then we have to split them:
#
if (iht::vals_count edges_to_split == 0)
#
mcg';
else
repair_critical_edges mcg';
fi;
};
end;
}; # generic package floating_point_code_intel32_g
end; # stipulate