## squash-jumps-and-write-code-to-code-segment-buffer-intel32-g.pkg -- variable length jump-address backpatching.
#
# This is an intel32-specific replacement for the general
#
src/lib/compiler/back/low/jmp/squash-jumps-and-write-code-to-code-segment-buffer-sparc32-g.pkg# module which we use on other architectures.
#
# See src/lib/compiler/back/low/doc/latex/span-dep.tex
#
# NOTE on max_variable_length_backpatch_iterations:
#
# max_variable_length_backpatch_iterations is the
# number of times a span-dependent instruction
# is allowed to expand in a non-monotonic way.
#
# This table shows the number of span-dependent instructions
# whose size was over-estimated as a function of id, for the
# file src/lib/compiler/front/parser/yacc/mythryl.grammar.pkg:
#
# max # of instructions:
# --- ------------------
# 10 687
# 20 438
# 30 198
# 40 0
#
# In compiling the compiler, there is no significant difference in
# compilation speed between max=10 and max=40.
# Indeed 96% of the files in the compiler reach a fix point within
# 13 iterations.
#
# ======================================================
# 2011-06-16 CrT: This sounds terribly naive and kludgey.
# This problem was basically solved in the 1980s, but nobody
# here seems to have gone back and read the papers.
#
# If I recall correctly, one trick was to start by assuming
# max length for all jumps and then contracting them. Any
# jump which reached minimum length would then stay minimum,
# hence could be regarded as constant bytes thereafter. In
# practice this would be the overwhelming majority of jumps.
#
# (Starting with every jump at max size also has the pleasant
# property that we can stop at any time and have a valid
# -- i.e., executable -- set of jump lengths assignments.
# Starting with jumps set to logically impossible minimum
# sizes enjoys no such property.)
#
# Also, tracking both minimum and maximum possible span for
# a given jump would allow most to be fixed at a given length
# immediately.
#
# A suitable tree should allow the span of any instruction to
# computed in O(log(N)) time.
#
# (Seems to me it should be possible to dependency-order jumps
# that survive the first pass.)
#
# My recollection was that 2-3 passes converged for all realistic
# programs; artificial examples could take longer.
# Compiled by:
#
src/lib/compiler/back/low/lib/lowhalf.libstipulate
package lbl = codelabel; # codelabel is from
src/lib/compiler/back/low/code/codelabel.pkg package lem = lowhalf_error_message; # lowhalf_error_message is from
src/lib/compiler/back/low/control/lowhalf-error-message.pkg package lhc = lowhalf_control; # lowhalf_control is from
src/lib/compiler/back/low/control/lowhalf-control.pkg package odg = oop_digraph; # oop_digraph is from
src/lib/graph/oop-digraph.pkg package pp = standard_prettyprinter; # standard_prettyprinter is from
src/lib/prettyprint/big/src/standard-prettyprinter.pkg package w8v = vector_of_one_byte_unts; # vector_of_one_byte_unts is from
src/lib/std/src/vector-of-one-byte-unts.pkg package cv = compiler_verbosity; # compiler_verbosity is from
src/lib/compiler/front/basics/main/compiler-verbosity.pkg #
Npp = pp::Npp;
herein
# This generic is invoked from:
#
#
src/lib/compiler/back/low/main/intel32/backend-lowhalf-intel32-g.pkg #
generic package squash_jumps_and_make_machinecode_bytevector_intel32_g (
# ======================================================
#
package csb: Code_Segment_Buffer; # Code_Segment_Buffer is from
src/lib/compiler/execution/code-segments/code-segment-buffer.api package jmp: Jump_Size_Ranges; # Jump_Size_Ranges is from
src/lib/compiler/back/low/jmp/jump-size-ranges.api package mu: Machcode_Universals # Machcode_Universals is from
src/lib/compiler/back/low/code/machcode-universals.api where
mcf == jmp::mcf; # "mcf" == "machcode_form" (abstract machine code).
package xe: Execode_Emitter # Execode_Emitter is from
src/lib/compiler/back/low/emit/execode-emitter.api where # "xe" == "execode emitter".
mcf == mu::mcf; # "mcf" == "machcode_form" (abstract machine code).
package mcg: Machcode_Controlflow_Graph # Machcode_Controlflow_Graph is from
src/lib/compiler/back/low/mcg/machcode-controlflow-graph.api where # "mcg" == "machcode_controflow_graph".
mcf == xe::mcf; # "mcf" == "machcode_form" (abstract machine code).
# package ae: Machcode_Codebuffer # Machcode_Codebuffer is from
src/lib/compiler/back/low/emit/machcode-codebuffer.api # where # "ae" == "asmcode_emitter"
# mcf == mcg::mcf; # "mcf" == "machcode_form" (abstract machine code).
)
: (weak) Squash_Jumps_And_Write_Code_To_Code_Segment_Buffer # Squash_Jumps_And_Write_Code_To_Code_Segment_Buffer is from
src/lib/compiler/back/low/jmp/squash-jumps-and-write-code-to-code-segment-buffer.api #
# Changing the above from weak to strong sealing appears to throw the
# compiler into some sort of infinite loop. --2011-06-16 CrT XXX BUGGO FIXME.
{
# Export to client packages:
#
package mcg = mcg; # "mcg" == "machcode_controlflow_graph".
stipulate
package mcf = jmp::mcf; # "mcf" == "machcode_form" (abstract machine code).
package rgk = mcf::rgk; # "rgk" == "registerkinds".
package pop = mcg::pop; # "pop" == "pseudo_op".
herein
# This is the view of our code we use while
# we're assigning each programcounter-relative
# jump the shortest possible length. It is a
# linklist chained through second tuple field:
#
Codechain
= BYTES (w8v::Vector, Codechain) # Machinecode sequence whose length is fixed -- basically, everything but PC-relative jumps and branches. :-)
| PSEUDO (pop::Pseudo_Op, Codechain)
# Pseudo-ops. We have to worry about alignment restrictions on basic blocks.
| JUMP (mcf::Machine_Op, Ref(Int), Codechain)
# A jump whose encoding length depends on distance to target address.
| LABEL (lbl::Codelabel, Codechain)
# A label to which a jump op might jump.
| NIL
# End of linklist.
;
max_variable_length_backpatch_iterations
=
lhc::make_int (
"max_variable_length_backpatch_iterations",
"number of variable-length backpath iterations"
);
my _ =
max_variable_length_backpatch_iterations := 40;
fun error msg
=
lem::error("vlBackPatch", msg);
# The assembly-language "text segment" will contain all machine instructions;
# The assembly language "data segment" will contain constants etc.
# We accumulate these separately in these two lists.
# (We need this even if we are generating machine-code directly # We currently generate assembly-code only for human display.
# without going through an assembly-code pass.)
#
my textseg_list: Ref( List( Codechain ) ) = REF []; # More icky thread-hostile mutable global state. XXX BUGGO FIXME.
my dataseg_list: Ref( List( pop::Pseudo_Op ) ) = REF []; # More icky thread-hostile mutable global state. XXX BUGGO FIXME.
fun clear__textseg_list__and__dataseg_list ()
=
{ textseg_list := [];
dataseg_list := [];
};
# Extract and return all constants and code from given list of basic blocks,
# saving them in (respectively) dataseg_list/textseg_list.
#
# Our basic-block list was generated in
#
#
src/lib/compiler/back/low/block-placement/make-final-basic-block-order-list-g.pkg
#
# and possibly tweaked in
#
#
src/lib/compiler/back/low/block-placement/forward-jumps-to-jumps-g.pkg #
# The textseg_list+dataseg_list we produce will be used in our below fun finish().
#
#
# We are called (only) from
src/lib/compiler/back/low/main/main/backend-lowhalf-g.pkg #
# Once we return from this function
# we are done with both the controlflow
# graph and the bblocks list.
#
fun extract_all_code_and_data_from_machcode_controlflow_graph
#
(npp: pp::Npp, cv: cv::Compiler_Verbosity)
#
( odg::DIGRAPH { graph_info => mcg::GRAPH_INFO { dataseg_pseudo_ops, ... }, ... },
bblocks # 'bblocks' lists all basic blocks in graph, in the order in which they should be concatenated to produce binary code.
) # This list was generated in
src/lib/compiler/back/low/block-placement/make-final-basic-block-order-list-g.pkg = # and then possibly tweaked in
src/lib/compiler/back/low/block-placement/forward-jumps-to-jumps-g.pkg # This global-ref-lists stuff SUCKS POINTLESSLY. It would be TRIVIAL
# to just return the lists here and pass them into 'finish'. XXX BUGGO FIXME.
#
{ textseg_list := (note_code_in bblocks) ! *textseg_list;
dataseg_list := *dataseg_pseudo_ops @ *dataseg_list;
}
where
# First arg is list of zero or more bytevectors
# to prepend to second argment, a codechain.
#
fun add_bytevectors_to_codechain ([ ], p) => p; # No bytevectors so nothing to do.
add_bytevectors_to_codechain ([s], p) => BYTES (s, p); # Single bytevector -- prepend it.
add_bytevectors_to_codechain ( s, p) => BYTES (w8v::cat s, p); # Multiple bytevectors -- concatenate them into one, then prepend it.
end;
fun note_code_in ((_, mcg::BBLOCK { alignment_pseudo_op, labels, ops, ... } ) ! remaining_bblocks)
#
# On each call, we add everything for one basic block to
# our testseg_list result accumulator. We then iterate
# until we've processed all given bblocks.
#
# For each bblock, we have three components to handle:
#
# 1) There may be an alignment pseudo-op specifying that
# the basic block is supposed to be word-aligned or such.
# If present, this must go first.
#
# 2) There can (must!) be code labels, so that other bblocks
# can jump to us. These need to go next.
#
# 3) Last comes the actual machine code. This will usually # "op" == "(abstract) machine instruction".
# consist of some plain machine instructions, which we can at # Note that the 'ops' list arrives in reverse order -- e.g., terminal jump/branch will be first.
# this point reduce to just bytestrings of absolute code, and
# one final conditional or unconditional jump, which we still
# need to represent abstractly because we have not yet decided
# how many bytes of offset address will be needed.
#
# In principle, either or both of these components may be missing;
# there might be no plain instructions, and if we just fall through
# to the next basic block, there will be no actual jump instruction.
=>
case *alignment_pseudo_op
#
THE pseudo_op => PSEUDO (pseudo_op, do_labels *labels); # Alignment pseudo-op first, then the rest.
NULL => do_labels *labels ; # No alignment pseudo-op on this basic block.
esac
where
# Add all machine instructions in basic block to result.
# At this point all plain intsructions become just BYTES bytestrings;
# only the final pc-relative branch/jump, if any stays abstract,
# as a JUMP entry, since we don't yet know how big an offset field
# to use for it:
#
# First arg is our machine-instructions list, now in
# forward order (i.e., terminal jump last).
#
fun do_ops ([], result)
=>
add_bytevectors_to_codechain
(
reverse result,
note_code_in remaining_bblocks # Done with this basic block; loop through remaining blocks recursively.
);
do_ops (this_op ! remaining_ops, result)
=>
if (not (jmp::is_sdi this_op))
# # Plain (fixed-length-in-bytes) machine instruction.
result = xe::op_to_bytevector(this_op) ! result; # Convert abstract machine instruction to absolute bytevector and add to result.
#
do_ops ( remaining_ops, result ); # Do rest of basic block recursively.
else
add_bytevectors_to_codechain
( reverse result,
JUMP ( this_op, # PC-relative jump/branch proper.
REF (jmp::min_size_of this_op), # Initial guess as to what size jump/branch should be.
do_ops (remaining_ops, []) # Recursively process rest of machine instructions in basic block. 'remaining_ops' should always be [] here, right...?
)
);
fi;
end;
# Add all codelabels on block to result, then
# call do_ops to add all the machine instructions:
#
fun do_labels (label ! rest) => LABEL (label, do_labels rest);
do_labels [] => do_ops (reverse *ops, []); # *ops is in reverse order, reversing that yields normal forward order.
end;
end;
note_code_in [] => NIL;
end;
end; # fun extract_all_code_and_data_from_machcode_controlflow_graph
fun squash_jumps_and_write_all_machine_code_and_data_bytes_into_code_segment_buffer (npp:Npp, cv: cv::Compiler_Verbosity)
=
# This fun gets called more or less immediately after above fun
#
# extract_all_code_and_data_from_machcode_controlflow_graph
#
# For context, see the comments in:
#
#
src/lib/compiler/back/low/jmp/squash-jumps-and-write-code-to-code-segment-buffer.api #
{
# So far we've been keeping separate
# the dataseg_list of global constants etc and
# the textseg_list of actual code.
#
# We now combine those into a single list
# of stuff in the order in which it should
# be printed as assembly code (or processed
# into a machinecode bytevector), which is to say,
# with all dataseg stuff logically first and
# all textseg stuff following in order.
#
# We have to do this flattening before we can
# do jump-squashing because PC-relative offsets
# may (will!) be used to address constants in the
# dataseg, so the total ordering of both must first
# be well-defined.
#
# Both dataset_list and textseg_list are physically in reverse order;
# Our result is also physically in reverse order, meaning the last
# machine instruction is first on the combined list and the first
# dataset pseudo-op is last on the combined list:
#
dataseg_and_textseg
=
cat_dataseg_and_textseg (*dataseg_list, *textseg_list, NIL)
then
clear__textseg_list__and__dataseg_list ();
# Assign initial addresses to all code labels.
#
# All JUMPs (i.e., span-dependent instructions)
# currently have the minimum possible length set,
# (i.e., one-byte offset assumed), regardless of
# whether this is actually possible.
#
# We simply sweep through the data and code in order
# summing the lengths of everything we pass and
# filling in codelabels as we come to them:
#
assign_addresses_sequentially dataseg_and_textseg;
# Iteratively expand all jumps (more precisely,
# "span dependent instructions") until each one
# has a programcounter-offset address field big
# enough to reach its target label:
#
(squash_all_jumps_to_minimum_size dataseg_and_textseg)
->
final_dataseg_plus_textseg_size_in_bytes;
# Set up codebuffer to accept our final result:
#
csb::initialize_code_segment_buffer
{
size_in_bytes => final_dataseg_plus_textseg_size_in_bytes
};
loc := 0;
write_dataseg_and_textseg_into_code_segment_buffer
#
(0, dataseg_and_textseg);
}
where
loop_count = REF 0;
nop = mu::nop ();
loc = REF 0;
fun write_bytevector_to_codesegment_buffer bytevector
=
w8v::apply
#
(\\ byte = { csb::write_byte_to_code_segment_buffer { offset => *loc, byte };
loc:= *loc+1;
}
)
#
bytevector;
fun write_dataseg_and_textseg_into_code_segment_buffer
(
pos,
codechain ! rest
)
=>
f (pos, codechain)
where
fun output_nop ()
=
write_bytevector_to_codesegment_buffer (xe::op_to_bytevector nop);
fun nops 0 => ();
nops n => { output_nop ();
nops (n - 1);
};
end;
fun f (pos, BYTES (s, r))
=>
{ write_bytevector_to_codesegment_buffer s;
f (pos + w8v::length s, r);
};
f (pos, JUMP (instruction, REF size, r))
=>
{ put_instructions = map (\\ i = xe::op_to_bytevector i);
instructions
=
put_instructions
(jmp::instantiate_span_dependent_op
{
sdi => instruction,
size_in_bytes => size,
at => pos
}
);
sum = list::fold_forward (\\ (a, b) = w8v::length a + b) 0;
n = size - sum instructions;
/*
if n > 0 then
print ("\t\t\t Inserting " + int::to_string n + "nops\n");
emit instruction;
fi;
*/
apply write_bytevector_to_codesegment_buffer instructions;
nops n;
f (pos + size, r);
};
f (pos, LABEL (lab, rest))
=>
if (pos == lbl::get_codelabel_address lab) f (pos, rest);
else error "write_dataseg_and_textseg_into_code_segment_buffer: LABEL";
fi;
f (pos, PSEUDO (pseudo_op, rest))
=>
{ my s: Ref( List( one_byte_unt::Unt ) )
= REF [];
pop::put_pseudo_op
{
pseudo_op,
loc => pos,
put_byte => (\\ w = s := w ! *s)
};
write_bytevector_to_codesegment_buffer (w8v::from_list (reverse *s));
f (pos + pop::current_pseudo_op_size_in_bytes (pseudo_op, pos), rest);
};
f (pos, NIL)
=>
write_dataseg_and_textseg_into_code_segment_buffer (pos, rest);
end;
end;
write_dataseg_and_textseg_into_code_segment_buffer (pos, [])
=>
();
end;
stipulate
# dataseg_and_textseg is a list of codechains;
# we have an outer loop here over the list of codechains,
# and here we iterate over the elements of an individual codechain.
#
# All we are doing is keeping count of the current address and
# updating jumplengths and codelabels accordingly.
#
fun do_one_codechain' { op => BYTES (bytevector, rest), address, made_progress } # For any fixed-length instruction, we just bump 'address' by length and cycle on down.
=>
do_one_codechain' { op => rest, address => address+w8v::length bytevector, made_progress };
do_one_codechain' { op => JUMP (instruction, r as REF old_size, rest), address, made_progress }
=>
{ jumpsize = jmp::sdi_size (instruction, lbl::get_codelabel_address, address); # Compute appropriate size-in-bytes for jump based on current address and target address.
# NB: If it is a forward jump, we're using a dubious target address -- might well increase as pass continues.
# We allow contraction in the first two passes; after
# that we only allow expansion, to ensure termination.
#
if ((*loop_count <= *max_variable_length_backpatch_iterations
and jumpsize != old_size
)
or jumpsize > old_size
)
r := jumpsize; # Change jump to new size.
do_one_codechain' { op => rest, address => address + jumpsize, made_progress => TRUE };
else
do_one_codechain' { op => rest, address => address + old_size, made_progress }; # Leave jump at original size.
fi;
};
do_one_codechain' { op => LABEL (l, rest), address, made_progress } # Codelabel. If its address has changed, update it, otherwise just continue.
=>
if (lbl::get_codelabel_address(l) == address)
#
do_one_codechain' { op => rest, address, made_progress };
else
lbl::set_codelabel_address (l, address);
do_one_codechain' { op => rest, address, made_progress => TRUE };
fi;
do_one_codechain' { op => PSEUDO (pseudo_op, rest), address, made_progress } # Alignment pseudo-ops may change "length" as code around them expands and contracts.
=>
{ old_size = pop::current_pseudo_op_size_in_bytes (pseudo_op, address);
new_size = { pop::adjust_labels (pseudo_op, address); pop::current_pseudo_op_size_in_bytes (pseudo_op, address); };
do_one_codechain'
{ op => rest,
address => address + new_size,
made_progress => made_progress or new_size!=old_size # XXXX????
};
};
do_one_codechain' { op => NIL, address, made_progress }
=>
(address, made_progress);
end;
fun do_one_pass_adjusting_jump_sizes_and_code_labels_as_needed dataseg_and_textseg
=
list::fold_forward
do_one_codechain
(0, FALSE) # Set initial-address to 0 and 'made_progress' flag to FALSE.
dataseg_and_textseg # Process every op in dataseg+textseg.
where
fun do_one_codechain (codechain, (address, made_progress))
=
do_one_codechain' { op => codechain, address, made_progress };
end;
herein
fun squash_all_jumps_to_minimum_size dataseg_and_textseg
=
{ (do_one_pass_adjusting_jump_sizes_and_code_labels_as_needed dataseg_and_textseg)
->
(dataseg_plus_textseg_size_in_bytes, made_progress);
if made_progress
#
loop_count := *loop_count + 1;
squash_all_jumps_to_minimum_size dataseg_and_textseg; # See if we can make more progress! :-)
else
dataseg_plus_textseg_size_in_bytes; # DONE -- each jump ("span dependent instruction") now has its final -- and workable! -- size-in-bytes selected.
fi;
};
end;
# See comments at callpoint.
#
fun assign_addresses_sequentially dataseg_and_textseg
=
list::fold_forward
(\\ (codechain, loc) = assign_addresses_sequentially' (codechain, loc))
0 # Assign address zero to first entry, etc. (Remember that only relative addresses actually matter.)
dataseg_and_textseg # List of dataseg and textseg entries to process -- i.e., everything in this compilation unit.
where
fun assign_addresses_sequentially' (BYTES (bytes, rest), loc)
=>
assign_addresses_sequentially' (rest, loc + w8v::length bytes);
assign_addresses_sequentially' (PSEUDO (pseudo_op, rest), loc)
=>
{ pop::adjust_labels (pseudo_op, loc);
assign_addresses_sequentially' (rest, loc + pop::current_pseudo_op_size_in_bytes (pseudo_op, loc)); # The issue here is that the length-in-bytes of an alignment op depends on address.
};
assign_addresses_sequentially' (JUMP (sdi, size, rest), loc)
=>
assign_addresses_sequentially' (rest, loc + *size);
assign_addresses_sequentially' (LABEL (lab, rest), loc)
=>
{ lbl::set_codelabel_address (lab, loc);
assign_addresses_sequentially' (rest, loc);
};
assign_addresses_sequentially' (NIL, loc)
=>
loc;
end;
end;
# See comments at callpoint.
#
fun cat_dataseg_and_textseg (dataseg_op ! rest, codechains, result)
=>
cat_dataseg_and_textseg (rest, codechains, PSEUDO (dataseg_op, result)); # Phase I: Add one dataseg pseudo-op per cycle to our result list.
cat_dataseg_and_textseg ([], codechains, result)
=>
reverse_codechains (codechains, [result]) # Phase II: Done everything in dataset, now add one textseg codechain per cycle to result list.
where
fun reverse_codechains (codechain ! rest, result) => reverse_codechains (rest, codechain ! result);
reverse_codechains ([], result) => result; # Phase I & II complete, return result list.
end;
end;
end;
end; # fun squash_jumps_and_write_all_machine_code_and_data_bytes_into_code_segment_buffer
end; # stipulate
}; # generic package squash_jumps_and_make_machinecode_bytevector_intel32_g
end; # stipulate
## Copyright 1999 by Bell Laboratories
## Subsequent changes by Jeff Prothero Copyright (c) 2010-2015,
## released per terms of SMLNJ-COPYRIGHT.