## squash-jumps-and-write-code-to-code-segment-buffer-sparc32-g.pkg
#
# "This version of span dependency resolution also
# fills delay slots using a few simple strategies.
# Assumption: Instructions are 32 bits."
# -- Allen Leung
#
# See docs in src/lib/compiler/back/low/doc/latex/span-dep.tex
#
# On intel32 we instead use:
#
#
src/lib/compiler/back/low/jmp/squash-jumps-and-write-code-to-code-segment-buffer-intel32-g.pkg# Compiled by:
#
src/lib/compiler/back/low/lib/lowhalf.lib### "Imagine if every Thursday your shoes exploded
### if you tied them the usual way. This happens
### to us all the time with computers, and nobody
### thinks of complaining."
###
### -- Jeff Raskin
# We get invoked by:
#
#
src/lib/compiler/back/low/main/sparc32/backend-lowhalf-sparc32.pkgstipulate
package odg = oop_digraph; # oop_digraph is from
src/lib/graph/oop-digraph.pkg package lbl = codelabel; # codelabel is from
src/lib/compiler/back/low/code/codelabel.pkg package lem = lowhalf_error_message; # lowhalf_error_message is from
src/lib/compiler/back/low/control/lowhalf-error-message.pkg package pp = standard_prettyprinter; # standard_prettyprinter is from
src/lib/prettyprint/big/src/standard-prettyprinter.pkg package cv = compiler_verbosity; # compiler_verbosity is from
src/lib/compiler/front/basics/main/compiler-verbosity.pkg package rwv = rw_vector; # rw_vector is from
src/lib/std/src/rw-vector.pkg #
Npp = pp::Npp;
herein
generic package squash_jumps_and_make_machinecode_bytevector_sparc32_g (
# ======================================================
#
package xe: Machcode_Codebuffer; # Machcode_Codebuffer is from
src/lib/compiler/back/low/emit/machcode-codebuffer.api # "xe" == "execode emitter".
package mcg: Machcode_Controlflow_Graph # Machcode_Controlflow_Graph is from
src/lib/compiler/back/low/mcg/machcode-controlflow-graph.api where
mcf == xe::mcf # "mcf" == "machcode_form" (abstract machine code).
also pop == xe::cst::pop; # "pop" == "pseudo_op".
package jmp: Jump_Size_Ranges # Jump_Size_Ranges is from
src/lib/compiler/back/low/jmp/jump-size-ranges.api where
mcf == mcg::mcf; # "mcf" == "machcode_form" (abstract machine code).
package dsp: Delay_Slot_Properties # Delay_Slot_Properties is from
src/lib/compiler/back/low/jmp/delay-slot-props.api where # "dsp" == "delay_slot_properties".
mcf == mcg::mcf; # "mcf" == "machcode_form" (abstract machine code).
package mu: Machcode_Universals # Machcode_Universals is from
src/lib/compiler/back/low/code/machcode-universals.api where # "mu" == "machcode_universals".
mcf == mcg::mcf; # "mcf" == "machcode_form" (abstract machine code).
package ae: Machcode_Codebuffer_Pp # Machcode_Codebuffer_Pp is from
src/lib/compiler/back/low/emit/machcode-codebuffer-pp.api where
mcf == mcg::mcf # "mcf" == "machcode_form" (abstract machine code).
also cst == xe::cst; # "cst" == "codestream".
)
: (weak) Squash_Jumps_And_Write_Code_To_Code_Segment_Buffer # Squash_Jumps_And_Write_Code_To_Code_Segment_Buffer is from
src/lib/compiler/back/low/jmp/squash-jumps-and-write-code-to-code-segment-buffer.api {
# Export to client packages:
#
package mcg = mcg; # "mcg" == "machcode_controlflow_graph";
stipulate
package mcf = mcg::mcf; # "mcf" == "machcode_form" (abstract machine code).
package rgk = mcf::rgk; # "rgk" == "registerkinds".
package pop = mcg::pop; # "pop" == "pseudo_op".
herein
fun error msg
=
lem::error("span_dependency_resolution", msg);
Code
= SDI { size: Ref( Int ), # variable sized "sdi" == "span dependent instruction" -- variable size branch/jump.
instruction: mcf::Machine_Op
}
| FIXED { size: Int,
# Size of fixed instructions.
ops: List( mcf::Machine_Op )
}
| BRANCH { instruction: List( Code ),
# Instruction with delay slot.
branch_size: Int,
fill_slot: Ref( Bool )
}
| DELAYSLOT { instruction: List( Code ),
# Instruction in delay slot.
fill_slot: Ref( Bool )
}
| CANDIDATE
# Two alternatives.
{ old_instructions: List( Code ), # Without delay slot filling.
new_instructions: List( Code ), # When delay slot is filled.
fill_slot: Ref( Bool ) # Should we fill the delay slot?
};
Compressed
= PSEUDO pop::Pseudo_Op
| LABEL lbl::Codelabel
| CODE (lbl::Codelabel, List( Code ))
;
Cccomponent = CCCOMPONENT { comp: List(Compressed) }; # In the -intel32 file, eliminating this wrapper type worked fine.
# "cccomponent" == "callgraph connected component" -- our normal unit of compilation during the nextcode passes and later.
# The assembly-language "text segment" will contain all machine instructions;
# The assembly language "data segment" will contain constants etc.
# We accumulate these separately in these two lists.
# (We need this even if we are generating machine-code directly # We currently generate assembly-code only for human display.
# without going through an assembly-code pass.)
#
my textseg_list: Ref( List( Cccomponent ) ) = REF []; # XXX BUGGO FIXME. More icky global variables.
my dataseg_list: Ref( List( pop::Pseudo_Op ) ) = REF []; # XXX BUGGO FIXME. More icky global variables.
fun clear__textseg_list__and__dataseg_list ()
=
{ textseg_list := [];
dataseg_list := [];
};
# Extract and return all constants and code from given list of basic blocks,
# saving them in (respectively) dataseg_list/textseg_list.
#
# Our basic-block list was generated in
#
#
src/lib/compiler/back/low/block-placement/make-final-basic-block-order-list-g.pkg
#
# and possibly tweaked in
#
#
src/lib/compiler/back/low/block-placement/forward-jumps-to-jumps-g.pkg #
# The textseg_list+dataseg_list we produce will be used in our below
# fun squash_jumps_and_write_all_machine_code_and_data_bytes_into_code_segment_buffer().
#
#
# We are called (only) from
src/lib/compiler/back/low/main/main/backend-lowhalf-g.pkg #
fun extract_all_code_and_data_from_machcode_controlflow_graph
#
(npp: pp::Npp, cv: cv::Compiler_Verbosity)
#
( odg::DIGRAPH graph,
blocks: List( mcg::Node ) # This basic-block list gives the final order in which all basic blocks should be concatenated to produce final machine-code bytevector.
)
=
{ blocks = map #2 blocks;
fun max_block_id (mcg::BBLOCK { id, ... } ! rest, curr)
=>
if (id > curr) max_block_id (rest, id);
else max_block_id (rest, curr);
fi;
max_block_id([], curr)
=>
curr;
end;
nnn = max_block_id (blocks, graph.capacity ());
# Order of blocks in code layout
blk_order = rwv::make_rw_vector (nnn, 0);
# Maps blknum -> label at the position of the second instruction
# This is in case the first instruction gets used to fill a delay slot
dummy = lbl::make_anonymous_codelabel ();
label_map = rwv::make_rw_vector (nnn, dummy);
fun enter_labels blocks # Enter labels into the label map:
=
list::apply
(\\ mcg::BBLOCK { id, ... } = rwv::set (label_map, id, lbl::make_anonymous_codelabel ()))
blocks;
fun block_order (blocks) # Create block order
=
list::fold_forward order 0 blocks
where
fun order (mcg::BBLOCK { id, ... }, n)
=
{ rwv::set (blk_order, id, n);
n + 1;
};
end;
fun is_fallthrough (blk1, blk2) # "Fain would I climb, yet fear I to fall." -- Sir Walter Raleigh
=
rwv::get (blk_order, blk1) + 1
==
rwv::get (blk_order, blk2);
fun is_backwards (blk1, blk2)
=
rwv::get (blk_order, blk2)
<=
rwv::get (blk_order, blk1);
# Zero length copy instruction :
#
fun is_empty_copy instruction
=
mu::instruction_kind (instruction) == mu::k::COPY
and
jmp::sdi_size (instruction, lbl::get_codelabel_address, 0) == 0;
# Find the target of a block, and return the first instruction and
# its associated label.
#
fun find_target (blknum, [ mcg::BBLOCK { id=>id1, ops => ops1, ... },
mcg::BBLOCK { id=>id2, ops => ops2, ... }
]
)
=>
{ fun extract (blknum, ops)
=
{ # Skip over empty copies:
fun find [] => NULL;
find (instrs as instruction ! rest)
=>
if (is_empty_copy instruction) find rest;
else find' rest;
fi;
end
# Okay, we are now guaranteed that the remaining
# instructions will not be used in the delay slot of
# the current block. Find the first instruction.
#
also
fun find' [first] => THE (first, rwv::get (label_map, blknum));
find' [] => NULL;
find' (_ ! rest) => find' rest;
end;
case ops
#
jmp ! rest
=>
if (mu::instruction_kind jmp == mu::k::JUMP)
#
find rest;
else find ops;
fi;
[] => NULL; # No first instruction.
esac;
};
if (is_fallthrough (blknum, id1)) extract (id2, *ops2);
elif (is_fallthrough (blknum, id2)) extract (id1, *ops1);
else NULL;
fi;
};
find_target _
=>
NULL;
end;
fun compress [] => [];
#
compress (mcg::BBLOCK { id, alignment_pseudo_op, labels, ops, ... } ! rest)
=>
{ next = map graph.node_info (graph.next id);
backward
=
list::exists
(\\ mcg::BBLOCK { id=>id1, ... } = is_backwards (id, id1))
next;
# Build the code list
fun scan ([], non_sdi_instrs, non_sdi_size, code)
=>
group (non_sdi_size, non_sdi_instrs, code);
scan (instruction ! instrs, non_sdi_instrs, non_sdi_size, code)
=>
{ (dsp::delay_slot { instruction, backward })
->
{ n, n_on, n_off, nop };
case (n_off, instrs)
#
(dsp::D_ALWAYS, delay_slot ! rest)
=>
if (dsp::delay_slot_candidate { jmp=>instruction, delay_slot }
and not (dsp::conflict { src=>delay_slot, dst=>instruction } )
)
scan (rest,[], 0,
make_candidate1 (instruction, delay_slot)
!
group (non_sdi_size, non_sdi_instrs, code));
else
scan_sdi (instruction, instrs, non_sdi_instrs, non_sdi_size, code);
fi;
_ => scan_sdi (instruction, instrs, non_sdi_instrs, non_sdi_size, code);
esac;
};
end
also
fun scan_sdi (instruction, instrs, non_sdi_instrs, non_sdi_size, code)
=
{ s = jmp::min_size_of instruction;
if (jmp::is_sdi instruction)
#
scan ( instrs,[], 0,
SDI { size=>REF s, instruction=>instruction }
!
group (non_sdi_size, non_sdi_instrs, code)
);
else
scan (instrs, instruction ! non_sdi_instrs, non_sdi_size+s, code);
fi;
}
also
fun group (0, [], code) => code;
group (size, ops, code) => FIXED { size, ops } ! code;
end
also
fun build_list instrs
=
scan'(instrs,[], 0,[])
also
fun scan'([], non_sdi_instrs, non_sdi_size, code)
=>
group (non_sdi_size, non_sdi_instrs, code);
scan'(instruction ! instrs, non_sdi_instrs, non_sdi_size, code)
=>
{ s = jmp::min_size_of instruction;
if (jmp::is_sdi instruction)
#
scan'(instrs,[], 0,
SDI { size=>REF s, instruction=>instruction }
!
group (non_sdi_size, non_sdi_instrs, code));
else
scan'(instrs, instruction ! non_sdi_instrs, non_sdi_size+s, code);
fi;
};
end
# Create a branch delay slot candidate sequence.
# jmp is the normal jump instruction; jmp' is the
# jump instruction when the delay slot is active.
#
also
fun make_candidate1 (jmp, delay_slot)
=
{ fill_slot = REF TRUE;
jmp' = dsp::enable_delay_slot { n=>FALSE, nop=>FALSE, instruction=>jmp };
CANDIDATE { new_instructions=>
[BRANCH { branch_size=>jmp::min_size_of jmp',
instruction=>build_list [jmp'],
fill_slot },
DELAYSLOT { instruction=>build_list [delay_slot],
fill_slot } ],
old_instructions=>build_list [jmp, delay_slot],
fill_slot };
}
# Create a branch delay slot candidate sequence.
# jmp is the normal jump instruction; jmp' is the
# jump instruction when the delay slot is active.
#
also
fun make_candidate2 (jmp, delay_slot, label)
=
{ fill_slot = REF TRUE;
jmp' = dsp::set_target(
dsp::enable_delay_slot { n=>TRUE, nop=>FALSE, instruction=>jmp },
label);
CANDIDATE { new_instructions=>
[BRANCH { branch_size=>jmp::min_size_of jmp',
instruction=>build_list [jmp'],
fill_slot },
DELAYSLOT { instruction=>build_list [delay_slot],
fill_slot } ],
old_instructions=>build_list [jmp],
fill_slot };
}
# Try different strategies for delay slot filling
#
also
fun fit_delay_slot (jmp, body)
=
case body # Remove empty copies
#
[] => fit_delay_slot'(jmp, body);
prev ! rest
=>
if (is_empty_copy prev) fit_delay_slot (jmp, rest);
else fit_delay_slot'(jmp, body);
fi;
esac
also
fun fit_delay_slot'(jmp, body)
=
{ my { n, n_on, n_off, nop }
=
dsp::delay_slot { instruction=>jmp, backward };
# Use the previous instruction to fill the delay slot
fun strategy1 ()
=
case (n_off, body)
#
(dsp::D_ALWAYS, delay_slot ! body)
=>
if (not (dsp::delay_slot_candidate { jmp, delay_slot } )
or dsp::conflict { src=>delay_slot, dst=>jmp } )
#
strategy2 ();
else
scan (
body,
[],
0,
[make_candidate1 (eliminate_nop jmp, delay_slot)]
);
fi;
_ => strategy2 ();
esac
# Use the first instruction in the target block to fill
# the delay slot.
# BUG FIX: note this is unsafe if this first instruction
# is also used to fill the delay slot in the target block!
also
fun strategy2 ()
=
case (n_on, find_target (id, next))
#
(dsp::D_TAKEN, THE (delay_slot, label))
=>
if (not (dsp::delay_slot_candidate { jmp, delay_slot } )
or dsp::conflict { src=>delay_slot, dst=>jmp } )
#
strategy3 ();
else
scan (body,[], 0, [make_candidate2 (eliminate_nop jmp, delay_slot, label)]);
fi;
_ => strategy3 ();
esac
# If nop is on and if the delay slot is only active on
# the fallsthru branch, then turn nullify on and eliminate
# the delay slot
also
fun strategy3 ()
=
scan (eliminate_nop (jmp) ! body,[], 0,[])
also
fun eliminate_nop (jmp)
=
case (nop, n_on)
#
(TRUE, (dsp::D_FALLTHRU
| dsp::D_NONE))
=>
dsp::enable_delay_slot { n=>TRUE, nop=>FALSE, instruction=>jmp };
_ => jmp;
esac;
strategy1();
}
also
fun process (instrs, others)
=
{ fun align_it (chunks)
=
case *alignment_pseudo_op
#
NULL => chunks;
THE p => PSEUDO (p) ! chunks;
esac;
code
=
case instrs
#
[] => [];
jmp ! body
=>
case (mu::instruction_kind jmp)
#
mu::k::JUMP
=>
fit_delay_slot (jmp, body);
_ => scan (instrs, [], 0, []);
esac;
esac;
align_it
(map LABEL *labels @
CODE (rwv::get (label_map, id), code) ! others);
};
process (*ops, compress rest);
};
end; # fun compress
graph.graph_info -> mcg::GRAPH_INFO { dataseg_pseudo_ops, ... };
block_order blocks;
enter_labels blocks;
textseg_list := CCCOMPONENT { comp=>compress blocks } ! *textseg_list;
dataseg_list := *dataseg_pseudo_ops @ *dataseg_list;
}; # fun bbsched
fun squash_jumps_and_write_all_machine_code_and_data_bytes_into_code_segment_buffer (npp:Npp, cv: cv::Compiler_Verbosity)
=
{ fun labels (PSEUDO pseudo_op ! rest, loc)
=>
{ pop::adjust_labels (pseudo_op, loc);
#
labels (rest, loc+pop::current_pseudo_op_size_in_bytes (pseudo_op, loc));
};
labels (LABEL lab ! rest, loc)
=>
{ lbl::set_codelabel_address (lab, loc);
#
labels (rest, loc);
};
labels (CODE (lab, code) ! rest, loc)
=>
{ fun size (FIXED { size, ... } ) => size;
size (SDI { size, ... } ) => *size;
size (BRANCH { instruction, ... } ) => size_list (instruction, 0);
size (DELAYSLOT { instruction, ... } ) => size_list (instruction, 0);
size (CANDIDATE { old_instructions, new_instructions, fill_slot, ... } )
=>
size_list
(
if *fill_slot new_instructions;
else old_instructions;
fi,
0
);
end
also
fun size_list ([], n)
=>
n;
size_list (code ! rest, n)
=>
size_list (rest, size code + n);
end;
lbl::set_codelabel_address (lab, loc+4);
labels (rest, size_list (code, loc));
};
labels ([], loc)
=>
loc;
end;
fun init_labels cccomponents
=
list::fold_forward
(\\ (CCCOMPONENT { comp }, loc) = labels (comp, loc))
0
cccomponents;
delay_slot_bytes
=
dsp::delay_slot_bytes;
/*
Suppose we have:
u
jmp L1
nop
...
L1: i
j
k
I insert a fake label L2:
L1: i
L2: j
k
L2 is the label in CODE (label, code).
If instruction u cannot be put into the delay slot of jmp L1 I try
to put i into the delay slot of L1. This creates code like this:
u
jmp L2
i
...
L1: i
L2: j
k
-- Allen Leung
*/
fun adjust (CCCOMPONENT { comp, ... }, pos, changed)
=
{ fun scan (PSEUDO pseudo_op ! rest, pos, changed)
=>
{ chgd = pop::adjust_labels (pseudo_op, pos);
scan (rest, pos+pop::current_pseudo_op_size_in_bytes (pseudo_op, pos), changed or chgd);
};
scan (LABEL lab ! rest, pos, changed)
=>
if (lbl::get_codelabel_address(lab) == pos)
#
scan (rest, pos, changed);
else
lbl::set_codelabel_address (lab, pos);
scan (rest, pos, TRUE);
fi;
scan (CODE (lab, code) ! rest, pos, changed)
=>
{ my (new_pos, changed)
=
do_code (code, pos, changed);
if (lbl::get_codelabel_address (lab) == pos+4)
#
scan (rest, new_pos, changed);
else
lbl::set_codelabel_address (lab, pos+4);
scan (rest, new_pos, TRUE);
fi;
};
scan([], pos, changed)
=>
(pos, changed);
end
also
fun do_code ([], pos, changed)
=>
(pos, changed);
do_code (code ! rest, pos, changed)
=>
case code
#
FIXED { size, ... }
=>
do_code (rest, pos+size, changed);
SDI { size, instruction }
=>
{ new_size = jmp::sdi_size (instruction, lbl::get_codelabel_address, pos);
#
if (new_size <= *size)
#
do_code (rest,*size + pos, changed);
else
size := new_size;
do_code (rest, new_size+pos, TRUE);
fi;
};
DELAYSLOT { instruction, fill_slot, ... }
=>
{ (do_code (instruction, pos, changed))
->
(new_pos, changed);
do_code (
#
rest,
new_pos,
if (new_pos - pos != delay_slot_bytes)
#
fill_slot := FALSE;
TRUE;
else
changed;
fi
);
};
BRANCH { instruction, branch_size, fill_slot, ... }
=>
{ (do_code (instruction, pos, changed))
->
(new_pos, changed);
do_code (
#
rest,
new_pos,
if (new_pos - pos != branch_size)
#
fill_slot := FALSE;
TRUE;
else
changed;
fi
);
};
CANDIDATE { old_instructions, new_instructions, fill_slot, ... }
=>
do_code(
if *fill_slot new_instructions;
else old_instructions;
fi
@ rest,
pos,
changed
);
esac;
end;
scan (comp, pos, changed);
}; # fun adjust
fun adjust_labels cccomponents
=
list::fold_forward f (0, FALSE) cccomponents
where
fun f (cl, (pos, chgd))
=
adjust (cl, pos, chgd);
end;
fun fixpoint zl i
=
{ my (size, changed)
=
adjust_labels zl;
changed ?? fixpoint zl (i+1)
:: size;
};
dump_machcode_controlflow_graph_after_span_dependent_phase
=
lowhalf_control::make_bool (
"dump_machcode_controlflow_graph_after_span_dependent_phase",
"whether flow graph is shown after spandep phase"
);
fun put_all_cccomponents
#
(buf: xe::cst::Codebuffer (xe::mcf::Machine_Op, B, C, D))
size
compressed
=
{ fun put_cccomponent (CCCOMPONENT { comp }, loc)
=
{ put_ops = apply buf.put_op;
fun nops 0 => ();
#
nops n => if (n < 0)
#
error "nops";
else
buf.put_op (mu::nop ());
nops (n - 4);
fi;
end;
fun process (PSEUDO pseudo_op, loc)
=>
{ buf.put_pseudo_op pseudo_op;
loc+pop::current_pseudo_op_size_in_bytes (pseudo_op, loc);
};
process (LABEL label, loc)
=>
{ address = lbl::get_codelabel_address label;
#
if (address == loc) buf.put_private_label label; loc;
elif (address > loc) nops (address-loc); buf.put_private_label label; address;
else error "label";
fi;
};
process (CODE (lab, code), loc)
=>
{ fun e (FIXED { ops, size, ... }, loc)
=>
{ put_ops ops;
loc + size;
};
e (SDI { size, instruction }, loc)
=>
{ put_ops (jmp::instantiate_span_dependent_op { sdi => instruction,
size_in_bytes => *size,
at => loc
}
);
*size + loc;
};
e (BRANCH { instruction, ... }, loc) => fold_forward e loc instruction;
e (DELAYSLOT { instruction, ... }, loc) => fold_forward e loc instruction;
e (CANDIDATE { new_instructions, old_instructions, fill_slot, ... }, loc)
=>
fold_forward
e
loc
(*fill_slot ?? new_instructions
:: old_instructions
);
end;
fold_forward e loc code;
};
end;
fold_forward process loc comp;
};
buf.start_new_cccomponent size;
fold_forward put_cccomponent 0 compressed;
};
# The dataList is in reverse order and the cccomponents are in reverse
#
fun data_cccomponent ([], results) => CCCOMPONENT { comp=>results };
data_cccomponent (d ! dl, results) => data_cccomponent (dl, PSEUDO d ! results);
end;
compressed
=
reverse (data_cccomponent (*dataseg_list, []) ! *textseg_list)
then
clear__textseg_list__and__dataseg_list ();
init_labels compressed;
put_all_cccomponents (xe::make_codebuffer []) (fixpoint compressed 0) compressed;
case npp
#
NULL => ();
#
THE pp => { put_all_cccomponents (ae::make_codebuffer pp []) 0 compressed;
();
};
esac;
();
}; # fun finish
end;
}; # generic package squash_jumps_and_make_machinecode_bytevector_sparc32_g
end;