## make-sourcecode-for-translate-machcode-to-execode-xxx-g-package.pkg -- derived from ~/src/sml/nj/smlnj-110.60/MLRISC/Tools/ADL/mdl-gen-mc.sml
#
# This module generates the machine code emitter of an architecture
# given an architecture description.
#
# This currently generates
#
#
src/lib/compiler/back/low/pwrpc32/emit/translate-machcode-to-execode-pwrpc32-g.codemade.pkg#
src/lib/compiler/back/low/sparc32/emit/translate-machcode-to-execode-sparc32-g.codemade.pkg#
# Due to the complexity of the Intel32 (x86) instruction-set encoding, this
# module has to be hand-coded for that platform:
#
#
src/lib/compiler/back/low/intel32/translate-machcode-to-execode-intel32-g.pkg# Compiled by:
#
src/lib/compiler/back/low/tools/arch/make-sourcecode-for-backend-packages.lib### "First rate mathematicians choose first rate people,
### but second rate mathematicians choose third rate people."
###
### -- Andre Weil
stipulate
package ard = architecture_description; # architecture_description is from
src/lib/compiler/back/low/tools/arch/architecture-description.pkgherein
api Make_Sourcecode_For_Translate_Machcode_To_Execode_Xxx_G_Package {
#
make_sourcecode_for_translate_machcode_to_execode_xxx_g_package
:
ard::Architecture_Description -> Void;
};
end;
stipulate
package ard = architecture_description; # architecture_description is from
src/lib/compiler/back/low/tools/arch/architecture-description.pkg package err = adl_error; # adl_error is from
src/lib/compiler/back/low/tools/line-number-db/adl-error.pkg package lms = list_mergesort; # list_mergesort is from
src/lib/src/list-mergesort.pkg package mst = adl_symboltable; # adl_symboltable is from
src/lib/compiler/back/low/tools/arch/adl-symboltable.pkg package raw = adl_raw_syntax_form; # adl_raw_syntax_form is from
src/lib/compiler/back/low/tools/adl-syntax/adl-raw-syntax-form.pkg package rsj = adl_raw_syntax_junk; # adl_raw_syntax_junk is from
src/lib/compiler/back/low/tools/adl-syntax/adl-raw-syntax-junk.pkg package rst = adl_raw_syntax_translation; # adl_raw_syntax_translation is from
src/lib/compiler/back/low/tools/adl-syntax/adl-raw-syntax-translation.pkg package smj = sourcecode_making_junk; # sourcecode_making_junk is from
src/lib/compiler/back/low/tools/arch/sourcecode-making-junk.pkg package spp = simple_prettyprinter; # simple_prettyprinter is from
src/lib/prettyprint/simple/simple-prettyprinter.pkg package u32 = one_word_unt;
#
++ = spp::CONS; infix my ++ ;
alpha = spp::ALPHABETIC;
iblock = spp::INDENTED_BLOCK;
indent = spp::INDENT;
nl = spp::NEWLINE;
nop = spp::NOP;
punct = spp::PUNCTUATION;
herein
# We are run-time invoked in:
#
src/lib/compiler/back/low/tools/arch/make-sourcecode-for-backend-packages-g.pkg # We are compile-time invoked in:
#
src/lib/compiler/back/low/tools/arch/make-sourcecode-for-backend-packages.pkg package make_sourcecode_for_translate_machcode_to_execode_xxx_g_package
: (weak) Make_Sourcecode_For_Translate_Machcode_To_Execode_Xxx_G_Package
{
infix my <<
|| && ;
<< = u32::(<<);
|| = u32::bitwise_or;
&& = u32::bitwise_and;
bitwise_not = u32::bitwise_not;
fun make_sourcecode_for_translate_machcode_to_execode_xxx_g_package
#
architecture_description
=
smj::write_sourcecode_file
{
architecture_description,
created_by_package => "src/lib/compiler/back/low/tools/arch/make-sourcecode-for-translate-machcode-to-execode-xxx-g-package.pkg",
#
subdir => "emit", # Relative to file containing architecture description.
make_filename => \\ architecture_name = if (architecture_name=="intel32") sprintf "translate-machcode-to-execode-%s-g.codemade.pkg.unused" architecture_name;
else sprintf "translate-machcode-to-execode-%s-g.codemade.pkg" architecture_name;
fi, # architecture_name can be "pwrpc32"
|"sparc32"|"intel32".
code => [ pkg_code ]
}
where
# Name of the generic and api:
#
# str_name = smj::make_package_name architecture_description "MCEmitter";
api_name = "Machcode_Codebuffer";
arch_name = ard::architecture_name_of architecture_description;
archl = string::to_lower arch_name;
archm = string::to_mixed arch_name;
# Is debugging turned on?
#
debug_on = ard::debugging architecture_description "MC";
# Arguments for the generic:
#
args = [ "package mcf: " + smj::make_api_name architecture_description "Machcode"
+ (sprintf ";\t\t\t\t\t\t# Machcode_%s\t\tis from src/lib/compiler/back/low/%s/code/machcode-%s.codemade.api" archm archl archl),
"",
"package tce: Treecode_Eval\t\t\t\t\t\t# Treecode_Eval\t\t\tis from src/lib/compiler/back/low/treecode/treecode-eval.api",
" where",
" tcf == mcf::tcf;\t\t\t\t\t# \"tcf\" == \"treecode_form\".",
"",
"package cst: Codebuffer;\t\t\t\t\t\t# Codebuffer\t\t\tis from src/lib/compiler/back/low/code/codebuffer.api",
"",
"package csb: Code_Segment_Buffer;\t\t\t\t\t# Code_Segment_Buffer\t\tis from src/lib/compiler/back/low/emit/code-segment-buffer.api"
]
@
if debug_on
#
[ "package asm_emitter: Machcode_Codebuffer",
" where I = Instr and S = treecode_stream::S.Stream"
];
else
[];
fi;
instruction_formats = ard::instruction_formats_of architecture_description; # Instruction formats declared in the description file.
# Instruction widths that are defined in this architecture:
#
widths = lms::sort_list_and_drop_duplicates
#
int::compare
#
(fold_backward
\\ ((THE w, _), l) => w ! l;
(_, l) => l;
end
[]
instruction_formats
);
# The Instruction package:
#
symboltable
=
mst::find_package
(ard::symboltable_of architecture_description)
(raw::IDENT([], "Instruction"));
# Make sure that all widths are
# either 8, 16, 24, or 32 bits: # 64-bit issue XXX BUGGO FIXME
#
apply
(\\ w =
if (w < 8 or w > 32 or w % 8 != 0)
#
err::error ("instruction format must be 8, 16, 24, or 32 bits; found" + int::to_string w);
fi
)
widths;
endian = ard::endian_of architecture_description; # Endian -- LITTLE or BIG
fun emit id # Name of an emit function.
=
"put_" + id;
# For each width N, generate a function e_wordN to
# emit a word of that width. A typical \\ looks like:
#
# fun e_word32 w
# =
# { b8 = w;
# w = w >> 0ux8;
# b16 = w;
# w = w >> 0ux8;
# b24 = w;
# w = w >> 0ux8;
# b32 = w;
#
# { put_byte_w b32;
# put_byte_w b24;
# put_byte_w b16;
# put_byte_w b8 ;
# };
# };
#
#
put_funs
=
raw::FUN_DECL (map make_emit_word_fun widths)
where
# dummy_bind = raw::FUN("dummy",[]);
fun make_emit_word_fun width
=
raw::FUN ( "e_word" + int::to_string width,
[ raw::CLAUSE ( [raw::IDPAT "w"],
NULL,
rsj::let_fn ( debug @ reverse (f width),
raw::SEQUENTIAL_EXPRESSIONS body
)
)
]
)
where
# This fn generates the required
# extract-a-byte statement pairs like
#
# b16 = w;
# w = w >> 0ux8;
#
fun f 0 => [];
f 8 => [ rsj::my_fn ("b8", rsj::id "w") ]; # "b8 = w;"
f b => rsj::my_fn ("b" + int::to_string b, rsj::id "w") # "b16 = w;" or such.
! rsj::my_fn ("w", rsj::slr (rsj::id "w", rsj::unt1expression 0u8)) # "w = w >> 0ux8;" -- "slr" == "shift logical right" where "logical" means to shift in zeros at high end.
! f (b - 8);
end;
# This fn generates the required statements like:
#
# put_byte_w b16;
#
fun g 0 => [];
g b => rsj::app ("put_byte_w", rsj::id ("b" + int::to_string b)) ! g (b - 8); # "put_byte_w b32;" or such.
end;
debug =
if debug_on [ rsj::my_fn ("_", rsj::id "print(\"0x\"$u32::to_string w$\"\\t\")") ];
else [];
fi;
body = case endian
#
raw::BIG => g width;
raw::LITTLE => reverse (g width);
esac;
end;
end;
# Functions for emitting the encoding for a cell:
#
cell_funs
=
raw::FUN_DECL (map mk_emit_cell (ard::registersets_of architecture_description))
where
fun mk_emit_cell (raw::REGISTER_SET { name, from, ... } )
=
rsj::fun_fn'
( emit name,
raw::IDPAT "r",
raw::APPLY_EXPRESSION
( rsj::id' ["u32"] "from_int",
raw::APPLY_EXPRESSION (rsj::id' ["rkj"] "hardware_register_id_of", rsj::id "r")
)
);
end;
# For each enum T defined in the package Instruction that
# has code generation annotations defined, generate a function put_T.
#
sumtype_funs
=
{ dbs = mst::sumtype_definitions symboltable;
#
raw::FUN_DECL (mk_emit_sumtypes (dbs,[]));
}
where
fun word w
=
raw::TYPED_EXPRESSION (rsj::unt1expression w, rsj::unt1_type);
fun mk_emit_sumtypes ([], fbs)
=>
reverse fbs;
mk_emit_sumtypes (raw::SUMTYPE { name, mc, cbs, ... } ! dbs, fbs)
=>
mk_emit_sumtypes
( dbs,
if found raw::FUN (emit name, cs) ! fbs;
else fbs;
fi
)
where
fun missing ()
=
err::error ("machine encoding is missing for constructor " + name);
fun loop (w, [], cs, found)
=>
(w, reverse cs, found);
loop (w, (cb as raw::CONSTRUCTOR { name, type, mc, ... } ) ! cbs, cs, found)
=>
loop (w, cbs, rst::map_cons_to_clause { prefix=> ["mcf"], pattern => \\ p=p, expression => e } cb ! cs, found)
where
my (e, found)
=
case (mc, w)
#
(NULL, THE (x ! _)) => (word (u32::from_int x), TRUE);
(NULL, THE []) => { missing(); (word 0u0, TRUE); };
(NULL, NULL) => (rsj::app ("error", rsj::string_constant_in_expression name), found);
(THE (raw::WORDMC w'), THE (w ! l'))
=>
{ if (u32::from_int w != w')
#
err::error ( "constructor " + name + " encoding is 0x"
+ u32::to_string w' + " but is expecting 0x"
+ u32::to_string (u32::from_int w)
);
fi;
(word w', TRUE);
};
(THE (raw::WORDMC w'), THE []) => (word w', TRUE);
(THE (raw::WORDMC w'), NULL ) => (word w', TRUE);
(THE (raw::EXPMC e), _ ) => (e, TRUE);
esac;
w = case w NULL => { NULL; };
THE (_ ! w) => { THE w; };
THE [] => { missing(); NULL; };
esac;
end;
end;
(loop (mc, cbs, [], FALSE)) -> (w, cs, found);
case w
#
THE(_ ! _) => err::error("Extra machine encodings in enum " + name);
_ => ();
esac;
end; # where
mk_emit_sumtypes _
=>
raise exception DIE "Bug: Unsupported case in emit/mk_emit_sumtypes";
end; # fun mk_emit_sumtypes ([], fbs)
end;
# Generate a formatting function for each machine instruction format
# defined in the architecture description.
#
instruction_format_funs
=
raw::FUN_DECL (map make_format (ard::instruction_formats_of architecture_description))
where
fun make_format (THE width, raw::INSTRUCTION_FORMAT (format_name, fields, NULL))
=>
make_defined_format (width, format_name, fields);
make_format (NULL, raw::INSTRUCTION_FORMAT (format_name, fields, NULL))
=>
{ err::error ("missing width in format " + format_name);
raw::FUN (format_name, []);
};
make_format (_, raw::INSTRUCTION_FORMAT (format_name, fields, THE e))
=>
make_format_fun (format_name, fields, e);
end
# Generate an expression that builds up the format
#
also
fun make_defined_format (total_width, format_name, fields)
=
make_format_fun
( format_name,
fields,
rsj::app
( "e_word" + int::to_string total_width,
fold_backward rsj::plus (rsj::unt1expression constant) exps
)
)
where
# Factor out the constant and the variable part:
#
fun loop ([], bit, constant, exps)
=>
(bit, constant, exps);
loop (raw::INSTRUCTION_BITFIELD { id, width, value, sign, ... } ! fs, bit, constant, exps)
=>
loop (fs, bit+width, constant, exps)
where
width =
case width
#
raw::WIDTH w => w;
raw::RANGE (from, to)
=>
{ if (bit != from)
#
err::error ( "field " + id
+ " in format " + format_name
+ " starts from bit " + int::to_string from
+ " (bit " + int::to_string bit + " expected"
);
fi;
to - from + 1;
};
esac;
mask = (0u1 << unt::from_int width) - 0u1;
my (constant, exps)
=
case value
#
THE v =>
{ if ((v && (bitwise_not mask)) != 0u0)
#
err::error ( "value 0x" + u32::to_string v
+ "in field " + id
+ " is out of range"
);
fi;
(constant
|| (v << unt::from_int bit), exps);
};
NULL =>
(constant, e ! exps)
where
e = rsj::id id;
e = if (sign == raw::UNSIGNED) e;
else rsj::bitwise_and (e, rsj::unt1expression mask);
fi;
e = rsj::sll (e, rsj::unt1expression (u32::from_int bit)); # sll == shift left logical
end;
esac;
end;
end;
my (real_width, constant, exps)
=
loop (reverse fields, 0, 0u0, []);
if (real_width != total_width)
#
err::error ( "format " + format_name + " is declared to have "
+ int::to_string total_width + " bits but I counted "
+ int::to_string real_width
);
fi;
end
# Generate a format function that includes implicit
# argument conversions.
#
also
fun make_format_fun (id, fields, expression)
=
raw::FUN
( id,
[ raw::CLAUSE
(
[ raw::RECORD_PATTERN
( fold_backward
\\ (raw::INSTRUCTION_BITFIELD { id => "", ... }, fs) => fs;
(raw::INSTRUCTION_BITFIELD { value => THE _, ... }, fs) => fs;
(raw::INSTRUCTION_BITFIELD { id, ... }, fs) => (id, raw::IDPAT id) ! fs;
end
[]
fields,
FALSE
)
],
NULL,
rsj::let_fn
( fold_backward
\\ (raw::INSTRUCTION_BITFIELD { id, cnv => raw::NOCNV, ... }, ds) => ds;
(raw::INSTRUCTION_BITFIELD { id, cnv => raw::CELLCNV k, ... }, ds) => rsj::my_fn (id, rsj::app (emit k, rsj::id id)) ! ds;
(raw::INSTRUCTION_BITFIELD { id, cnv => raw::FUNCNV f, ... }, ds) => rsj::my_fn (id, rsj::app (emit f, rsj::id id)) ! ds;
end
[]
fields,
expression
)
)
]
);
end;
# The main emitter function:
#
put_instr_fun
=
raw::FUN_DECL [ raw::FUN ("put_op", map make_emit_instruction instructions)]
where
fun make_emit_instruction (cb as raw::CONSTRUCTOR { name, mc, ... } )
=
rst::map_cons_to_clause
#
{ prefix => ["mcf"],
pattern => \\ p=p,
expression => case mc
#
THE (raw::EXPMC e) => e;
_ => rsj::app ("error", rsj::string_constant_in_expression name);
esac
}
#
cb;
instructions = ard::base_ops_of architecture_description;
end;
# Body of the package:
#
pkg_body
=
[ raw::VERBATIM_CODE
[
"\t\t\t\t\t\t\t\t\t# Machcode_Codebuffer\t\tis from src/lib/compiler/back/low/emit/machcode-codebuffer.api",
"# Export to client packages:",
"#",
"package cst = cst;",
"package mcf = mcf;\t\t\t\t\t\t\t# \"mcf\" == \"machcode_form\" (abstract machine code).",
"",
"# Local abbreviations:",
"#",
"package rgk = mcf::rgk;\t\t\t\t\t\t\t# \"rgk\" == \"registerkinds\".",
"package lac = mcf::lac;\t\t\t\t\t\t\t# \"lac\" == \"late_constant\".",
"package csb = csb;",
"package pop = cst::pop;",
"",
"# " + (string::to_upper (ard::architecture_name_of architecture_description)) + " is "
+
case endian raw::BIG => "big";
raw::LITTLE => "little";
esac
+
" endian.",
""
],
smj::error_handler architecture_description (\\ architecture_name = sprintf "%sMC" (string::to_upper architecture_name)),
raw::VERBATIM_CODE
[ "fun make_codebuffer _",
" =",
" { infix my &
| << >> >>> ;",
" #",
" (<<) = u32::(<<);",
" (>>) = u32::(>>);",
" (>>>) = u32::(>>>);",
" (
|) = u32::bitwise_or;",
" (&) = u32::bitwise_and;",
"",
" fun put_bool FALSE => 0u0: u32::Unt;",
" put_bool TRUE => 0u1: u32::Unt;",
" end;",
"",
" put_int = u32::from_int;",
"",
" fun put_word w = w;",
" fun put_label l = u32::from_int (lbl::get_codelabel_address l);",
" fun put_label_expression le = u32::from_int (tce::value_of le);",
" fun put_const lateconst = u32::from_int (lac::late_constant_to_int lateconst);",
"",
" loc = REF 0;",
"",
" # Emit a byte:",
" #",
" fun put_byte byte",
" =",
" { offset = *loc;",
" loc := offset + 1;",
" csb::write_byte_to_code_segment_buffer { offset, byte };",
" };",
"",
" # Emit the low order byte of a word.",
" # Note: from_large_unt strips the high order bits!",
" #",
" fun put_byte_w word",
" =",
" { offset = *loc;",
" loc := offset + 1; ",
" csb::write_byte_to_code_segment_buffer { offset, byte => one_byte_unt::from_large_unt word };",
" };",
"",
" fun do_nothing _ = ();",
" fun fail _ = raise exception DIE \"MCEmitter\";",
" fun get_notes () = error \"get_notes\";",
"",
" fun put_pseudo_op pseudo_op",
" =",
" pop::put_pseudo_op { pseudo_op, loc => *loc, put_byte };",
"",
" fun start_new_cccomponent size_in_bytes",
" =",
" { csb::initialize_code_segment_buffer { size_in_bytes };",
" loc := 0;",
" };",
"",
if debug_on "my s::STREAM { emit=asm, ... } = assembler::make_codebuffer()";
else "";
fi
],
put_funs,
cell_funs,
sumtype_funs,
instruction_format_funs,
ard::decl_of architecture_description "MC",
raw::VERBATIM_CODE
[ " fun emitter instruction",
" =",
" {"
],
put_instr_fun,
raw::VERBATIM_CODE [ "",
if debug_on " put_op instruction; asm instruction;";
else " put_op instruction;";
fi,
" };",
"",
"fun put_op (mcf::NOTE { op, ... } ) => put_op op;",
" put_op (mcf::BASE_OP i) => emitter i;",
" put_op (mcf::LIVE _) => ();",
" put_op (mcf::DEAD _) => ();",
" put_op _ => error \"put_op\";",
"end;",
"",
" { start_new_cccomponent, ",
" put_pseudo_op, ",
" put_op, ",
" get_completed_cccomponent=>fail, ",
" put_private_label=>do_nothing, ",
" put_public_label=>do_nothing, ",
" put_comment=>do_nothing, ",
" put_fn_liveout_info=>do_nothing, ",
" put_bblock_note=>do_nothing, ",
" get_notes",
" };",
"};"
]
];
pkg_code
=
spp::CAT [
if (arch_name == "intel32")
#
punct "# This file is unused in favor of" ++ nl ++
punct "# src/lib/compiler/back/low/intel32/translate-machcode-to-execode-intel32-g.pkg" ++ nl ++
punct "# -- 2011-04-02 CrT" ++ nl ++
punct "#" ++ nl;
else
punct "# We are invoked from:" ++ nl ++
punct "#" ++ nl ++
punct (sprintf "# src/lib/compiler/back/low/main/%s/backend-lowhalf-%s.pkg" archl archl) ++ nl ++
punct "#" ++ nl;
fi,
alpha "stipulate", nl,
iblock(indent++alpha "package lbl = codelabel;\t\t\t\t\t\t\t# codelabel\t\t\tis from src/lib/compiler/back/low/code/codelabel.pkg"), nl,
iblock(indent++alpha "package lem = lowhalf_error_message;\t\t\t\t\t# lowhalf_error_message\t\tis from src/lib/compiler/back/low/control/lowhalf-error-message.pkg"), nl,
iblock(indent++alpha "package rkj = registerkinds_junk;\t\t\t\t\t\t# registerkinds_junk\t\tis from src/lib/compiler/back/low/code/registerkinds-junk.pkg"), nl,
iblock(indent++alpha "package u32 = one_word_unt;\t\t\t\t\t\t\t# one_word_unt\t\t\t\tis from src/lib/std/one-word-unt.pkg"), nl,
alpha "herein", nl, nl,
iblock (indent ++ smj::make_generic
architecture_description
(\\ architecture_name = sprintf "translate_machcode_to_execode_%s_g" architecture_name)
args
smj::WEAK_SEAL
api_name
pkg_body
),
alpha "end;", nl, nl
];
end; # fun make_sourcecode_for_translate_machcode_to_execode_xxx_g_package
}; # package make_sourcecode_for_translate_machcode_to_execode_xxx_g_package
end; # stipulate