#
# Generate multiplication/division by a constant.
# This module is mainly used for architectures without fast integer multiply.
#
# -- Allen Leung
# Compiled by:
#
src/lib/compiler/back/low/lib/lowhalf.lib### "Power corrupts. Absolute power is kind of neat."
###
### -- John Lehman, US Secretary of the Navy, 1981-1987
stipulate
package lem = lowhalf_error_message; # lowhalf_error_message is from
src/lib/compiler/back/low/control/lowhalf-error-message.pkg package rkj = registerkinds_junk; # registerkinds_junk is from
src/lib/compiler/back/low/code/registerkinds-junk.pkg package tcp = treecode_pith; # treecode_pith is from
src/lib/compiler/back/low/treecode/treecode-pith.pkg package u = unt; # unt is from
src/lib/std/unt.pkgherein
# This generic is invoked (only) from:
#
#
src/lib/compiler/back/low/pwrpc32/treecode/translate-treecode-to-machcode-pwrpc32-g.pkg #
src/lib/compiler/back/low/sparc32/treecode/translate-treecode-to-machcode-sparc32-g.pkg #
generic package treecode_mult_g (
# ===============
#
package mcf: Machcode_Form; # Machcode_Form is from
src/lib/compiler/back/low/code/machcode-form.api package tcf: Treecode_Form; # Treecode_Form is from
src/lib/compiler/back/low/treecode/treecode-form.api int_width: Int; # width of integer type
Argi = { r: rkj::Codetemp_Info, i: Int, d: rkj::Codetemp_Info };
Arg = { r1: rkj::Codetemp_Info, r2: rkj::Codetemp_Info, d: rkj::Codetemp_Info };
# These never trap overflow:
#
mov: { r: rkj::Codetemp_Info, d: rkj::Codetemp_Info } -> mcf::Machine_Op;
add: Arg -> mcf::Machine_Op;
slli: Argi -> List( mcf::Machine_Op );
srli: Argi -> List( mcf::Machine_Op );
srai: Argi -> List( mcf::Machine_Op );
)
( trapping: Bool; # trap on overflow?
mult_cost: Ref( Int ); # Cost of multiplication
# Basic ops; these have to implemented by the architecture
# if trapping == TRUE, then the following MUST trap on overflow
addv: Arg -> List( mcf::Machine_Op );
subv: Arg -> List( mcf::Machine_Op );
/* some architectures, like the PA-RISC
* have these types of special ops
* if trapping == TRUE, then the following MUST also trap on overflow
*/
sh1addv: Null_Or( Arg -> List( mcf::Machine_Op ) ); # A*2 + b
sh2addv: Null_Or( Arg -> List( mcf::Machine_Op ) ); # a*4 + b
sh3addv: Null_Or( Arg -> List( mcf::Machine_Op ) ); # A*8 + b
)
( signed: Bool; # signed?
)
: (weak) Treecode_Mult_Div # Treecode_Mult_Div is from
src/lib/compiler/back/low/treecode/treecode-mult.api {
# Export to client packages:
#
package tcf = tcf; # "tcf" == "treecode_form".
package mcf = mcf; # "mcf" == "machcode_form" (abstract machine code).
package rgk = mcf::rgk; # "rgk" == "registerkinds".
Arg = Argi;
infix my << >> >>>
| & ;
#
itow = u::from_int;
wtoi = u::to_int_x;
(<<) = u::(<<);
(>>) = u::(>>);
(>>>) = u::(>>>);
(
|) = u::bitwise_or;
(&) = u::bitwise_and;
exception TOO_COMPLEX;
fun error msg = lem::error("treecode_mult", msg);
always_zero_register # NULL if no such register exists on the architecture. x86 has no such register; many RISC machines do.
=
rgk::get_always_zero_register rkj::INT_REGISTER;
shiftri = if signed srai; else srli;fi;
fun is_power_of2 w
=
((w - 0u1) & w) == 0u0;
fun log2 n # n must be > 0!!!
=
loop (n, 0)
where
fun loop (0u1, pow) => pow;
loop (w, pow) => loop (w >> 0u1, pow+1);
end;
end;
fun zero_bits (w, low_zero_bits)
=
if ((w & 0u1) == 0u1)
(w, low_zero_bits);
else
zero_bits (w >> 0u1, low_zero_bits+0u1);
fi;
# Non overflow trapping version of multiply:
# We can use add, shadd, shift, sub to perform the multiplication
#
fun multiply_non_trap { r, i, d }
=
{ fun mult (r, w, max_cost, d)
=
if (max_cost <= 0)
raise exception TOO_COMPLEX;
elif (is_power_of2 w ) slli { r, i=>log2 w, d };
else
case (w, sh1addv, sh2addv, sh3addv)
# some base cases
(0u3, THE f, _, _) => f { r1=>r, r2=>r, d };
(0u5, _, THE f, _) => f { r1=>r, r2=>r, d };
(0u9, _, _, THE f) => f { r1=>r, r2=>r, d };
_ =>
# recurse on the bit patterns of w
{ tmp = rgk::make_int_codetemp_info ();
if ((w & 0u1) == 0u1)
# Low order bit is 1
if ((w & 0u2) == 0u2)
# Second bit is 1
mult (r, w+0u1, max_cost - 1, tmp) @
subv { r1=>tmp, r2=>r, d };
else # Second bit is 0
mult (r, w - 0u1, max_cost - 1, tmp) @
addv { r1=>tmp, r2=>r, d };
fi;
else # Low order bit is 0
my (w, low_zero_bits)
=
zero_bits (w, 0u0);
mult (r, w, max_cost - 1, tmp) @
slli { r=>tmp, i=>wtoi low_zero_bits, d };
fi;
};
esac;
fi;
if (i <= 0 ) raise exception TOO_COMPLEX;
elif (i == 1 ) [mov { r, d } ];
else mult (r, itow i,*mult_cost, d);
fi;
};
# The semantics of roundToZero { r, i, d } is:
# if r >= 0 then d <- r
# else d <- r + i
#
fun round_to_zero void_expression { type, r, i, d }
=
{ reg = tcf::CODETEMP_INFO (type, r);
void_expression
(tcf::LOAD_INT_REGISTER
( type,
d,
tcf::CONDITIONAL_LOAD
( type,
tcf::CMP (type, tcf::GE, reg, tcf::LITERAL 0),
reg,
tcf::ADD (type, reg, tcf::LITERAL (tcf::mi::from_int (int_width, i)))
) ) );
};
# Simulate rounding towards zero for signed division
#
fun round_div { mode=>tcf::ROUND_TO_NEGINF, r, ... }
=>
([], r); # No rounding necessary.
round_div { mode=>tcf::ROUND_TO_ZERO, void_expression, r, i }
=>
if (not signed)
#
([], r); # No rounding for unsigned division.
else
d = rgk::make_int_codetemp_info ();
if (i == 2) # Special case for division by 2.
#
tmp_r = rgk::make_int_codetemp_info ();
( srli { r,
i => int_width - 1,
d => tmp_r
}
@
[ add { r1 => r,
r2 => tmp_r,
d
}
],
d
);
else
# Invoke rounding callback:
#
round_to_zero void_expression { type=>int_width, r, i=>i - 1, d };
([], d);
fi;
fi;
round_div { mode, ... }
=>
error("Integer rounding mode " + tcp::rounding_mode_to_string mode + " is not supported");
end;
fun divide_non_trap { mode, void_expression }{ r, i, d }
=
if (i > 0 and is_power_of2 (itow i))
#
my (code, r)
=
round_div { mode, void_expression, r, i };
code@shiftri { r, i=>log2 (itow i), d };
# won't overflow
else
raise exception TOO_COMPLEX;
fi;
# OVERFLOW trapping version of multiply:
# We can use only add and shadd to perform the multiplication,
# because of overflow trapping problem.
#
fun multiply_trap { r, i, d }
=
{ fun mult (r, w, max_cost, d)
=
if (max_cost <= 0)
#
raise exception TOO_COMPLEX;
else
case (w, sh1addv, sh2addv, sh3addv, always_zero_register)
#
# Some simple base cases:
#
(0u2, _, _, _, _) => addv { r1=>r, r2=>r, d };
(0u3, THE f, _, _, _) => f { r1=>r, r2=>r, d };
(0u4, _, THE f, _, THE z) => f { r1=>r, r2=>z, d };
(0u5, _, THE f, _, _) => f { r1=>r, r2=>r, d };
(0u8, _, _, THE f, THE z) => f { r1=>r, r2=>z, d };
(0u9, _, _, THE f, _) => f { r1=>r, r2=>r, d };
_ =>
{
# Recurse on the bit patterns of w
#
tmp = rgk::make_int_codetemp_info ();
if ((w & 0u1) == 0u1)
#
mult (r, w - 0u1, max_cost - 1, tmp)
@
addv { r1=>tmp, r2=>r, d };
else
case (w & 0u7, sh3addv, always_zero_register)
#
(0u0, THE f, THE z) # Times 8
=>
mult (r, w >> 0u3, max_cost - 1, tmp)
@
f { r1=>tmp, r2=>z, d };
_ => case (w & 0u3, sh2addv, always_zero_register)
#
(0u0, THE f, THE z) # Times 4
=>
mult (r, w >> 0u2, max_cost - 1, tmp)
@
f { r1=>tmp, r2=>z, d };
_ =>
mult (r, w >> 0u1, max_cost - 1, tmp)
@
addv { r1=>tmp, r2=>tmp, d };
esac;
esac;
fi;
};
esac;
fi;
if (i <= 0) raise exception TOO_COMPLEX;
elif (i == 1) [mov { r, d } ];
else mult (r, itow i,*mult_cost, d);
fi;
};
fun divide_trap { mode, void_expression }{ r, i, d }
=
if (i > 0 and is_power_of2 (itow i))
my (code, r)
=
round_div { mode, void_expression, r, i };
code@shiftri { r, i=>log2 (itow i), d };
# won't overflow
else
raise exception TOO_COMPLEX;
fi;
fun multiply x = if trapping multiply_trap x; else multiply_non_trap x;fi;
fun divide x = if trapping divide_trap x; else divide_non_trap x;fi;
};
end;