PreviousUpNext

15.4.449  src/lib/compiler/back/low/treecode/treecode-mult-g.pkg

#
# Generate multiplication/division by a constant.
# This module is mainly used for architectures without fast integer multiply.
#
# -- Allen Leung

# Compiled by:
#     src/lib/compiler/back/low/lib/lowhalf.lib



###                    "Power corrupts. Absolute power is kind of neat."
###
###                          -- John Lehman, US Secretary of the Navy, 1981-1987



stipulate
    package lem =  lowhalf_error_message;                                       # lowhalf_error_message         is from   src/lib/compiler/back/low/control/lowhalf-error-message.pkg
    package rkj =  registerkinds_junk;                                          # registerkinds_junk            is from   src/lib/compiler/back/low/code/registerkinds-junk.pkg
    package tcp =  treecode_pith;                                               # treecode_pith                 is from   src/lib/compiler/back/low/treecode/treecode-pith.pkg
    package u   =  unt;                                                         # unt                           is from   src/lib/std/unt.pkg
herein
    # This generic is invoked (only) from:
    #
    #     src/lib/compiler/back/low/pwrpc32/treecode/translate-treecode-to-machcode-pwrpc32-g.pkg
    #     src/lib/compiler/back/low/sparc32/treecode/translate-treecode-to-machcode-sparc32-g.pkg
    #
    generic package   treecode_mult_g   (
        #             ===============
        #
        package mcf:    Machcode_Form;                                                  # Machcode_Form                 is from   src/lib/compiler/back/low/code/machcode-form.api

        package tcf:    Treecode_Form;                                                  # Treecode_Form                 is from   src/lib/compiler/back/low/treecode/treecode-form.api


        int_type:  Int; #  width of integer type 

        Argi = { r: rkj::Codetemp_Info, i: Int, d: rkj::Codetemp_Info };
        Arg  = { r1: rkj::Codetemp_Info, r2: rkj::Codetemp_Info, d: rkj::Codetemp_Info }; 

        # These never trap overflow:
        #
        mov:    { r: rkj::Codetemp_Info, d: rkj::Codetemp_Info } -> mcf::Machine_Op;
        add:    Arg -> mcf::Machine_Op;
        slli:   Argi -> List( mcf::Machine_Op );
        srli:   Argi -> List( mcf::Machine_Op );
        srai:   Argi -> List( mcf::Machine_Op );
    )
    (    trapping:  Bool; #  trap on overflow? 

         mult_cost:  Ref( Int );                                                # Cost of multiplication 

            #  Basic ops; these have to implemented by the architecture 

            #  if trapping == TRUE, then the following MUST trap on overflow 
         addv:    Arg  -> List( mcf::Machine_Op );
         subv:    Arg  -> List( mcf::Machine_Op );

           /* some architectures, like the PA-RISC
            * have these types of special ops 
            * if trapping == TRUE, then the following MUST also trap on overflow
            */
         sh1addv:   Null_Or( Arg -> List( mcf::Machine_Op ) );  #  A*2 + b 
         sh2addv:   Null_Or( Arg -> List( mcf::Machine_Op ) );  #  a*4 + b
         sh3addv:   Null_Or( Arg -> List( mcf::Machine_Op ) );  #  A*8 + b 
    )  
    (    signed:    Bool; #  signed? 
    )
    : (weak) Treecode_Mult_Div                                  # Treecode_Mult_Div     is from   src/lib/compiler/back/low/treecode/treecode-mult.api
    {
        # Export to client packages:
        #
        package tcf =  tcf;                                     # "tcf" == "treecode_form".
        package mcf =  mcf;                                     # "mcf" == "machcode_form" (abstract machine code).
        package rgk =  mcf::rgk;                                # "rgk" == "registerkinds".


        Arg       = Argi;

        infix my  << >> >>> | & ;
        #
        itow   = u::from_int;
        wtoi   = u::to_int_x;

        (<<)   = u::(<<);
        (>>)   = u::(>>);
        (>>>)  = u::(>>>);
        (|)    = u::bitwise_or;
        (&)    = u::bitwise_and;

        exception TOO_COMPLEX;

        fun error msg =   lem::error("treecode_mult", msg);

        always_zero_register                                    # NULL if no such register exists on the architecture. x86 has no such register; many RISC machines do.
            =
            rgk::get_always_zero_register  rkj::INT_REGISTER; 

        shiftri =  if signed  srai; else srli;fi;

        fun is_power_of2 w
            =
            ((w - 0u1) & w) == 0u0;

        fun log2 n                 #  n must be > 0!!! 
            =
            loop (n, 0)
            where
                fun loop (0u1, pow) => pow; 
                    loop (w, pow) => loop (w >> 0u1, pow+1);
                end;
            end;

        fun zero_bits (w, low_zero_bits)
            = 
            if ((w & 0u1) == 0u1)
                 (w, low_zero_bits);
            else
                 zero_bits (w >> 0u1, low_zero_bits+0u1);
            fi;

        # Non overflow trapping version of multiply: 
        # We can use add, shadd, shift, sub to perform the multiplication
        #
        fun multiply_non_trap { r, i, d }
            =
            {   fun mult (r, w, max_cost, d)
                    = 
                    if (max_cost <= 0)
                         raise exception TOO_COMPLEX;
                    elif (is_power_of2 w ) slli { r, i=>log2 w, d };
                    else
                         case (w, sh1addv, sh2addv, sh3addv)
                              #  some base cases 
                              (0u3, THE f, _, _) => f { r1=>r, r2=>r, d };
                              (0u5, _, THE f, _) => f { r1=>r, r2=>r, d };
                              (0u9, _, _, THE f) => f { r1=>r, r2=>r, d };

                              _ =>
                                  #  recurse on the bit patterns of w 
                                  {   tmp = rgk::make_int_codetemp_info ();

                                      if ((w & 0u1) == 0u1)
                                                                                     # Low order bit is 1 
                                           if ((w & 0u2) == 0u2)
                                                                                     # Second bit is 1 
                                                mult (r, w+0u1, max_cost - 1, tmp) @
                                                subv { r1=>tmp, r2=>r, d };
                                           else                                 # Second bit is 0 
                                                mult (r, w - 0u1, max_cost - 1, tmp) @
                                                addv { r1=>tmp, r2=>r, d };
                                           fi;
                                      else                                              # Low order bit is 0 
                                           my (w, low_zero_bits)
                                              =
                                              zero_bits (w, 0u0);

                                           mult (r, w, max_cost - 1, tmp) @
                                               slli { r=>tmp, i=>wtoi low_zero_bits, d };

                                      fi;
                                  };
                         esac;
                   fi;

                if   (i <= 0 ) raise exception TOO_COMPLEX;
                elif (i == 1 ) [mov { r, d } ];
                else           mult (r, itow i,*mult_cost, d);
                fi;
            };

        # The semantics of roundToZero { r, i, d } is:
        #   if r >= 0 then d <- r
        #   else d <- r + i
        #
        fun round_to_zero void_expression { type, r, i, d }
            =
            {   reg = tcf::CODETEMP_INFO (type, r);

                void_expression
                    (tcf::LOAD_INT_REGISTER
                      ( type,
                        d,
                        tcf::CONDITIONAL_LOAD
                          ( type,
                            tcf::CMP (type, tcf::GE, reg, tcf::LITERAL 0),
                            reg,
                            tcf::ADD (type, reg, tcf::LITERAL (tcf::mi::from_int (int_type, i)))
                    ) )   );
            };



        # Simulate rounding towards zero for signed division 
        #
        fun round_div { mode=>tcf::ROUND_TO_NEGINF, r, ... }
                =>
                ([], r);                                        # No rounding necessary.

           round_div { mode=>tcf::ROUND_TO_ZERO, void_expression, r, i }
                =>
                if (not signed)
                    #
                    ([], r);                                    # No rounding for unsigned division.
                else
                    d =  rgk::make_int_codetemp_info ();

                    if (i == 2)                                 # Special case for division by 2.
                        #
                        tmp_r =   rgk::make_int_codetemp_info ();

                        (   srli {   r,
                                     i => int_type - 1,
                                     d => tmp_r
                                 }
                            @
                            [   add {   r1 => r,
                                        r2 => tmp_r,
                                        d
                                    }
                            ],

                            d
                       );

                    else
                        # Invoke rounding callback:
                        #
                        round_to_zero void_expression { type=>int_type, r, i=>i - 1, d };
                        ([], d);
                    fi;
                fi;

           round_div { mode, ... }
               => 
               error("Integer rounding mode " + tcp::rounding_mode_to_string mode + " is not supported");
        end;

        fun divide_non_trap { mode, void_expression }{ r, i, d }
            = 
            if (i > 0 and is_power_of2 (itow i))
                #
                my (code, r)
                    =
                    round_div { mode, void_expression, r, i };

                code@shiftri { r, i=>log2 (itow i), d };

                               #  won't overflow 
            else
                raise exception TOO_COMPLEX;
            fi;

        # OVERFLOW trapping version of multiply: 
        #   We can use only add and shadd to perform the multiplication,
        #   because of overflow trapping problem.
        #
        fun multiply_trap { r, i, d }
            =
            {   fun mult (r, w, max_cost, d)
                    =
                    if (max_cost <= 0)
                         #      
                         raise exception TOO_COMPLEX;
                    else 
                        case (w, sh1addv, sh2addv, sh3addv, always_zero_register)
                            #
                            # Some simple base cases:
                            #
                            (0u2, _, _, _, _)           => addv { r1=>r, r2=>r, d };
                            (0u3, THE f, _, _, _)      => f { r1=>r, r2=>r, d };
                            (0u4, _, THE f, _, THE z) => f { r1=>r, r2=>z, d };
                            (0u5, _, THE f, _, _)      => f { r1=>r, r2=>r, d };
                            (0u8, _, _, THE f, THE z) => f { r1=>r, r2=>z, d };
                            (0u9, _, _, THE f, _)      => f { r1=>r, r2=>r, d };

                            _ =>
                                {
                                    # Recurse on the bit patterns of w 
                                    #   
                                    tmp =   rgk::make_int_codetemp_info ();

                                    if ((w & 0u1) == 0u1)
                                        #
                                        mult (r, w - 0u1, max_cost - 1, tmp)
                                        @
                                        addv { r1=>tmp, r2=>r, d };

                                    else 

                                        case (w & 0u7, sh3addv, always_zero_register)
                                            #
                                            (0u0, THE f, THE z)                         # Times 8 
                                                =>
                                                mult (r, w >> 0u3, max_cost - 1, tmp)
                                                @
                                                f { r1=>tmp, r2=>z, d };


                                            _ =>    case (w & 0u3, sh2addv, always_zero_register)
                                                        #
                                                        (0u0, THE f, THE z)                     # Times 4 
                                                            =>
                                                            mult (r, w >> 0u2, max_cost - 1, tmp)
                                                            @
                                                            f { r1=>tmp, r2=>z, d };

                                                        _ =>
                                                            mult (r, w >> 0u1, max_cost - 1, tmp)
                                                            @
                                                            addv { r1=>tmp, r2=>tmp, d };
                                                    esac;
                                        esac;
                                    fi;
                                };
                        esac;
                    fi; 

                if   (i <= 0)  raise exception TOO_COMPLEX;
                elif (i == 1)  [mov { r, d } ];
                else           mult (r, itow i,*mult_cost, d);
                fi;
            };

        fun divide_trap { mode, void_expression }{ r, i, d }
            =
            if   (i > 0 and is_power_of2 (itow i))

                 my (code, r)
                     =
                     round_div { mode, void_expression, r, i };

                 code@shiftri { r, i=>log2 (itow i), d };
                                                                    #  won't overflow 
            else
                 raise exception TOO_COMPLEX;
            fi;

        fun multiply x =   if trapping    multiply_trap x;   else multiply_non_trap x;fi;
        fun divide   x =   if trapping    divide_trap   x;   else divide_non_trap   x;fi;

    };
end;


Comments and suggestions to: bugs@mythryl.org

PreviousUpNext