src/lib/std/src/multiword-int-guts.pkg

15.4.1124 src/lib/std/src/multiword-int-guts.pkg

## multiword-int-guts.pkg  -- indefinite-precision integer arithmetic.
## COPYRIGHT (c) 2003 by The SML/NJ Fellowship.
## Author of the current code: Matthias Blume (blume@tti-c.org)
#
# The implementation in this file, together with its counterpart
# in lib/core/init/core-multiword-int.pkg inf
#
#     src/lib/core/init/core-multiword-int.pkg
#
# is derived from an earlier implementation of integer
# in standard.lib.
#
# That implementation, in turn, was derived from
# Andrzej Filinski's bignum package.
#
# The idea is that this package conforms to the specification of
# integer as described in the SML Basis library reference.
#
# The type multiword_int::Int itself is abstract.  A concrete version (together
# with conversions between abstract and concrete) is provided
# by package core_multiword_int.  (The type is a built-in type because
# the compiler must have some intrinsic knowledge of it in order to
# be able to implement
#   - multiword_int::Int literals
#   - conversion shortcuts (one_word_int::fromLarge o int::toLarge, etc.)
#   - overloading on literals
#   - pattern matching on literals
#
# Package core_multiword_int implements all the "essential" pieces which
# are required for the pervasive dictionary and for supporting the
# compiler (literals, conversions).
#
# The present package implements the rest and provides the complete
# interface as mandated by the Basis spec.
#
# The current implementation is not as efficient
# as it could and should be.             XXX BUGGO FIXME

# Compiled by:
#     src/lib/std/src/standard-core.sublib

###            "Anyone who has never made a mistake
###             has never tried anything new."
###
###                          -- Albert Einstein

stipulate
    package lms =  list_mergesort;                                              # list_mergesort                        is from   src/lib/src/list-mergesort.pkg
herein

    package multiword_int_guts
          : Multiword_Int                                                                       # Multiword_Int         is from   src/lib/std/src/multiword-int.api
    {
        Int =  multiword_int::Int;

        precision = NULL;
        min_int   = NULL;
        max_int   = NULL;

        # The following assumes large_int = one_word_int.
        # If integer is provided, it will be
        # large_int and to_large and from_large
        # will be the identity function.

        to_int     = inline_t::in::to_int;
        from_int   = inline_t::in::from_int;

        to_multiword_int   = inline_t::in::to_large;
        from_multiword_int = inline_t::in::from_large;

        Rep == core_multiword_int::Rep;

        concrete =  core_multiword_int::concrete;                                               # core_multiword_int    is from   src/lib/core/init/core-multiword-int.pkg
        abstract =  core_multiword_int::abstract;

        base_bits =  tagged_unt_guts::to_int_x core_multiword_int::base_bits;

        fun binary (f, gen_sign) (x, y)
            =
            {   my BI { negative=>sx, digits=>xs } =  concrete x;
                my BI { negative=>sy, digits=>ys } =  concrete y;

                sign = gen_sign (sx, sy);

                # Convert to two's complement;
                # Compute (- x - borrow)
                #
                fun twos (FALSE,  x, borrow) =>  (x, 0u0);
                    twos (TRUE, 0u0,    0u0) =>  (0u0, 0u0); #  no borrow
                    twos (TRUE,   x, borrow) =>  (core_multiword_int::base - x - borrow, 0u1); #  Borrow
                end;

                # Convert to ones's complement
                #
                ones = twos;

                fun loop ([], [], _, _, _)
                        =>
                        [];

                    loop ([], y ! ys, bx, by, bz)
                        =>
                        loop1 (0u0, [], y, ys, bx, by, bz);

                    loop (x ! xs, [], bx, by, bz)
                        =>
                        loop1 (x, xs, 0u0, [], bx, by, bz);

                    loop (x ! xs, y ! ys, bx, by, bz)
                        =>
                        loop1 (x, xs, y, ys, bx, by, bz);
                end

                also
                fun loop1 (x, xs, y, ys, bx, by, bz)
                    =
                    {   # Convert from ones complement:
                        #
                        my (x, bx) =  twos (sx, x, bx);
                        my (y, by) =  twos (sy, y, by);
                        z  = f (x, y);

                        # Convert back to ones complement:
                        #
                        my (z, bz) = ones (sign, z, bz);
                        zs = loop (xs, ys, bx, by, bz);

                        case (z, zs)    #  strip leading zero
                            (0u0, []) =>  [];
                            (z, zs)   =>  z ! zs;
                        esac;
                    };

                case (loop (xs, ys, 0u0, 0u0, 0u0))
                    []     =>  abstract (BI { digits => [], negative => FALSE } );
                    digits =>  abstract (BI { negative => sign, digits } );
                esac;
            };

        fun shift_amount w
            =
            { bytes =>  tagged_unt_guts::(/) (w, core_multiword_int::base_bits),
              bits  =>  tagged_unt_guts::(%) (w, core_multiword_int::base_bits)
            };

        infix my | & << >>;

        my (<<) = tagged_unt_guts::(<<);
        my (>>) = tagged_unt_guts::(>>);
        my (&)  = tagged_unt_guts::bitwise_and;
        my (|)  = tagged_unt_guts::bitwise_or;

        # Formatting for bases 2, 8, 16 by
        # slicing off the right number of bits:
        #
        fun bitformat (bits, maxdig, digvec) i
            =
            {   fun dig d
                    =
                    string_guts::get_byte_as_char (digvec, tagged_unt_guts::to_int_x d);

                my BI { digits, negative }
                    =
                    concrete i;

                fun addsign l
                    =
                    negative  ??  '-' ! l
                              ::        l;

                fun loop (chars, [], 0u0, _)
                        =>
                        string_guts::implode (addsign chars);

                    loop (chars, xs, c, cb)
                        =>
                        if (cb >= bits)

                            loop (dig (c & maxdig) ! chars,
                                 xs, c >> bits, cb - bits);
                        else
                            my (x, xs')
                                =
                                case xs
                                    []      =>   (0u0, []);
                                    x ! xs' =>   (x, xs');
                                esac;

                            a = ((x << cb) | c) & maxdig;

                            loop (dig a ! chars, xs',
                                  x >> (bits - cb),
                                  core_multiword_int::base_bits - bits + cb);
                        fi;
                end;

                case digits
                    [] =>  "0";
                    _  =>  loop ([], digits, 0u0, 0u0);
                esac;
            };

        my (dec_base, dec_digs)
            =
            try (0u1000000000, 9)
            where
                fun try (b, d)
                    =
                    if (b <= core_multiword_int::base)

                         (b, d);
                    else
                         try (tagged_unt_guts::(/) (b, 0u10), d - 1);
                    fi;
            end;

        # Decimal formatting by repeatedly dividing
        # by the largest possible power of 10:
        #
        fun decformat i
            =
            {   to_string
                    =
                    tagged_unt_guts::format number_string::DECIMAL;

                fun dec_dig d
                    =
                    number_string::pad_left  '0'  dec_digs  (to_string d);

                fun loop (l, [])
                        =>
                        l;

                    loop (l, [x])
                        =>
                        to_string x ! l;

                    loop (l, xs)
                        =>
                        {   my (q, r)
                                =
                                core_multiword_int::natdivmodd  (xs, dec_base);

                            loop (dec_dig r ! l, q);
                        };
                end;

                case (concrete i)

                    BI { digits => [], ... }
                        =>
                       "0";

                    BI { digits, negative }
                        =>
                        cat  if negative  "-" ! loop ([], digits);
                             else               loop ([], digits);
                             fi;
                esac;
            };

        fun format number_string::OCTAL   =>  bitformat (0u3, 0ux7, "01234567");
            format number_string::HEX     =>  bitformat (0u4, 0uxf, "0123456789abcdef");
            format number_string::BINARY  =>  bitformat (0u1, 0ux1, "01");
            format number_string::DECIMAL =>  decformat;
        end;

        fun sign i
            =
            case (concrete i)
                BI { digits => [], ... } => 0;
                BI { negative,     ... } => if negative  -1;
                                            else          1;
                                            fi;
            esac;

        fun same_sign (i, j)
            =
            sign i == sign j;

        fun bitwise_not x
            =
            -(x + abstract (BI { negative => FALSE, digits => [0u1] } ));

        fun log2 i
            =
            case (concrete i)
                #
                BI { negative => TRUE, ... }
                    =>
                    raise exception DOMAIN;

                BI { digits, ... }
                    =>
                    loop (digits, 0)
                    where
                        fun wloop (0u0,  _) =>  raise exception DOMAIN; # Should never happen.
                            wloop (0u1, lg) =>  lg;
                            wloop (w,   lg) =>  wloop (tagged_unt_guts::(>>) (w, 0u1), lg + 1);
                        end;

                        fun loop ([],     lg) =>  raise exception DOMAIN;
                            loop ([x],    lg) =>  wloop (x, lg);
                            loop (x ! xs, lg) =>  loop (xs, lg + base_bits);
                        end;
                    end;
            esac;

        bitwise_or  =  binary (tagged_unt_guts::bitwise_or,   \\ (x, y) =  x or  y);
        bitwise_and =  binary (tagged_unt_guts::bitwise_and,  \\ (x, y) =  x and y);
        bitwise_xor =  binary (tagged_unt_guts::bitwise_xor,  \\ (x, y) =  x !=  y);

        # left shift; just shift the digits,
        # no special treatment for
        # signed versus unsigned:
        #
        fun lshift (i, w)
            =
            case (concrete i)

                BI { digits => [], negative }
                    =>
                    i;          # i == 0

                BI { digits, negative }
                    =>
                    {   my { bytes, bits }
                            =
                            shift_amount  w;

                        bits' =  core_multiword_int::base_bits - bits;

                        fun pad (0u0, xs) =>  xs;
                            pad (n,   xs) =>  pad (n - 0u1,  0u0 ! xs);
                        end;

                        fun shift ([],   0u0) =>  [];
                            shift ([], carry) =>  [carry];

                            shift (x ! xs, carry)
                                =>
                                digit ! shift (xs, carry')
                                where
                                    max_val =  core_multiword_int::max_digit;

                                    digit  =  ((x << bits) | carry) & max_val;

                                    carry' =  x >> bits';
                                end;
                        end;

                        abstract
                            (BI { negative,
                                  digits => if (bits == 0u0)
                                                pad (bytes, digits);
                                            else
                                                pad (bytes, shift (digits, 0u0));
                                            fi
                                }
                            );
                    };
            esac;

        # Right shift.
        #
        fun rshift (i, w)
            =
            case (concrete i)

                BI { digits => [], negative }
                    =>
                    i;          #  i == 0

                BI { digits, negative }
                    =>
                    {   my { bytes, bits }
                            =
                            shift_amount w;

                        bits' =  core_multiword_int::base_bits - bits;

                        fun drop (0u0, i      ) =>  i;
                            drop (n,   []     ) =>  [];
                            drop (n,   x ! xs) =>  drop (n - 0u1, xs);
                        end;

                        fun shift []
                                =>
                                ([], 0u0);

                            shift (x ! xs)
                                =>
                                {   my (zs, borrow)
                                        =
                                        shift xs;

                                    z = borrow | (x >> bits);

                                    borrow' = (x << bits') & core_multiword_int::max_digit;

                                    #  strip leading 0
                                    case (z, zs)

                                        (0u0, []) =>  ([], borrow');
                                        _         =>  (z ! zs,   borrow');
                                    esac;
                                };
                        end;

                        digits
                            =
                            if (bits == 0u0)

                                drop (bytes, digits);
                            else
                                #1 (shift (drop (bytes, digits)));
                            fi;

                        abstract  case digits
                                      [] => BI { negative => FALSE, digits => [] };
                                      _  => BI { negative,          digits       };
                                  esac;
                    };
            esac;

        fun startscan (do_it, hex) getchar s
            =
            sign (number_string::skip_ws getchar s)
            where
                fun hexprefix (neg, s)
                    =
                    case (getchar s)
                        THE (('x' | 'X'), s') => do_it (neg, s');
                        _ => do_it (neg, s);
                    esac;

                fun prefix (neg, s)
                    =
                    if hex   hexprefix (neg, s);
                    else     do_it (neg, s);
                    fi;

                fun sign s
                    =
                    case (getchar s)
                        NULL => NULL;
                        THE ('-', s') =>  prefix (TRUE,  s');
                        THE ('+', s') =>  prefix (FALSE, s');
                        _             =>  prefix (FALSE, s);
                    esac;
            end;

        fun bitscan (bits, dig_val, hex) getchar s
            =
            startscan
                (check_first_digit, hex)
                getchar
                s
            where
                fun dcons (0u0, []) =>  [];
                    dcons (x,   xs) =>  x ! xs;
                end;

                fun check_first_digit (neg, s)
                    =
                    {   pos0    =  core_multiword_int::base_bits - bits;
                        max_val =  core_multiword_int::max_digit;

                        fun digloop (d, pos, nat, s)
                            =
                            {   fun done ()
                                    =
                                    {   i = case (dcons (d, nat))

                                                 []  =>  BI { negative => FALSE, digits => [] };
                                                 nat =>  BI { negative => neg, digits => nat };
                                            esac;

                                        i = abstract i;

                                        THE ( if (pos == 0u0)  i;
                                              else             (rshift (i, pos));
                                              fi,

                                              s
                                            );
                                    };

                                case (getchar s )

                                    NULL => done ();

                                    THE (c, s')
                                        =>
                                        case (dig_val c)

                                            NULL => done ();

                                            THE v =>
                                                if (pos < bits)

                                                    if (pos == 0u0)

                                                        digloop (v << pos0, pos0, dcons (d, nat), s');
                                                    else
                                                        digloop ((v << (pos0 + pos)) & max_val,
                                                                pos0 + pos,
                                                                dcons (d | (v >> (bits - pos)), nat),
                                                                s');
                                                    fi;
                                                else
                                                    digloop (d | (v << (pos - bits)), pos - bits,
                                                            nat, s');
                                                fi;
                                        esac;
                                esac;
                            };

                        case (getchar s)

                            THE (c, s')
                                =>
                                case (dig_val c)
                                    THE v =>  digloop (v << pos0, pos0, [], s');
                                    NULL  =>  NULL;
                                esac;

                            NULL => NULL;
                        esac;
                    };
            end;

        fun decscan getchar s
            =
            startscan
                (check_first_digit, FALSE)
                getchar
                s

            where

                fun dig_val '0' => THE 0u0;
                    dig_val '1' => THE 0u1;
                    dig_val '2' => THE 0u2;
                    dig_val '3' => THE 0u3;
                    dig_val '4' => THE 0u4;
                    dig_val '5' => THE 0u5;
                    dig_val '6' => THE 0u6;
                    dig_val '7' => THE 0u7;
                    dig_val '8' => THE 0u8;
                    dig_val '9' => THE 0u9;
                    dig_val _ => NULL;
                end;

                fun digloop (negative, nat, fact, v, s)
                    =
                    {   fun done ()
                            =
                            {   i = case (core_multiword_int::natmadd (fact, nat, v))

                                        []     => abstract (BI { negative => FALSE,
                                                                 digits => [] } );

                                        digits => abstract (BI { negative,
                                                                 digits } );
                                    esac;

                                THE (i, s);
                            };

                        case (getchar s)

                            THE (c, s')
                                =>
                                case (dig_val c)

                                    THE v'
                                        =>
                                        if (fact == dec_base)

                                            digloop (negative,
                                                     core_multiword_int::natmadd (fact, nat, v),
                                                     0u10, v', s');
                                        else
                                            digloop (negative,
                                                     nat, fact * 0u10, v * 0u10 + v', s');
                                        fi;

                                    NULL => done ();
                                esac;

                            NULL => done ();
                        esac;
                    };

                fun check_first_digit (negative, s)
                    =
                    case (getchar s)

                        THE (c, s')
                            =>
                            case (dig_val c)
                                 THE v =>  digloop (negative, [], 0u10, v, s');
                                 NULL  =>  NULL;
                            esac;

                        NULL => NULL;
                    esac;
            end;

        fun bin_dig_val '0' =>  THE 0u0;
            bin_dig_val '1' =>  THE 0u1;
            bin_dig_val _   =>  NULL;
        end;

        fun oct_dig_val '0' =>  THE 0u0;
            oct_dig_val '1' =>  THE 0u1;
            oct_dig_val '2' =>  THE 0u2;
            oct_dig_val '3' =>  THE 0u3;
            oct_dig_val '4' =>  THE 0u4;
            oct_dig_val '5' =>  THE 0u5;
            oct_dig_val '6' =>  THE 0u6;
            oct_dig_val '7' =>  THE 0u7;
            oct_dig_val _   =>  NULL;
        end;

        fun hex_dig_val  '0'        =>  THE 0ux0;
            hex_dig_val  '1'        =>  THE 0ux1;
            hex_dig_val  '2'        =>  THE 0ux2;
            hex_dig_val  '3'        =>  THE 0ux3;
            hex_dig_val  '4'        =>  THE 0ux4;
            hex_dig_val  '5'        =>  THE 0ux5;
            hex_dig_val  '6'        =>  THE 0ux6;
            hex_dig_val  '7'        =>  THE 0ux7;
            hex_dig_val  '8'        =>  THE 0ux8;
            hex_dig_val  '9'        =>  THE 0ux9;
            hex_dig_val ('a' | 'A') =>  THE 0uxa;
            hex_dig_val ('b' | 'B') =>  THE 0uxb;
            hex_dig_val ('c' | 'C') =>  THE 0uxc;
            hex_dig_val ('d' | 'D') =>  THE 0uxd;
            hex_dig_val ('e' | 'E') =>  THE 0uxe;
            hex_dig_val ('f' | 'F') =>  THE 0uxf;
            hex_dig_val _ => NULL;
        end;

        fun scan number_string::DECIMAL =>  decscan;
            scan number_string::HEX     =>  bitscan (0u4, hex_dig_val, TRUE);
            scan number_string::OCTAL   =>  bitscan (0u3, oct_dig_val, FALSE);
            scan number_string::BINARY  =>  bitscan (0u1, bin_dig_val, FALSE);
        end;

        my (-_)   =  core_multiword_int::neg;
        my neg    =  core_multiword_int::neg;
        my (+)    =  core_multiword_int::(+);
        my (*)    =  core_multiword_int::(*);
        my (/)    =  core_multiword_int::div ;
        my (%)    =  core_multiword_int::mod ;
        my (-)    =  core_multiword_int::(-);
        my (<)    =  core_multiword_int::(<);
        my (<=)   =  core_multiword_int::(<=);
        my (>)    =  core_multiword_int::(>);
        my (>=)   =  core_multiword_int::(>=);

        div_mod  =  core_multiword_int::div_mod ;
        quot_rem =  core_multiword_int::quot_rem ;
        quot     =  core_multiword_int::quot ;
        rem      =  core_multiword_int::rem ;
        compare  =  core_multiword_int::compare ;
        abs      =  core_multiword_int::abs ;
        pow      =  core_multiword_int::pow ;

        fun max arg =  case (compare arg)   GREATER => #1 arg;    _ => #2 arg;   esac;
        fun min arg =  case (compare arg)   LESS    => #1 arg;    _ => #2 arg;   esac;

        to_string
            =
            format  number_string::DECIMAL;

        fun from_string s
            =
            number_string::scan_string (scan number_string::DECIMAL) s;

        my (<<)  =  lshift;
        my (>>>) =  rshift;

        fun 0! =>  1;
            n! =>  n * (n - 1)! ;
        end;

        fun is_prime p                  # A very simple and naive primality tester.  2009-09-02 CrT.
            =
            {   p = abs(p);                     # Try to do something reasonable with negative numbers.

                if   (p < 4)       TRUE;        # Call zero prime.
                elif (p % 2 == 0)  TRUE;        # Special-case even numbers to halve our loop time.
                else
                    # Test all odd numbers less than p/2:

                    lim = p / 2;

                    loop 3
                    where
                        fun loop i
                            =
                            if   (i == lim)     FALSE;
                            elif (p % i == 0)   TRUE;
                            else                loop (i + 2);
                            fi;
                    end;
                fi;
            };

        fun factors n
            =
            factors' (n, 2, [])
            where
                fun factors' (n, p, results)
                    =
                    if (p*p > n)

                        reverse (n ! results);

                    elif (n % p == 0)

                        factors' (n/p, p,   p ! results);
                    else
                        factors' (n,   p+1,     results);
                    fi;
            end;

        fun sum ints
            =
            sum' (ints, 0)
            where
                fun sum' (      [], result) =>  result;
                    sum' (i ! rest, result) =>  sum' (rest, i + result);
                end;
            end;

        fun product ints
            =
            product' (ints, 1)
            where
                fun product' (      [], result) =>  result;
                    product' (i ! rest, result) =>  product' (rest, i * result);
                end;
            end;

        fun list_min [] =>   raise exception DIE "Cannot do list_min on empty list";
            #
            list_min (i ! ints)
                =>
                min' (ints, i: Int)
                where
                    fun min' (      [], result) =>  result;
                        min' (i ! rest, result) =>  min'  (rest,  i < result ?? i :: result);
                    end;
                end;
        end;

        fun list_max [] =>   raise exception DIE "Cannot do list_max on empty list";
            #
            list_max (i ! ints)
                =>
                min' (ints, i: Int)
                where
                    fun min' (      [], result) =>  result;
                        min' (i ! rest, result) =>  min'  (rest,  i > result ?? i :: result);
                    end;
                end;
        end;

        fun sort ints
            =
            lms::sort_list (>) ints;

        fun sort_and_drop_duplicates ints
            =
            lms::sort_list_and_drop_duplicates  compare  ints;

        fun mean []     =>      0;                                                      # Would throwing an exception be better?  In graphics, at least, often it is better to just gloss over the occasional special case...
            mean ints   =>      sum ints   /   from_int (length ints);
        end;

        fun median []
                =>
                0;                                                                      # As above, arbitrary, possibly should throw exception.

            median ints
                =>
                {   len  = from_int (length ints);
                    ints = lms::sort_list (>) ints;
                    #
                    i1 = len / 2;
                    i2 = i1 - 1;

                    if (is_odd(len))
                        #
                        # Return middle element:
                        #
                        list::nth (ints, to_int i1);
                    else
                        # Return average of the two middle elements:
                        #
                        n1 = list::nth (ints, to_int i1);
                        n2 = list::nth (ints, to_int i2);

                        (n1 + n2) / 2;
                    fi;
                }
                where
                    fun is_odd(i) =  (bitwise_and (i, from_int 1) == from_int 1);
                end;
        end;

    };                          #  package integer
end;

Comments and suggestions to: bugs@mythryl.org