src/lib/std/src/number-scan.pkg

15.4.1144 src/lib/std/src/number-scan.pkg

## number-scan.pkg
#
# The string conversion for the largest int and word types.
# All of the other scan functions can be implemented in terms of them.

# Compiled by:
#     src/lib/std/src/standard-core.sublib

stipulate
    package i1w =  one_word_int;                                        # one_word_int          is from   src/lib/std/types-only/basis-structs.pkg
    package u1w =  one_word_unt;                                        # one_word_unt          is from   src/lib/std/types-only/basis-structs.pkg
    #
    package it  =  inline_t;                                            # inline_t              is from   src/lib/core/init/built-in.pkg
    package ns  =  number_string;                                       # number_string         is from   src/lib/std/src/number-string.pkg
herein

    package number_scan

    : (weak)
    api {

        scan_word
            :
            ns::Radix                                   #
            ->
            ns::Reader( Char, X )
            ->
            ns::Reader( u1w::Unt, X );

        scan_int
            :
            ns::Radix
            ->
            ns::Reader( Char, X )
            ->
             ns::Reader( i1w::Int, X );

        scan_real
            :
            ns::Reader( Char, X)
            ->
            ns::Reader( Float, X );

            # * should be to eight_byte_float::Float *

    }
    {
        package u32 =  it::u1;                          # "u1"  ==  "one-word unsigned int" (32-bits on 32-bit architectures; 64-bit on 64-bit architectures).
        package ti  =  it::ti;                          # "ti"  ==  "tagged_int".
        package i32 =  it::i1;                          # "i1"  ==  "one-word   signed int" (32-bits on 32-bit architectures; 64-bit on 64-bit architectures).
        package r   =  it::f64;                         # "f64" ==  "64-bit float'.

        Unt = u1w::Unt;

        (<)  = u32::(<);
        (>=) = u32::(>=);
        (+)  = u32::(+);
        (-)  = u32::(-);
        (*)  = u32::(*);

        largest_word_div10 =  0u429496729: Unt;         #  2^32-1 divided by 10                 # 64-bit issue
        largest_word_mod10 =  0u5:         Unt;         #  remainder                            # 64-bit issue

        largest_neg_int1 = 0ux80000000:  Unt;                                                   # 64-bit issue
        largest_pos_int1 = 0ux7fffffff:  Unt;                                                   # 64-bit issue
        min_int1         = -2147483648:  i1w::Int;                                              # 64-bit issue

        # A table for mapping digits to values.  Whitespace characters map to
        # 128, "+" maps to 129, "-", "~" map to 130, "." maps to 131, and the
        # characters 0-9, A-Z, a-z map to their * base-36 value.  All other
        # characters map to 255.

        stipulate

            cvt_table = "\
                \\xff\xff\xff\xff\xff\xff\xff\xff\xff\x80\x80\xff\xff\xff\xff\xff\
                \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
                \\x80\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x81\xff\x82\x83\xff\
                \\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff\
                \\xff\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\
                \\x19\x1a\x1b\x1c\x1d\x1e\x1f\xff\x21\x22\x23\xff\xff\xff\xff\xff\
                \\xff\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\
                \\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\xff\xff\xff\x82\xff\
                \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
                \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
                \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
                \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
                \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
                \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
                \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
                \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
              \";

            to_int = it::char::ord;

        herein

            fun code (c:  Char)
                =
                u32::from_int (to_int (it::vector_of_chars::get_byte_as_char (cvt_table, to_int c)));

            ws_code    = 0u128:  Unt;           #  Code for whitespace

            plus_code  = 0u129:  Unt;           #  Code for '+'
            minus_code = 0u130:  Unt;           #  Code for '-' and '~'

            pt_code    = 0u131:  Unt;           #  Code for '.'

            e_code     = 0u14:   Unt;           #  Code for 'e' and 'E'
            w_code     = 0u32:   Unt;           #  Code for 'w'
            x_code     = 0u33:   Unt;           #  Code for 'X' and 'X'
        end;

        Prefix_Pat
            =
            { w_okay:  Bool,            # TRUE if 0[wW] prefix is okay; if this is
                                            # TRUE, then signs (+, -, ~) are not okay.

              x_okay:  Bool,            #  TRUE if 0[xX] prefix is okay
              pt_okay: Bool,            #  TRUE if can start with point
              is_digit:  Unt -> Bool    #  returns TRUE for allowed digits
            };

    #     scanPrefix:  prefix_pat
    #                  ->
    #                  Reader( char, X )
    #                  ->
    #                  X
    #                  ->
    #                  Null_Or { neg: Bool, next: word /* code */, rest: X }
    #
    #       scans prefix for a number:
    #       binPattern (TRUE)  { wOkay=TRUE, xOkay=FALSE, ptOkay=FALSE, isBinDigit } =>
    #      (0[wW])?b (b binary digit)
    #       binPattern (FALSE) { wOkay=TRUE, xOkay=FALSE, ptOkay=FALSE, isBinDigit } =>
    #      [-~+]?b
    #       octPattern (TRUE)  { wOkay=TRUE, xOkay=FALSE, ptOkay=FALSE, isOctDigit } =>
    #      (0[wW])?o (o octal digit)
    #       octPattern (FALSE) { wOkay=FALSE, xOkay=FALSE, ptOkay=FALSE, isOctDigit } =>
    #      [-~+]?o
    #       hexPattern (TRUE)  { wOkay=TRUE, xOkay=TRUE, ptOkay=FALSE, is_hex_digit } =>
    #           (0[wW][xX])?h (h hex digit)
    #       hexPattern (FALSE) { wOkay=FALSE, xOkay=TRUE, ptOkay=FALSE, is_hex_digit } =>
    #      [-~+]?(0[xX])?h
    #       decPattern (TRUE, FALSE) { wOkay=TRUE, xOkay=FALSE, ptOkay=FALSE, isDecDigit } =>
    #           (0[wW][xX])?d (d decimal digit)
    #       decPattern (FALSE, false) { wOkay=FALSE, xOkay=FALSE, ptOkay=FALSE, isDecDigit } =>
    #      [-~+]?d
    #       decPattern (FALSE, TRUE) { wOkay=FALSE, xOkay=FALSE, ptOkay=TRUE, isDecDigit } =>
    #      [-~+]?[.d]
    #
    #       Sign characters, initial 0x, 0u, etc are consumed.  The initial
    #       digit or point code is returned as the value of next.

        fun scan_prefix (p:  Prefix_Pat) getc cs
            =
            get_opt_sign (skip_ws cs)
            where
                fun get_next cs
                    =
                    case (getc cs)
                        #
                        THE (c, cs) =>   THE (code c, cs);
                        NULL        =>   NULL;
                    esac;

                fun skip_ws cs
                    =
                    case (get_next cs)
                        #
                        THE (c, cs')
                            =>
                            if (c == ws_code)   skip_ws cs';
                            else                THE (c, cs');
                            fi;

                        NULL => NULL;
                    esac;

                fun get_opt_sign (next as THE (c, cs))
                        =>
                        if   (p.w_okay)          get_opt0 (FALSE, THE (c, cs));
                        elif (c == plus_code)    get_opt0 (FALSE, get_next cs);
                        elif (c == minus_code)   get_opt0 (TRUE,  get_next cs);
                        else                     get_opt0 (FALSE, next);
                        fi;

                    get_opt_sign NULL =>   NULL;
                end

                also
                fun get_opt0 (neg, THE (c, cs))
                        =>
                        if (c == 0u0
                        and (p.w_okay or p.x_okay)
                        )
                            get_opt_w (neg, (c, cs), get_next cs);
                        else
                            finish (neg, (c, cs));
                        fi;

                    get_opt0 (neg, NULL) =>   NULL;
                end

                also
                fun get_opt_w (neg, saved_cs, arg as THE (c, cs))
                        =>
                        if (c == w_code and p.w_okay)   get_opt_x (neg, saved_cs, get_next cs);
                        else                            get_opt_x (neg, saved_cs, arg);
                        fi;

                    get_opt_w (neg, saved_cs, NULL)
                        =>
                        finish (neg, saved_cs);
                end

                also
                fun get_opt_x (neg, saved_cs, NULL)
                        =>
                        finish (neg, saved_cs);

                    get_opt_x (neg, saved_cs, arg as THE (c, cs))
                        =>
                        if (c == x_code  and  p.x_okay)   check_digit (neg, saved_cs, get_next cs);
                        else                              check_digit (neg, saved_cs, arg);
                        fi;
                end

                also
                fun check_digit (neg, saved_cs, THE (c, cs))
                      =>
                      if (p.is_digit c)  THE { neg, next => c, rest => cs };
                      else               finish (neg, saved_cs);
                      fi;

                    check_digit (neg, saved_cs, NULL)
                        =>
                        finish (neg, saved_cs);
                end

                also
                fun finish (neg, (c, cs))
                    =
                    if ((p.is_digit c) or ((c == pt_code) and p.pt_okay))
                        #
                        THE { neg, next => c, rest => cs };
                    else
                        NULL;
                    fi;

            end;                                # fun scan_prefix

        # For power of 2 bases (2, 8 & 16),
        # we can check for overflow by looking
        # at the hi (1, 3 or 4) bits.
        #
        fun check_overflow mask w
            =
            if (u32::bitwise_and (mask, w) != 0u0)   raise exception OVERFLOW;   fi;

        fun is_bin_digit d = (d < 0u2);
        fun is_oct_digit d = (d < 0u8);
        fun is_dec_digit d = (d < 0u10);
        fun is_hex_digit d = (d < 0u16);

        fun bin_pattern w_okay = { w_okay, x_okay=>FALSE, pt_okay=>FALSE, is_digit=>is_bin_digit };
        fun oct_pattern w_okay = { w_okay, x_okay=>FALSE, pt_okay=>FALSE, is_digit=>is_oct_digit };
        fun hex_pattern w_okay = { w_okay, x_okay=>TRUE,  pt_okay=>FALSE, is_digit=>is_hex_digit };

        fun dec_pattern (w_okay, pt_okay)
            =
            { w_okay, x_okay=>FALSE, pt_okay,
                                     is_digit=>is_dec_digit };

        fun scan_bin is_word getc cs
            =
            case (scan_prefix (bin_pattern is_word) getc cs)
                #
                THE { neg, next, rest }
                    =>
                    convert (next, rest)
                    where
                        check_overflow
                            =
                            check_overflow 0ux80000000;

                        fun convert (w, rest)
                            =
                            case (getc rest)
                                #
                                THE (c, rest')
                                    =>
                                    {   d =  code c;
                                        #
                                        if (is_bin_digit d)
                                            #
                                            check_overflow w;
                                            convert (u32::(+) (u32::lshift (w, 0u1), d), rest');
                                        else
                                            THE { neg, word=>w, rest };
                                        fi;
                                   };

                                NULL =>   THE { neg, word=>w, rest };
                            esac;
                    end;

                NULL =>  NULL;
            esac;

        fun scan_oct is_word getc cs
            =
            case (scan_prefix (oct_pattern is_word) getc cs)
                #
                THE { neg, next, rest }
                    =>
                    convert (next, rest)
                    where
                        check_overflow
                            =
                            check_overflow 0uxE0000000;

                        fun convert (w, rest)
                            =
                            case (getc rest)
                                #
                                THE (c, rest')
                                    =>
                                    {   d =  code c;
                                        #
                                        if (is_oct_digit d)
                                            #
                                            check_overflow w;
                                            convert (u32::(+) (u32::lshift (w, 0u3), d), rest');
                                        else
                                            THE { neg, word=>w, rest };
                                        fi;
                                    };

                                NULL => THE { neg, word=>w, rest };
                            esac;
                    end;

                NULL => NULL;
            esac;

        fun scan_dec is_word getc cs
            =
            case (scan_prefix (dec_pattern (is_word, FALSE)) getc cs)
                #
                THE { neg, next, rest }
                    =>
                    convert (next, rest)
                    where
                        fun convert (w, rest)
                            =
                            case (getc rest)
                                #
                                THE (c, rest')
                                    =>
                                    {   d =  code c;
                                        #
                                        if (is_dec_digit d)
                                            #
                                            if ((w >= largest_word_div10)
                                                 and ((largest_word_div10 < w)
                                                   or (largest_word_mod10 < d))
                                            )
                                                raise exception OVERFLOW;
                                            fi;

                                            convert (0u10*w+d, rest');
                                        else
                                            THE { neg, word=>w, rest };
                                        fi;
                                    };

                                NULL =>  THE { neg, word=>w, rest };
                            esac;
                    end;

                NULL =>  NULL;
            esac;

        fun scan_hex is_word getc cs
            =
            case (scan_prefix  (hex_pattern is_word)  getc  cs)
                #
                THE { neg, next, rest }
                    =>
                    convert (next, rest)
                    where
                        check_overflow
                            =
                            check_overflow  0uxF0000000;

                        fun convert (w, rest)
                            =
                            case (getc rest)
                                #
                                THE (c, rest')
                                    =>
                                    {   d =  code c;
                                        #
                                        if (is_hex_digit d)
                                            #
                                            check_overflow w;
                                            convert (u32::(+) (u32::lshift (w, 0u4), d), rest');
                                        else
                                            THE { neg, word=>w, rest };
                                        fi;
                                   };

                                NULL =>   THE { neg, word=>w, rest };
                            esac;
                    end;

                NULL =>   NULL;
            esac;

        fun final_word scan_g getc cs
            =
            case (scan_g TRUE getc cs)
                #
                THE { neg, word, rest }
                    =>
                    THE (word, rest);

                NULL => NULL;
            esac;

        fun scan_word  ns::BINARY  =>  final_word  scan_bin;
            scan_word  ns::OCTAL   =>  final_word  scan_oct;
            scan_word  ns::DECIMAL =>  final_word  scan_dec;
            scan_word  ns::HEX     =>  final_word  scan_hex;
        end;

        stipulate
            fromword32 =  i32::from_large  o  u32::to_large_int_x;
        herein

            fun final_int scan_g getc cs
                =
                case (scan_g FALSE getc cs)
                    #
                    THE { neg=>TRUE, word, rest }
                        =>
                        if (word < largest_neg_int1)
                            #
                            THE (it::i1::neg(fromword32 word), rest);
                        else
                            if (largest_neg_int1 < word)
                                #
                                raise exception OVERFLOW;
                            else
                                THE (min_int1, rest);
                            fi;
                        fi;

                    THE { word, rest, ... }
                        =>
                        if (largest_pos_int1 < word)
                            #
                            raise exception OVERFLOW;
                        else
                            THE (fromword32 word, rest);
                        fi;

                    NULL => NULL;
                esac;
        end;

        fun scan_int  ns::BINARY  =>  final_int  scan_bin;
            scan_int  ns::OCTAL   =>  final_int  scan_oct;
            scan_int  ns::DECIMAL =>  final_int  scan_dec;
            scan_int  ns::HEX     =>  final_int  scan_hex;
        end;

        # Scan a string of decimal digits (starting with d), and return their
        # value as a real number.  Also return the number of digits, and the
        # rest of the stream.
        #
        fun fscan10 getc (d, cs)
            =
            {   fun word_to_real w
                    =
                    it::f64::from_tagged_int (u32::to_int_x w);

                fun scan (accum, n, cs)
                    =
                    case (getc cs)
                        #
                        THE (c, cs')
                            =>
                            {   d =  code c;
                                #
                                if (is_dec_digit d)
                                    #
                                    scan (r::(+) (r::(*) (10.0, accum), word_to_real d), ti::(+) (n, 1), cs');
                                else
                                    THE (accum, n, cs);
                                fi;
                            };

                        NULL =>   THE (accum, n, cs);
                    esac;

                if (is_dec_digit d)   scan (word_to_real d, 1, cs);
                else                  NULL;
                fi;
            };

        stipulate

            neg_table = #[
                    1.0E-0, 1.0E-1, 1.0E-2, 1.0E-3, 1.0E-4,
                    1.0E-5, 1.0E-6, 1.0E-7, 1.0E-8, 1.0E-9
                  ];

            pos_table = #[
                    1.0E0, 1.0E1, 1.0E2, 1.0E3, 1.0E4,
                    1.0E5, 1.0E6, 1.0E7, 1.0E8, 1.0E9
                  ];

            fun scale (table, step10:  Float)
                =
                f
                where
                    fun f (r, 0) =>   r;
                        #
                        f (r, exp)
                            =>
                            if (ti::(<) (exp, 10))
                                #
                                (r::(*) (r, it::poly_vector::get (table, exp)));
                            else
                                f (r::(*) (step10, r), ti::(-) (exp, 10));
                            fi;
                    end;
                end;

        herein

            scale_up   =  scale (pos_table, 1.0E10);
            scale_down =  scale (neg_table, 1.0E-10);

        end;

        fun scan_real getc cs
            =
            {   fun scan10 cs
                    =
                    case (getc cs)
                        #
                        THE (c, cs) =>  fscan10 getc (code c, cs);
                        NULL        =>  NULL;
                    esac;

                fun get_frac rest
                    =
                    case (scan10 rest)
                        #
                        THE (frac, n, rest)
                            =>
                            THE (scale_down (frac, n), rest);

                        NULL => NULL;
                    esac;

                fun negate (TRUE,  num) =>  r::neg num;
                    negate (FALSE, num) =>  num;
                end;

                fun scan_expression cs
                    =
                    case (getc cs)
                        #
                        THE (c, cs)
                            =>
                            {   d =  code c;
                                #
                                fun scan (accum, cs)
                                    =
                                    case (getc cs)
                                        #
                                        THE (c, cs')
                                            =>
                                            {   d =  code c;
                                                #
                                                if (is_dec_digit d)   scan (ti::(+) (ti::(*) (accum, 10), u32::to_int_x d), cs');
                                                else                  (accum, cs);
                                                fi;
                                           };

                                        NULL =>   (accum, cs);
                                    esac;

                                if (is_dec_digit  d)    THE (scan (u32::to_int_x d, cs));
                                else                    NULL;
                                fi;
                            };

                        NULL =>  NULL;
                    esac;

                fun get_expression (num, cs)
                    =
                    case (getc cs)
                        #
                        THE (c, cs1)
                            =>
                            if (code c == e_code)
                                #
                                case (getc cs1)
                                    #
                                    THE (c, cs2)
                                        =>
                                        {   code_c =  code c;
                                            #
                                            my (is_neg, cs3)
                                                =
                                                if (code_c == minus_code)
                                                    #
                                                    (TRUE, cs2);
                                                else
                                                    code_c == plus_code
                                                      ?? (FALSE, cs2)
                                                      :: (FALSE, cs1);
                                                fi;                             #  no sign

                                            case (scan_expression cs3)
                                                #
                                                THE (exp, cs4)
                                                    =>
                                                    THE ( is_neg ?? scale_down (num, exp)
                                                                 :: scale_up   (num, exp),
                                                          cs4
                                                        );

                                                NULL =>  THE (num, cs);
                                            esac;
                                        };

                                    NULL =>  THE (num, cs);
                                esac;

                            else
                                THE (num, cs);
                            fi;

                        NULL =>  THE (num, cs);
                    esac;

                case (scan_prefix (dec_pattern (FALSE, TRUE)) getc cs)
                    #
                    THE { neg, next, rest }
                        =>
                        if (next == pt_code)    #  initial point after prefix
                            #
                            case (get_frac rest)
                                #
                                THE (frac, rest)
                                    =>
                                    get_expression (negate (neg, frac), rest);

                                NULL =>  NULL;
                            esac;                       #  initial point not followed by digit

                        else
                            #  ASSERT: next must be a digit
                            #  get whole number part

                            case (fscan10 getc (next, rest))
                                #
                                THE (whole, _, rest)
                                    =>
                                    case (getc rest)
                                        #
                                        THE ('.', rest')
                                            =>
                                            # Whole part followed by point,
                                            # get fraction:
                                            #
                                            case (get_frac rest')
                                                #
                                                THE (frac, rest'')
                                                    =>
                                                    # Fraction exists:
                                                    #
                                                    get_expression (negate (neg, r::(+) (whole, frac)), rest'');

                                                NULL =>   THE (negate (neg, whole), rest);                                              # No fraction -- point terminates num.
                                            esac;

                                         _  => get_expression (negate (neg, whole), rest);
                                    esac;

                                NULL =>  NULL;                                                                                          # ASSERT: this case can't happen
                            esac;
                        fi;

                    NULL =>  NULL;
                esac;
            };                  # fun scan_real
    };
end;

Comments and suggestions to: bugs@mythryl.org