PreviousUpNext

15.4.1106  src/lib/std/src/num-scan.pkg

## num-scan.pkg

# Compiled by:
#     src/lib/std/src/standard-core.sublib

# The string conversion for the largest int and word types.
# All of the other scan functions can be implemented in terms of them.

package num_scan

: (weak)
api {

    scan_word
        :
        number_string::Radix                                    # number_string         is from   src/lib/std/src/number-string.pkg
        ->
        number_string::Reader( Char, X )
        ->
        number_string::Reader( one_word_unt::Unt, X );


    scan_int
        :
        number_string::Radix
        ->
        number_string::Reader( Char, X )
        ->
         number_string::Reader( one_word_int::Int, X );


    scan_real
        :
        number_string::Reader( Char, X)
        ->
        number_string::Reader( Float, X );

        # * should be to eight_byte_float::Float *

}
{                                                               # inline_t              is from   src/lib/core/init/built-in.pkg
    package u32 =  inline_t::u1;                                # "u1"  ==  "one-word unsigned int" (32-bits on 32-bit architectures; 64-bit on 64-bit architectures).
    package ti  =  inline_t::ti;                                # "ti"  ==  "tagged_int".
    package i32 =  inline_t::i1;                                # "i1"  ==  "one-word   signed int" (32-bits on 32-bit architectures; 64-bit on 64-bit architectures).
    package r   =  inline_t::f64;                               # "f64" ==  "64-bit float'.

    Unt = one_word_unt::Unt;

    my (<)  = u32::(<);
    my (>=) = u32::(>=);
    my (+)  = u32::(+);
    my (-)  = u32::(-);
    my (*)  = u32::(*);

    my largest_word_div10:  Unt = 0u429496729;  #  2^32-1 divided by 10 
    my largest_word_mod10:  Unt = 0u5;          #  remainder 

    my largest_neg_int1:  Unt = 0ux80000000;
    my largest_pos_int1:  Unt = 0ux7fffffff;
    my min_int1:  one_word_int::Int = -2147483648;

    # A table for mapping digits to values.  Whitespace characters map to
    # 128, "+" maps to 129, "-", "~" map to 130, "." maps to 131, and the
    # characters 0-9, A-Z, a-z map to their * base-36 value.  All other
    # characters map to 255.

    stipulate

        cvt_table = "\
            \\255\255\255\255\255\255\255\255\255\128\128\255\255\255\255\255\
            \\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
            \\128\255\255\255\255\255\255\255\255\255\255\129\255\130\131\255\
            \\000\001\002\003\004\005\006\007\008\009\255\255\255\255\255\255\
            \\255\010\011\012\013\014\015\016\017\018\019\020\021\022\023\024\
            \\025\026\027\028\029\030\031\255\033\034\035\255\255\255\255\255\
            \\255\010\011\012\013\014\015\016\017\018\019\020\021\022\023\024\
            \\025\026\027\028\029\030\031\032\033\034\035\255\255\255\130\255\
            \\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
            \\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
            \\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
            \\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
            \\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
            \\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
            \\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
            \\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
          \";

        to_int = inline_t::char::ord;

    herein

        fun code (c:  Char)
            =
            u32::from_int (to_int (inline_t::vector_of_chars::get (cvt_table, to_int c)));

        my ws_code:     Unt = 0u128;            #  Code for whitespace 
        my plus_code:   Unt = 0u129;            #  Code for '+' 
        my minus_code:  Unt = 0u130;            #  Code for '-' and '~' 
        my pt_code:     Unt = 0u131;            #  Code for '.' 
        my e_code:      Unt = 0u14;             #  Code for 'e' and 'E' 
        my w_code:      Unt = 0u32;             #  Code for 'w' 
        my x_code:      Unt = 0u33;             #  Code for 'X' and 'X' 

    end;

    Prefix_Pat
        =
        { w_okay:  Bool,                # TRUE if 0[wW] prefix is okay; if this is
                                        # TRUE, then signs (+, -, ~) are not okay.

          x_okay:  Bool,                #  TRUE if 0[xX] prefix is okay 
          pt_okay: Bool,                #  TRUE if can start with point 
          is_digit:  Unt -> Bool        #  returns TRUE for allowed digits 
        };

#     scanPrefix:  prefix_pat
#                  ->
#                  Reader( char, X )
#                  ->
#                  X
#                  ->
#                  Null_Or { neg: Bool, next: word /* code */, rest: X }
#
#       scans prefix for a number:
#       binPattern (TRUE)  { wOkay=TRUE, xOkay=FALSE, ptOkay=FALSE, isBinDigit } =>
#          (0[wW])?b (b binary digit)
#       binPattern (FALSE) { wOkay=TRUE, xOkay=FALSE, ptOkay=FALSE, isBinDigit } =>
#          [-~+]?b
#       octPattern (TRUE)  { wOkay=TRUE, xOkay=FALSE, ptOkay=FALSE, isOctDigit } =>
#          (0[wW])?o (o octal digit)
#       octPattern (FALSE) { wOkay=FALSE, xOkay=FALSE, ptOkay=FALSE, isOctDigit } =>
#          [-~+]?o
#       hexPattern (TRUE)  { wOkay=TRUE, xOkay=TRUE, ptOkay=FALSE, is_hex_digit } =>
#           (0[wW][xX])?h (h hex digit)
#       hexPattern (FALSE) { wOkay=FALSE, xOkay=TRUE, ptOkay=FALSE, is_hex_digit } =>
#          [-~+]?(0[xX])?h
#       decPattern (TRUE, FALSE) { wOkay=TRUE, xOkay=FALSE, ptOkay=FALSE, isDecDigit } =>
#           (0[wW][xX])?d (d decimal digit)
#       decPattern (FALSE, false) { wOkay=FALSE, xOkay=FALSE, ptOkay=FALSE, isDecDigit } =>
#          [-~+]?d
#       decPattern (FALSE, TRUE) { wOkay=FALSE, xOkay=FALSE, ptOkay=TRUE, isDecDigit } =>
#          [-~+]?[.d]
#
#       Sign characters, initial 0x, 0u, etc are consumed.  The initial
#       digit or point code is returned as the value of next.

    fun scan_prefix (p:  Prefix_Pat) getc cs
        =
        get_opt_sign (skip_ws cs)
        where
            fun get_next cs
                =
                case (getc cs)

                    THE (c, cs)
                        =>
                        THE (code c, cs);

                    NULL => NULL;
                esac;

            fun skip_ws cs
                =
                case (get_next cs)

                    THE (c, cs')
                        =>
                        if (c == ws_code)   skip_ws cs';
                        else                THE (c, cs');
                        fi;

                    NULL => NULL;
                esac;

            fun get_opt_sign (next as THE (c, cs))
                    =>
                    if   (p.w_okay)          get_opt0 (FALSE, THE (c, cs));
                    elif (c == plus_code)    get_opt0 (FALSE, get_next cs);
                    elif (c == minus_code)   get_opt0 (TRUE,  get_next cs);
                    else                     get_opt0 (FALSE, next);
                    fi;

                get_opt_sign NULL
                    =>
                    NULL;
            end 

            also
            fun get_opt0 (neg, THE (c, cs))
                    =>
                    if (c == 0u0
                    and (p.w_okay or p.x_okay)
                    )
                        get_opt_w (neg, (c, cs), get_next cs);
                    else
                        finish (neg, (c, cs));
                    fi;

                get_opt0 (neg, NULL)
                    =>
                    NULL;
            end 

            also
            fun get_opt_w (neg, saved_cs, arg as THE (c, cs))
                    =>
                    if (c == w_code and p.w_okay)
                         get_opt_x (neg, saved_cs, get_next cs);
                    else get_opt_x (neg, saved_cs, arg);
                    fi;

                get_opt_w (neg, saved_cs, NULL)
                    =>
                    finish (neg, saved_cs);
            end 

            also
            fun get_opt_x (neg, saved_cs, NULL)
                    =>
                    finish (neg, saved_cs);

                get_opt_x (neg, saved_cs, arg as THE (c, cs))
                    =>
                    if (c == x_code  and  p.x_okay)
                         check_digit (neg, saved_cs, get_next cs);
                    else check_digit (neg, saved_cs, arg);
                    fi;
            end 

            also
            fun check_digit (neg, saved_cs, THE (c, cs))
                  =>
                  if (p.is_digit c)  THE { neg, next => c, rest => cs };
                  else               finish (neg, saved_cs);
                  fi;

                check_digit (neg, saved_cs, NULL)
                    =>
                    finish (neg, saved_cs);
            end 

            also
            fun finish (neg, (c, cs))
                =
                if ((p.is_digit c) or ((c == pt_code) and p.pt_okay))
                    THE { neg, next => c, rest => cs };
                else
                    NULL;
                fi;

        end;                            # fun scan_prefix

    # For power of 2 bases (2, 8 & 16),
    # we can check for overflow by looking
    # at the hi (1, 3 or 4) bits.
    #
    fun check_overflow mask w
        =
        if (u32::bitwise_and (mask, w) != 0u0)   raise exception OVERFLOW;   fi;

    fun is_bin_digit d = (d < 0u2);
    fun is_oct_digit d = (d < 0u8);
    fun is_dec_digit d = (d < 0u10);
    fun is_hex_digit d = (d < 0u16);

    fun bin_pattern w_okay = { w_okay, x_okay=>FALSE, pt_okay=>FALSE, is_digit=>is_bin_digit };
    fun oct_pattern w_okay = { w_okay, x_okay=>FALSE, pt_okay=>FALSE, is_digit=>is_oct_digit };
    fun hex_pattern w_okay = { w_okay, x_okay=>TRUE,  pt_okay=>FALSE, is_digit=>is_hex_digit };

    fun dec_pattern (w_okay, pt_okay)
        =
        { w_okay, x_okay=>FALSE, pt_okay,
                                 is_digit=>is_dec_digit };

    fun scan_bin is_word getc cs
        =
        case (scan_prefix (bin_pattern is_word) getc cs)

            THE { neg, next, rest }
                =>
                convert (next, rest)
                where
                    check_overflow
                        =
                        check_overflow 0ux80000000;

                    fun convert (w, rest)
                        =
                        case (getc rest)

                            THE (c, rest')
                                =>
                                {   d = code c;

                                    if (is_bin_digit d)

                                        check_overflow w;
                                        convert (u32::(+) (u32::lshift (w, 0u1), d), rest');
                                    else
                                        THE { neg, word=>w, rest };
                                    fi;
                               };

                            NULL => THE { neg, word=>w, rest };
                        esac;
                end;

            NULL => NULL;
        esac;


    fun scan_oct is_word getc cs
        =
        case (scan_prefix (oct_pattern is_word) getc cs)

            THE { neg, next, rest }
                =>
                convert (next, rest)
                where
                    check_overflow
                        =
                        check_overflow 0uxE0000000;

                    fun convert (w, rest)
                        =
                        case (getc rest)

                            THE (c, rest')
                                =>
                                {   d = code c;

                                    if (is_oct_digit d)

                                        check_overflow w;
                                        convert (u32::(+) (u32::lshift (w, 0u3), d), rest');
                                    else
                                        THE { neg, word=>w, rest };
                                    fi;
                                };

                            NULL => THE { neg, word=>w, rest };
                        esac;
                end;
          
            NULL => NULL;
        esac;


    fun scan_dec is_word getc cs
        =
        case (scan_prefix (dec_pattern (is_word, FALSE)) getc cs)

            THE { neg, next, rest }
                =>
                convert (next, rest)
                where
                    fun convert (w, rest)
                        =
                        case (getc rest)

                            THE (c, rest')
                                =>
                                {   d = code c;

                                    if (is_dec_digit d)

                                        if ((w >= largest_word_div10)
                                             and ((largest_word_div10 < w)
                                               or (largest_word_mod10 < d))
                                        )
                                            raise exception OVERFLOW;
                                        fi;

                                        convert (0u10*w+d, rest');
                                    else
                                        THE { neg, word=>w, rest };
                                    fi;
                                };

                            NULL => THE { neg, word=>w, rest };
                        esac;
                end;

            NULL => NULL;
        esac;


    fun scan_hex is_word getc cs
        =
        case (scan_prefix  (hex_pattern is_word)  getc  cs)

            THE { neg, next, rest }
                =>
                convert (next, rest)
                where
                    check_overflow
                        =
                        check_overflow  0uxF0000000;

                    fun convert (w, rest)
                        =
                        case (getc rest)

                            THE (c, rest')
                                =>
                                {   d = code c;

                                    if (is_hex_digit d)

                                        check_overflow w;
                                        convert (u32::(+) (u32::lshift (w, 0u4), d), rest');
                                    else
                                        THE { neg, word=>w, rest };
                                    fi;
                               };

                            NULL => THE { neg, word=>w, rest };
                        esac;
                end;

            NULL => NULL;
        esac;


    fun final_word scan_g getc cs
        =
        case (scan_g TRUE getc cs)

            THE { neg, word, rest }
                =>
                THE (word, rest);

            NULL => NULL;
        esac;

    fun scan_word number_string::BINARY  =>  final_word scan_bin;
        scan_word number_string::OCTAL   =>  final_word scan_oct;
        scan_word number_string::DECIMAL =>  final_word scan_dec;
        scan_word number_string::HEX     =>  final_word scan_hex;
    end;

    stipulate

        fromword32 = i32::from_large o u32::to_large_int_x; 

    herein

        fun final_int scan_g getc cs
            =
            case (scan_g FALSE getc cs)
                #
                THE { neg=>TRUE, word, rest }
                    =>
                    if (word < largest_neg_int1)
                        #
                        THE (inline_t::i1::neg(fromword32 word), rest);
                    else 
                        if (largest_neg_int1 < word)
                            #
                            raise exception OVERFLOW;
                        else 
                            THE (min_int1, rest);
                        fi;
                    fi;

                THE { word, rest, ... }
                    =>
                    if (largest_pos_int1 < word)
                        #
                        raise exception OVERFLOW;
                    else 
                        THE (fromword32 word, rest);
                    fi;

                NULL => NULL;
            esac;
    end;

    fun scan_int number_string::BINARY  =>  final_int  scan_bin;
        scan_int number_string::OCTAL   =>  final_int  scan_oct;
        scan_int number_string::DECIMAL =>  final_int  scan_dec;
        scan_int number_string::HEX     =>  final_int  scan_hex;
    end;
  
    # Scan a string of decimal digits (starting with d), and return their
    # value as a real number.  Also return the number of digits, and the
    # rest of the stream.
    #
    fun fscan10 getc (d, cs)
        =
        {   fun word_to_real w
                =
                inline_t::f64::from_tagged_int (u32::to_int_x w);

            fun scan (accum, n, cs)
                =
                case (getc cs)
                  
                    THE (c, cs')
                        =>
                        {   d = code c;

                            if (is_dec_digit d)

                                scan (r::(+) (r::(*) (10.0, accum), word_to_real d), ti::(+) (n, 1), cs');
                            else
                                THE (accum, n, cs);
                            fi;
                        };

                    NULL
                        =>
                        THE (accum, n, cs);
                esac;


            if (is_dec_digit d)
                
                scan (word_to_real d, 1, cs);
            else
                NULL;
            fi;
        };

    stipulate

        neg_table = #[
                1.0E-0, 1.0E-1, 1.0E-2, 1.0E-3, 1.0E-4,
                1.0E-5, 1.0E-6, 1.0E-7, 1.0E-8, 1.0E-9
              ];

        pos_table = #[
                1.0E0, 1.0E1, 1.0E2, 1.0E3, 1.0E4,
                1.0E5, 1.0E6, 1.0E7, 1.0E8, 1.0E9
              ];

        fun scale (table, step10:  Float)
            =
            f
            where
                fun f (r, 0)
                        =>
                        r;

                    f (r, exp)
                        =>
                        if (ti::(<) (exp, 10))
                            #
                            (r::(*) (r, inline_t::poly_vector::get (table, exp)));
                        else
                            f (r::(*) (step10, r), ti::(-) (exp, 10));
                        fi;
                end;
            end;

    herein

        scale_up   =  scale (pos_table, 1.0E10);
        scale_down =  scale (neg_table, 1.0E-10);

    end;


    fun scan_real getc cs
        =
        {   fun scan10 cs
                =
                case (getc cs)
                    THE (c, cs) =>  fscan10 getc (code c, cs);
                    NULL        =>  NULL;
                esac;

            fun get_frac rest
                =
                case (scan10 rest)
                  
                     THE (frac, n, rest)
                         =>
                         THE (scale_down (frac, n), rest);

                     NULL => NULL;
                esac;


            fun negate (TRUE,  num) =>  r::neg num;
                negate (FALSE, num) =>  num;
            end;

            fun scan_expression cs
                =
                case (getc cs)
                  
                    THE (c, cs)
                        =>
                        {   d = code c;

                            fun scan (accum, cs)
                                =
                                case (getc cs)

                                    THE (c, cs')
                                        =>
                                        {   d = code c;

                                            if (is_dec_digit d)
                                                #
                                                scan (ti::(+) (ti::(*) (accum, 10), u32::to_int_x d), cs');
                                            else
                                                (accum, cs);
                                            fi;
                                       };

                                    NULL =>
                                        (accum, cs);
                                esac;


                            if (is_dec_digit  d)

                                THE (scan (u32::to_int_x d, cs));
                            else
                                NULL;
                            fi;
                        };

                    NULL => NULL;
                esac;


            fun get_expression (num, cs)
                =
                case (getc cs)
                  
                    THE (c, cs1)
                        =>
                        if (code c == e_code)

                            case (getc cs1)

                                THE (c, cs2)
                                    =>
                                    {   code_c = code c;

                                        my (is_neg, cs3)
                                            =
                                            if (code_c == minus_code)

                                                (TRUE, cs2);
                                            else
                                                code_c == plus_code
                                                  ?? (FALSE, cs2)
                                                  :: (FALSE, cs1);
                                            fi;                         #  no sign 

                                        case (scan_expression cs3)

                                            THE (exp, cs4)
                                                =>
                                                THE ( is_neg ?? scale_down (num, exp)
                                                             :: scale_up   (num, exp),
                                                      cs4
                                                    );

                                            NULL => THE (num, cs);
                                        esac;
                                    };

                                NULL => THE (num, cs);
                            esac;

                        else
                            THE (num, cs);
                        fi;

                    NULL => THE (num, cs);
                esac;


            case (scan_prefix (dec_pattern (FALSE, TRUE)) getc cs)

                THE { neg, next, rest }
                    =>
                    if (next == pt_code)        #  initial point after prefix 

                        case (get_frac rest)

                             THE (frac, rest)
                                 => 
                                 get_expression (negate (neg, frac), rest);

                             NULL => NULL;
                        esac;                   #  initial point not followed by digit 

                    else
                        #  ASSERT: next must be a digit 
                        #  get whole number part 

                        case (fscan10 getc (next, rest))

                            THE (whole, _, rest)
                                =>
                                case (getc rest)

                                    THE ('.', rest')
                                        =>
                                        # Whole part followed by point,
                                        # get fraction:
                                        # 
                                        case (get_frac rest')

                                            THE (frac, rest'')
                                                =>
                                                # Fraction exists:
                                                # 
                                                get_expression (negate (neg, r::(+) (whole, frac)), rest'');

                                            NULL =>
                                                # No fraction -- point terminates num:
                                                #
                                                THE (negate (neg, whole), rest);
                                        esac;

                                     _  => get_expression (negate (neg, whole), rest);
                                esac;


                            NULL => NULL; #  ASSERT: this case can't happen 
                        esac;
                    fi;

                NULL => NULL;
            esac;
        };                      # fun scan_real
};




Comments and suggestions to: bugs@mythryl.org

PreviousUpNext