PreviousUpNext

15.4.1079  src/lib/std/src/char.pkg

## char.pkg

# Compiled by:
#     src/lib/std/src/standard-core.sublib

###                          "Almost everything that distinguishes the modern world
###                           from earlier centuries is attributable to science,
###                           which achieved its most spectacular triumphs
###                           in the seventeenth century."
###
###                                                          -- Bertrand Russell



stipulate
    package ic  =  inline_t::char;                              # inline_t              is from   src/lib/core/init/built-in.pkg
    package it  =  inline_t;                                    # inline_t              is from   src/lib/core/init/built-in.pkg
    package nf  =  number_format;                               # number_format         is from   src/lib/std/src/number-format.pkg
    package ns  =  number_string;                               # number_string         is from   src/lib/std/src/number-string.pkg
    package ps  =  protostring;                                 # protostring           is from   src/lib/std/src/protostring.pkg
    package rt  =  runtime;                                     # runtime               is from   src/lib/core/init/built-in.pkg.
    package g2d =  exceptions_guts;                             # exceptions_guts       is from   src/lib/std/src/exceptions-guts.pkg
herein

    package char: (weak)
                            api {
                                include api Char;               # Char                  is from   src/lib/std/src/char.api

                                scan_c:   ns::Reader( Char, X ) -> ns::Reader( Char, X );
                                    #
                                    # Internal scanning function for C-style escape sequences 
                            }
    {
        (+) =  it::default_int::(+);
        (-) =  it::default_int::(-);
        (*) =  it::default_int::(*);

        itoc =   it::cast:  Int -> Char;
        ctoi =   it::cast:  Char -> Int;

        Char = Char;
        String = String;

        min_char =   ic::chr 0           :   Char;
        max_char =   ic::chr ic::max_ord :   Char;

        max_ord         = ic::max_ord;


        fun prior (c:  Char) : Char
            =
            {   c' =   ctoi c  -  1;
                #
                if (it::default_int::(<) (c', 0))
                    #   
                    raise exception g2d::BAD_CHAR;      # exceptions_guts       is from   src/lib/std/src/exceptions-guts.pkg
                else
                    (itoc c');
                fi;
            };


        fun next (c:  Char) : Char
            =
            {   c' =   ctoi c  +  1;
                #
                if (it::default_int::(<) (max_ord, c'))
                    #
                    raise exception g2d::BAD_CHAR;
                else
                    (itoc c');
                fi;
            };


        from_int =  ic::chr;
        to_int   =  ic::ord;

        (<)  =  ic::(<);
        (<=) =  ic::(<=);
        (>)  =  ic::(>);
        (>=) =  ic::(>=);

        fun compare (c1:  Char, c2:  Char)
            =
            if   (c1 == c2)  EQUAL;
            elif (c1 <  c2)  LESS;
            else             GREATER;
            fi;

        # Testing character membership:
        #
        stipulate
            #
            fun make_array (s, s_len)
                =
                {   init 0;
                    ins  0;
                    cv;
                }
                where
                    cv =   rt::asm::make_string (max_ord+1);                            # "rt" == "runtime" -- from   src/lib/core/init/built-in.pkg
                    #
                    fun init i
                        =
                        if (it::default_int::(<=) (i, max_ord))
                            #                   
                            it::vector_of_chars::set_char_as_byte (cv, i, '\x00');
                            init (i+1);
                        fi;

                    fun ins i
                        =
                        if (it::default_int::(<) (i, s_len))
                            #                   
                            it::vector_of_chars::set_char_as_byte (
                                cv,
                                to_int (it::vector_of_chars::get_byte_as_char (s, i)),
                                '\x01'
                            );

                            ins (i+1);
                        fi;

                end;
        herein

            fun contains "" =>     \\ c =  FALSE;
                #
                contains s
                    =>
                    {   s_len =  it::vector_of_chars::length  s;
                        #
                        if (s_len == 1)
                            #
                            c' = it::vector_of_chars::get_byte_as_char (s, 0);
                            #
                            \\ c = (c == c');
                        else
                            cv = make_array (s, s_len);
                            #
                            \\ c = (it::vector_of_chars::get_byte_as_char (cv, to_int c) != '\x00');
                        fi;
                    };
            end;

            fun not_contains "" =>    \\ c =  TRUE;
                #
                not_contains s
                    =>
                    {   s_len = it::vector_of_chars::length s;
                        #
                        if (s_len == 1)
                            #
                            c' = it::vector_of_chars::get_byte_as_char (s, 0);
                            #
                            \\ c =   c != c';
                        else
                            cv = make_array (s, s_len);
                            #
                            \\ c =   it::vector_of_chars::get_byte_as_char (cv, to_int c) == '\x00';
                        fi;
                    };
            end;
        end;             #  stipulate

        # For each character code we have an 8-bit vector, which is interpreted
        # as follows:
        #   0x01  ==  set for upper-case letters
        #   0x02  ==  set for lower-case letters
        #   0x04  ==  set for digits
        #   0x08  ==  set for white space characters
        #   0x10  ==  set for punctuation characters
        #   0x20  ==  set for control characters
        #   0x40  ==  set for hexadecimal characters
        #   0x80  ==  set for SPACE

        ctype_table = "\
                \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x28\x28\x28\x28\x28\x20\x20\
                \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
                \\x88\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\
                \\x44\x44\x44\x44\x44\x44\x44\x44\x44\x44\x10\x10\x10\x10\x10\x10\
                \\x10\x41\x41\x41\x41\x41\x41\x01\x01\x01\x01\x01\x01\x01\x01\x01\
                \\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x10\x10\x10\x10\x10\
                \\x10\x42\x42\x42\x42\x42\x42\x02\x02\x02\x02\x02\x02\x02\x02\x02\
                \\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x10\x10\x10\x10\x20\
                \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
                \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
                \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
                \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
                \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
                \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
                \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
                \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
              \";


        fun in_set (c, s)
            =
            {   m = to_int (it::vector_of_chars::get_byte_as_char (ctype_table, to_int c));
                #
                (it::default_int::bitwise_and (m, s) != 0);
            };

        # Predicates on integer coding of Ascii values:
        #
        fun is_alpha c    = in_set (c, 0x03);
        fun is_upper c    = in_set (c, 0x01);
        fun is_lower c    = in_set (c, 0x02);

        fun is_digit        c =  in_set (c, 0x04);
        fun is_hex_digit    c =  in_set (c, 0x40);
        fun is_alphanumeric c =  in_set (c, 0x07);

        fun is_space c    = in_set (c, 0x08);
        fun is_punct c    = in_set (c, 0x10);

        fun is_graph c    = in_set (c, 0x17);
        fun is_print c    = in_set (c, 0x97);
        fun is_cntrl c    = in_set (c, 0x20);

        fun is_ascii c            = it::default_int::(<) (to_int c, 128);

        offset = ctoi 'a' - ctoi 'A';

        fun to_upper c = if (is_lower c)  itoc (ctoi c - offset); else c;fi;
        fun to_lower c = if (is_upper c)  itoc (ctoi c + offset); else c;fi;

        fun scan_digits is_digit getc n stream
            =
            scan (stream, n, [])
            where
                fun scan (stream, 0, l)
                        =>
                        (list::reverse l, stream);

                    scan (stream, i, l)
                        =>
                        case (getc stream)
                            #
                            NULL =>   (list::reverse l, stream);
                            #
                            THE (c, stream')
                                =>
                                is_digit c
                                  ??  scan (stream', i - 1, c ! l)
                                  ::  (list::reverse l, stream);
                      esac;
                end;
            end;

        fun check_digits radix (l, stream)
            =
            case ( number_scan::scan_int  radix  next  l)
                #
                THE (i, _)
                    =>
                    it::i1::(<) (i, 256)
                      ??  THE (from_int (it::i1::to_int i), stream)
                      ::  NULL;

                NULL => NULL;
            esac
            where
                fun next (x ! r) => THE (x, r);
                    next [] => NULL;
                end;
            end;


        # Conversions between characters
        # and printable representations:

        fun scan getc
            =
            scan'
            where
                fun scan' rep
                    = 
                    {   fun get2 rep
                            =
                            case (getc rep)
                                #
                                THE (c1, rep')
                                    =>
                                    case (getc rep')
                                        #
                                        THE (c2, rep'') =>  THE (c1, c2, rep'');
                                        _               =>  NULL;
                                    esac;

                                _   => NULL;
                            esac;


                        case (getc rep)
                            #
                            NULL => NULL;
                            #
                            THE('\\', rep')
                                =>
                                case (getc rep')
                                    #
                                    NULL => NULL;
                                    #
                                    THE('\\',rep'') =>  THE('\\', rep'');
                                    THE('"', rep'') =>  THE('"',  rep'');
                                    THE('a', rep'') =>  THE('\a', rep'');
                                    THE('b', rep'') =>  THE('\b', rep'');
                                    THE('t', rep'') =>  THE('\t', rep'');
                                    THE('n', rep'') =>  THE('\n', rep'');
                                    THE('v', rep'') =>  THE('\v', rep'');
                                    THE('f', rep'') =>  THE('\f', rep'');
                                    THE('r', rep'') =>  THE('\r', rep'');
                                    THE('^', rep'')
                                        =>
                                        case (getc rep'')
                                            #
                                            THE (c, rep''')
                                                =>
                                                if (('@' <= c) and (c <= '_'))    THE (from_int (to_int c - to_int '@'), rep''');
                                                else                              NULL;
                                                fi;

                                            NULL => NULL;
                                        esac;

                                    THE (d1, rep'')
                                        =>
                                        if (is_digit d1)
                                            #
                                            case (get2 rep'')
                                                #
                                                THE (d2, d3, rep''')
                                                    =>
                                                    {   fun convert d
                                                            =
                                                            (to_int d - to_int '0');

                                                        if (is_digit d2 and is_digit d3)
                                                            #
                                                            n = 100*(convert d1) + 10*(convert d2) + (convert d3);

                                                            if (it::default_int::(<) (n, 256))
                                                                 THE (from_int n, rep''');
                                                            else NULL;
                                                            fi;
                                                        else
                                                            NULL;
                                                        fi;
                                                    };

                                                NULL => NULL;
                                            esac;

                                        elif (is_space d1)

                                            # Skip over \<ws>+\ 
                                            #
                                            fun skip_ws stream
                                                =
                                                case (getc stream)
                                                    #
                                                    NULL =>  NULL;

                                                    THE('\\', stream')
                                                        =>
                                                        scan' stream';

                                                    THE (c, stream')
                                                        =>
                                                        if (is_space c)  skip_ws stream';
                                                        else             NULL;
                                                        fi;
                                                esac;


                                            skip_ws rep'';
                                        else
                                            NULL;
                                        fi;
                                esac;

                            THE ('"', rep')
                                =>
                                NULL;

                            THE (c, rep')
                                =>
                                if (is_print c)   THE (c, rep');
                                else              NULL;
                                fi;
                        esac;

                    };                          # fun scan'
            end;                                        # fun scan

        from_string
            =
            ns::scan_string scan;

        itoa =
            (nf::format_int ns::DECIMAL)
            o
            it::i1::from_int;

        fun to_string '\a' => "\\a";
            to_string '\b' => "\\b";
            to_string '\t' => "\\t";
            to_string '\n' => "\\n";
            to_string '\v' => "\\v";
            to_string '\f' => "\\f";
            to_string '\r' => "\\r";
            to_string '"' => "\\\"";
            to_string '\\' => "\\\\";

            to_string c
                =>
                if (is_print c)
                    #
                    it::poly_vector::get (ps::chars, to_int c);                         # NOTE: we should probably recognize the control characters  XXX SUCKO FIXME
                else
                    c' = to_int c;

                    if (it::default_int::(>) (c', 32))   ps::meld2 ("\\", itoa c');
                    else                                 ps::meld2 ("\\^", it::poly_vector::get (ps::chars, c'+64));
                    fi;
                fi;
        end;



        # Scanning function for C escape sequences 

        fun scan_c getc
            =
            scan
            where

                fun is_oct_digit d
                    =
                   '0' <=  d     and
                    d  <= '7';

                fun scan stream
                    =
                    case (getc stream)
                        #
                        NULL => NULL;

                        THE ('\\', stream')
                            =>
                            case (getc stream')
                                #
                                NULL => NULL;

                                THE ('a',  stream'') =>  THE ('\a', stream'');
                                THE ('b',  stream'') =>  THE ('\b', stream'');
                                THE ('t',  stream'') =>  THE ('\t', stream'');
                                THE ('n',  stream'') =>  THE ('\n', stream'');
                                THE ('v',  stream'') =>  THE ('\v', stream'');
                                THE ('f',  stream'') =>  THE ('\f', stream'');
                                THE ('r',  stream'') =>  THE ('\r', stream'');
                                THE ('\\', stream'') =>  THE ('\\', stream'');
                                THE ('"',  stream'') =>  THE ('"',  stream'');
                                THE ('\'', stream'') =>  THE ('\'', stream'');
                                THE ('?',  stream'') =>  THE ('?',  stream'');

                                THE ('x', stream'')
                                    =>
                                    # Hex escape code 
                                    #
                                    check_digits ns::HEX
                                        (scan_digits is_hex_digit getc -1 stream'');
                                _   =>
                                    # Should be octal escape code 
                                   check_digits ns::OCTAL
                                       (scan_digits is_oct_digit getc 3 stream');
                            esac;


    # NOT SURE ABOUT THE FOLLOWING TWO CASES: XXX BUGGO FIXME
    #               THE('"',  stream'') =>  NULL; #  error --- not escaped 
    #               THE('\'', stream'') =>  NULL; #  error --- not escaped 


                        THE (c, stream'')
                            =>
                            if (is_print c)   THE (c, stream'');
                            else              NULL;
                            fi;
                    esac;

          end;

        from_cstring
            =
            ns::scan_string scan_c;

        fun to_cstring '\a' => "\\a";
            to_cstring '\b' => "\\b";
            to_cstring '\t' => "\\t";
            to_cstring '\n' => "\\n";
            to_cstring '\v' => "\\v";
            to_cstring '\f' => "\\f";
            to_cstring '\r' => "\\r";
            to_cstring '"'  => "\\\"";
            to_cstring '\\' => "\\\\";
            to_cstring '?'  => "\\?";
            to_cstring '\'' => "\\'";

            to_cstring '\x00' => "\\0";

            to_cstring c
                =>
                if (is_print c)
                    #
                    it::poly_vector::get (ps::chars, to_int c);
                else
                    i = it::i1::from_int (to_int c);
                    #
                    prefix =    if (it::i1::(<) (i, 8))
                                    #
                                    "\\00";
                                else
                                    it::i1::(<) (i, 64)
                                      ??  "\\0"
                                      ::  "\\";
                                fi;

                    ps::meld2 (prefix, nf::format_int ns::OCTAL i);
                fi;
        end;


        nul    = '\x00';
        ctrl_a = '\x01';
        ctrl_b = '\x02';
        ctrl_c = '\x03';
        ctrl_d = '\x04';
        ctrl_e = '\x05';
        ctrl_f = '\x06';
        ctrl_g = '\x07';
        ctrl_h = '\x08';
        ctrl_i = '\x09';
        ctrl_j = '\x0A';        newline = '\x0A';
        ctrl_k = '\x0B';
        ctrl_l = '\x0C';
        ctrl_m = '\x0D';        return  = '\x0D';
        ctrl_n = '\x0E';
        ctrl_o = '\x0F';
        ctrl_p = '\x10';
        ctrl_q = '\x11';
        ctrl_r = '\x12';
        ctrl_s = '\x13';
        ctrl_t = '\x14';
        ctrl_u = '\x15';
        ctrl_v = '\x16';
        ctrl_w = '\x17';
        ctrl_x = '\x18';
        ctrl_y = '\x19';
        ctrl_z = '\x1A';
        del    = '\xFF';
    };                          # package char 
end;


Comments and suggestions to: bugs@mythryl.org

PreviousUpNext