PreviousUpNext

15.4.1002  src/lib/src/scanf.pkg

## scanf.pkg

# Compiled by:
#     src/lib/std/standard.lib

# C-style conversions from string representations.

stipulate
    package fil =  file__premicrothread;                                # file__premicrothread  is from   src/lib/std/src/posix/file--premicrothread.pkg
    package ss  =  substring;                                           # substring             is from   src/lib/std/substring.pkg
    package sc  =  number_string;                                       # number_string         is from   src/lib/std/src/number-string.pkg
herein

    package   scanf
    : (weak)  Scanf                                                     # Scanf                 is from   src/lib/src/scanf.api
    {
        include package   printf_field;                                 # printf_field          is from   src/lib/src/printf-field.pkg


        # Implement rough-and-ready char sets as 256-byte vectors
        # with entries set to 0 or 1 for non/membership:
        #
        stipulate
            Charset = CS  rw_vector_of_one_byte_unts::Rw_Vector;        # Start of abstype-replacement recipe -- see http://successor-ml.org/index.php?title=Degrade_abstype_to_derived_form 
        herein                                                          #
            Charset = Charset;                                          # End of abstype-replacement recipe.

            fun make_char_set ()
                =
                CS (rw_vector_of_one_byte_unts::make_rw_vector  (char::max_ord+1, 0u0));


            fun add_char (CS byte_array, char)
                =
                rw_vector_of_one_byte_unts::set  (byte_array,  char::to_int char,  0u1);


            fun add_range (CS ba, c1, c2)
                =
                {   ord_c2 =  char::to_int c2;

                    fun add i
                        =
                        if (i <= ord_c2)
                             rw_vector_of_one_byte_unts::set (ba, i, 0u1);
                             add (i+1);
                        fi;

                    if (c1 > c2)    raise exception BAD_FORMAT "Bad char-set spec";  fi;

                    add (char::to_int c1);
                };


            fun in_set  (CS ba)  arg
                =
                rw_vector_of_one_byte_unts::get (ba, char::to_int arg)  ==  0u1;


            fun not_in_set  (CS ba)  arg
                =
                rw_vector_of_one_byte_unts::get (ba, char::to_int arg)  ==  0u0;

        end;



        # Scan a character-class spec
        # like "[a-b]" or "[^abc]":
        #
        fun scan_char_set  fmt_string
            =
            {   cset =  make_char_set ();


                # Check for leading '^'
                # (negated char class like "[^a-z]"):

                my (is_negated, fmt_string)
                    =
                    case (ss::getc  fmt_string)
                        #
                        THE ('^', ss) =>  (TRUE,  ss        );
                        _             =>  (FALSE, fmt_string);
                    esac;


                # Check that we -do- have a charclass spec to scan:
                #
                fun scan_char (THE arg) =>  scan arg;
                    scan_char NULL      =>  raise exception BAD_FORMAT "Missing charclass spec";
                end


                # Here to read the next vanilla
                # character from a character class:
                #       
                also
                fun scan (next_char, char_stream)
                    =
                    case (ss::getc  char_stream)
                        #
                        THE ('-', char_stream)
                            =>
                            case (ss::getc  char_stream)
                                #
                                THE (']', char_stream)
                                    =>
                                    {   add_char (cset, next_char);
                                        add_char (cset, '-'      );
                                        char_stream;
                                    };

                                THE (c, char_stream)
                                    =>
                                    {   add_range (cset, next_char, c);
                                        post_dash_scan  char_stream;
                                    };

                                NULL
                                    =>
                                    raise exception BAD_FORMAT "Incomplete char class";
                            esac;

                        THE (']', char_stream) =>   {   add_char (cset, next_char);   char_stream;           };
                        THE (  c, char_stream) =>   {   add_char (cset, next_char);   scan (c, char_stream); };

                        NULL          =>   raise exception BAD_FORMAT "Incomplete char class";
                    esac

                # Here to complete a character range,
                # say when we've seen "[a-" of "[a-z]":
                #
                also
                fun post_dash_scan  char_stream
                    =
                    case (ss::getc  char_stream)
                        #
                        THE ('-', char_stream) =>  raise exception BAD_FORMAT "Incomplete char class";
                        THE (']', char_stream) =>  char_stream;
                        THE (c,   char_stream) =>  scan (c, char_stream);

                        NULL =>  raise exception BAD_FORMAT "Incomplete char class";
                    esac;



                # Scan the complete format string:
                #
                fmt_string
                    =
                    scan_char (ss::getc fmt_string);


                # Construct and return a charset corresponding
                # to the char-class spec we just scanned:
                #
                if   is_negated   (CHAR_SET (not_in_set  cset),  fmt_string);
                else              (CHAR_SET (    in_set  cset),  fmt_string);
                fi;
            };



        # Accept a string like "[a-z] %g"
        # and return a corresponding list of
        # Printf_Field RAW/CHAR_SET/FIELD results: 
        #
        fun compile_scan_format  format_string
            =
            scan (ss::from_string format_string, [])
            where

                split =  ss::split_off_prefix (char::not_contains "\n\t %[");

                fun scan (ss, l)
                    =
                    if (ss::is_empty  ss)
                        #
                        reverse l;
                    else
                        (split ss) -> (ss1, ss2);

                        case (ss::getc  ss2)
                            #
                            THE ('%', ss')
                                =>
                                {   (scan_field ss') ->  (field', ss3);

                                    scan (ss3, field' ! (RAW ss1) ! l);
                                };

                            THE ('[', ss')
                                =>
                                {   (scan_char_set ss') ->  (cs, ss3);

                                    scan (ss3, cs ! (RAW ss1) ! l);
                                };

                            THE (_, ss')
                                =>
                                scan (ss::drop_prefix char::is_space ss', (RAW ss1) ! l);

                            NULL =>   reverse ((RAW ss1) ! l);
                        esac;
                    fi;
                end;


        # * NOTE: for the time being, this ignores flags and field width *
        #
        fun fnsscanf
                ssub                    # Function which returns nth char from input string.
                next_index                      # Next char to read from input string.
                format_string           # Format string like "%e [a-z] %g" or such.
            =
            scan (next_index, printf_fields, [])
            where
                printf_fields   =  compile_scan_format  format_string;          # Convert 'format_string' from a String to a List( Printf_Field ).
                skip_whitespace =  sc::drop_prefix  char::is_space  ssub;


                # Peel off one Printf_Field at a time
                # and convert a corresponding chunk of input
                # string starting at 'next_index' into a
                # new value for result_items:

                fun scan (next_index,   [],   result_items)
                        =>
                        THE   (reverse  result_items,   next_index);

                    scan (next_index,   (RAW ss) ! remaining_fields,   result_items)
                        =>
                        match (skip_whitespace next_index, ss)
                        where
                            fun match (next_index, ss)
                                =
                                case (ssub next_index, ss::getc ss)
                                    #
                                    (THE (c', next_index'),   THE (c, ss))
                                        =>
                                        if  (c' == c  )  match (next_index', ss);
                                                     else   NULL;               fi;

                                    (_, NULL)
                                        =>
                                        scan (next_index, remaining_fields,  result_items);

                                    _ => NULL;
                                esac;
                        end;

                    scan (next_index,   (CHAR_SET prior) ! remaining_fields,   result_items)
                        =>
                        scan (scan_set next_index, remaining_fields,  result_items)
                        where
                            fun scan_set next_index
                                =
                                case (ssub next_index)
                                    #
                                    THE (c, next_index')
                                        =>
                                        if  (prior c  )  scan_set next_index';
                                                    else           next_index ;   fi;

                                    NULL => next_index;
                                esac;
                        end;

                    scan (next_index,   FIELD (flags, wid, type) ! remaining_fields,   result_items)
                        =>
                        {   next_index =  skip_whitespace  next_index;
                            #
                            fun next (con, THE (x, next_index'))
                                    =>
                                    scan (next_index',   remaining_fields,   (con x) ! result_items);

                                next _ => NULL;
                            end;

                            fun get_int  format
                                =
                                if flags.large  next (LINT, large_int::scan  format  ssub  next_index);
                                else            next (INT,        int::scan  format  ssub  next_index);
                                fi;

                            case type
                                #
                                OCTAL_FIELD   =>  get_int sc::OCTAL;
                                INT_FIELD     =>  get_int sc::DECIMAL;
                                HEX_FIELD     =>  get_int sc::HEX;
                                CAP_HEX_FIELD =>  get_int sc::HEX;
                                BINARY_FIELD  =>  get_int sc::BINARY;

                                CHAR_FIELD    =>  next (CHAR,                    ssub next_index);
                                BOOL_FIELD    =>  next (BOOL,         bool::scan ssub next_index);
                                FLOAT_FIELD _ =>  next (FLOAT, eight_byte_float::scan ssub next_index);

                                STRING_FIELD
                                    =>
                                    scan (next_index,   remaining_fields,   STRING s ! result_items)
                                    where

                                        not_space =  not o char::is_space;

                                        prior = case wid
                                                    #
                                                    NO_PAD => not_space;

                                                    WIDTH n
                                                        =>
                                                        {   count =  REF n;

                                                            \\ c =  case *count
                                                                        #
                                                                        0 => FALSE;
                                                                        n => {   count :=  n - 1;
                                                                                 not_space c;
                                                                             };
                                                                    esac;
                                                        };
                                               esac;

                                        my (s, next_index)
                                            =
                                            sc::split_off_prefix prior ssub next_index;
                                    end;
                              esac;
                       };
                end;

            end;                        # fun scanf 



        # Scan an input string per given format_string,
        # return resulting list of Format_Items:
        #
        fun sscanf  input_string  format_string
            =
            {   max =  vector_of_chars::length  input_string;
                #
                fun string_subscript  index
                    = 
                    if (index < max)    THE (vector_of_chars::get  (input_string, index),  index+1);
                    else                NULL;
                    fi;

                first_index =  0;

                case (fnsscanf   string_subscript   first_index   format_string)
                    #
                    THE (x, _) =>  THE x;
                    NULL       =>  NULL;
                esac;
            };


        # Same as above, reverse argument order.
        # (Sometimes this order is handier for curried application.)
        #
        fun sscanf_by   format_string   input_string
            =
            sscanf      input_string    format_string;



        # Scan from a fil::Input_Stream per given format_string,
        # return resulting list of Format_Items:
        #
        fun fscanf   input_stream   format_string
            =
            {   fun getc input_stream
                    =
                    case (fil::read_one  input_stream)
                        #
                        THE char => THE (char, input_stream);
                        NULL     => NULL;
                    esac;

                case (fnsscanf   getc   input_stream   format_string)
                    #
                    THE (x, _) =>  THE x;
                    NULL       =>  NULL;
                esac;
            };


        scanf =  fscanf  fil::stdin;

    };          # package scan 
end;











Comments and suggestions to: bugs@mythryl.org

PreviousUpNext