PreviousUpNext

15.4.1185  src/lib/std/src/string-guts.pkg

## string-guts.pkg

# Compiled by:
#     src/lib/std/src/standard-core.sublib



###                  "Harp not on that string."
###
###                     -- William Shakespeare, "Henry VI"



stipulate
    package chr =  char;                                # char                  is from   src/lib/std/src/char.pkg
    package it  =  inline_t;                            # inline_t              is from   src/lib/core/init/built-in.pkg
    package ps  =  protostring;                         # protostring           is from   src/lib/std/src/protostring.pkg
    package rt  =  runtime;                             # runtime               is from   src/lib/core/init/runtime.pkg
    package xg  =  exceptions_guts;                     # exceptions_guts       is from   src/lib/std/src/exceptions-guts.pkg
herein

    package  string_guts
    : (weak) String                                     # String                is from   src/lib/std/src/string.api
    {
        (+)  =  it::default_int::(+);
        (-)  =  it::default_int::(-);
        (<)  =  it::default_int::(<);
        (<=) =  it::default_int::(<=);
        (>)  =  it::default_int::(>);
        (>=) =  it::default_int::(>=);

    #     (==) = it::(==);

        unsafe_get =  it::vector_of_chars::get;
        unsafe_set =  it::vector_of_chars::set;

        # List reverse 
        #
        fun reverse ([],     l) =>   l;
            reverse (x ! r,  l) =>   reverse (r, x ! l);
        end;

        Char   = Char;
        String = String;

        maximum_vector_length =  core::maximum_vector_length;

        # The length of a string:
        #
        length =  it::vector_of_chars::length;

        unsafe_create
            =
            rt::asm::make_string;

        # Allocate an uninitialized string of given length 
        #
        fun create n
            =
            if (it::default_int::ltu (maximum_vector_length, n))
                #
                raise exception xg::SIZE;
            else
                rt::asm::make_string n;
            fi;

        # Convert a character into a single character string 
        #
        fun from_char (c:  chr::Char) : String
            =
            it::poly_vector::get (ps::chars, it::cast c);

        # Get a character from a string 
        #
        get =  it::vector_of_chars::check_sub:  (String, Int) -> Char;


        # The (_[])   enables   'vec[index]'           notation;
        #
        my (_[]):  (String, Int) -> Char
            =
            it::vector_of_chars::check_sub;


        # Return the n-character substring of s starting at position i.
        # NOTE: we use words to check the right bound so as to avoid
        # raising overflow.
        #
        stipulate

            package w = it::default_unt;                # inline_t      is from   src/lib/core/init/built-in.pkg

        herein

            fun substring (s, i, n)
                =
                if  (((i < 0) or (n < 0)
                     or
                     w::(<) (w::from_int (size s), w::(+) (w::from_int i, w::from_int n)))
                )
                    raise exception xg::SUBSCRIPT;                              # exceptions_guts       is from   src/lib/std/src/exceptions-guts.pkg
                else
                    ps::unsafe_substring (s, i, n);
                fi;
        end;

        fun extract (v, base, opt_len)
            =
            {   len =   size v;
                #
                fun new_vec n
                    =
                    {   new_v =   rt::asm::make_string   n;
                        #
                        fun fill i
                            =
                            if (i < n)
                                #
                                unsafe_set (new_v, i, unsafe_get (v, base+i));
                                fill (i+1);
                            fi;

                        fill 0;

                        new_v;
                    };

                case (base, opt_len)
                    #
                    (0, NULL) => v;

                    (_, THE 0)
                        =>
                        if (base < 0  or  len < base)
                            #
                             raise exception xg::SUBSCRIPT;
                        else "";
                        fi;

                    (_, NULL)
                        =>
                        {   if (base < 0  or  len < base)   raise exception xg::SUBSCRIPT;      fi;
                            #
                            if (base == len)    "";
                            else                new_vec (len - base);
                            fi;
                        };

                    (_, THE 1)
                        =>
                        {   if (base < 0  or  len < base+1)   raise exception xg::SUBSCRIPT;   fi;
                            #
                            str (unsafe_get (v, base));
                        };

                    (_, THE n)
                        =>
                        {   if (base < 0  or  n < 0  or  len < base+n)   raise exception xg::SUBSCRIPT;         fi;
                            #
                            new_vec n;
                        };
                esac;
            };

        # Concatenate a list of strings:
        #
        fun cat [ string ]
                =>
                string;

            cat (sl:  List( String ))
                =>
                {   fun length (i, [])
                            =>
                            i;

                        length (i, s ! rest)
                            =>
                            length (i+size s, rest);
                    end;

                    case (length (0, sl))
                        #
                        0 => "";

                        1 =>    find sl
                                where
                                    fun find ("" ! r) =>   find r;
                                        find ( s ! _) =>   s;
                                        find _        =>   "";          # Impossible.
                                    end;
                                end;

                        tot_len
                            =>
                            {   ss =   create tot_len;

                                fun copy ([], _)
                                        =>
                                        ();

                                   copy (s ! r,  i)
                                        =>
                                        {   len =   size s;

                                            fun copy' j
                                                =
                                                if (j != len)
                                                    #
                                                    unsafe_set (ss, i+j, unsafe_get (s, j));

                                                    copy'(j+1);
                                                fi;

                                            copy' 0;

                                            copy (r, i+len);
                                        };
                                end;

                                copy (sl, 0);

                                ss;
                            };
                    esac;
                };
        end;                    #  cat




        # Concatenate a list of strings using the
        # given separator string, so
        #     join  " "  ["an", "example"]
        #     ->
        #     "an example" 
        #
        fun join _ []  =>  "";
            join _ [x] =>  x;

            join sep (h ! t)
                 =>
                 cat (
                     reverse (
                         fold_forward
                             (fn (x, l) =  x ! sep ! l)
                             [h]
                             t,
                         []
                     )
                 );
        end;



        # As above, with null delimiters:



        fun implode [] =>   "";                                         # Implode a list of characters into a string.
            #
            implode cl
                =>
                {   fun length ([],     n) =>  n;
                        length (_ ! r,  n) =>  length (r, n+1);
                    end;

                    ps::implode (length (cl, 0), cl);
                };
        end;



        fun explode s                                                   # Explode a string into a list of characters.
            =
            f (NIL, size s - 1)
            where
                fun f (l, -1) =>   l;
                    f (l,  i) =>   f (unsafe_get (s, i) ! l,  i - 1);
                end;
            end;

        fun map f vec
            =
            case (size vec)
                #         
                0   => "";
                #
                len =>  {   new_vec =  rt::asm::make_string  len;

                            mapf 0
                            where       
                                fun mapf i
                                    =
                                    if (i < len)
                                        #                               
                                        unsafe_set (new_vec, i, f (unsafe_get (vec, i)));
                                        mapf (i+1);
                                    fi;
                            end;

                            new_vec;
                        };
            esac;



        #  Map a translation function across the characters of a string 
        #
        fun translate tr s
            =
            ps::translate (tr, s, 0, size s);




        fun tokens  is_delimiter  s                     # Tokenize a string using the given predicate
            =                                   # to define the delimiter characters.
            reverse (scan_token (0, 0, []), [])
            where

                n =   size s;

                fun substr (i, j, l)
                    =
                    if (i == j)   l;
                    else          ps::unsafe_substring (s, i, j-i)  !  l;
                    fi;

                fun scan_token (i, j, toks)
                    =
                    if (j < n)
                        #
                        if (is_delimiter (unsafe_get (s, j)))   skip_delimiters (j+1, substr (i, j, toks));
                        else                                    scan_token (i, j+1, toks);
                        fi;
                    else
                        substr (i, j, toks);
                    fi

               also
               fun skip_delimiters (j, toks)
                    =
                    if (j < n)
                        #                   
                        if (is_delimiter (unsafe_get (s, j)))   skip_delimiters (j+1, toks);
                        else                                    scan_token (j, j+1, toks);
                        fi;
                    else
                        toks;
                    fi;
            end;


        fun fields is_delimiter s
            =
            {   n =   size s;

                reverse (scan_token (0, 0, []), [])
                where
                    fun scan_token (i, j, toks)
                        =
                        if (j < n)
                            #
                            if (is_delimiter (unsafe_get (s, j)))   scan_token (j+1, j+1, substr (i, j, toks));
                            else                                    scan_token (i, j+1, toks);
                            fi;
                        else
                            substr (i, j, toks);
                        fi
                        where
                            fun substr (i, j, l)
                                =
                                ps::unsafe_substring (s, i, j-i) ! l;
                        end;
                end;
            };

        #  String comparisons 
        #
        fun is_prefix s1 s2
            =
            ps::is_prefix (s1, s2, 0, size s2);

        fun is_suffix s1 s2
            =
            {   sz2 =   size s2;
                #
                ps::is_prefix (s1, s2, sz2 - size s1, sz2);
            };

        fun is_substring s
            =
            {   stringsearch =   ps::kmp s;
                #
                fun search s'
                    =
                    {   epos =   size s';

                        stringsearch (s', 0, epos) < epos;
                    };

                search;
            };

        fun compare (a, b)
            =
            ps::compare (a, 0, size a, b, 0, size b);

        fun compare_sequences compare_g (a, b)
            =
            ps::compare_sequences compare_g (a, 0, size a, b, 0, size b);


        fun has_alpha string =   list::exists  chr::is_alpha  (explode string);                         # For efficiency, should really have string::exists and string::all someday.  XXX SUCKO FIXME.
        fun has_upper string =   list::exists  chr::is_upper  (explode string);
        fun has_lower string =   list::exists  chr::is_lower  (explode string);

        fun is_alpha  string =   length string > 0   and   list::all  chr::is_alpha  (explode string);
        fun is_upper  string =   length string > 0   and   list::all  chr::is_upper  (explode string);
        fun is_lower  string =   length string > 0   and   list::all  chr::is_lower  (explode string);
        fun is_mixed  string =   is_alpha string  and  has_upper string  and  has_lower string;


        #  String greater or equal 
        #
        fun string_gt (a, b)
            =
            compare 0
            where
                al =   size a;
                bl =   size b;

                n =   if (al < bl)   al;
                      else           bl;
                      fi;

                fun compare i
                    =
                    if (i == n)
                        #
                        al > bl;
                    else
                        ai =   unsafe_get (a, i);
                        bi =   unsafe_get (b, i);

                        chr::(>) (ai, bi)
                        or
                        (   (ai == bi)
                            and
                            compare (i+1)
                        );
                    fi;
            end;

        fun (<=) (a, b) =   if (string_gt (a, b) ) FALSE; else TRUE; fi;
        fun (<)  (a, b) =   string_gt (b, a);

        fun (>=) (a, b)
            =
            b <= a;

        my (>) =  string_gt;

        fun from_string'  scan_char  s
            =
            accum (0, [])
            where
                len =   size s;

                fun getc i
                    =
                    if (it::default_int::(<) (i, len))
                        #                   
                        THE (unsafe_get (s, i), i+1);
                    else
                        NULL;
                    fi;

                scan_char =   scan_char getc;

                fun accum (i, chars)
                    =
                    case (scan_char i)
                        #
                        NULL
                            =>
                            if (it::default_int::(<) (i, len))    NULL;                                 #  Bad format 
                            else                                  THE (implode (list::reverse chars));
                            fi;
                        #
                        THE (c, i')
                            =>
                            accum (i',  c ! chars);
                    esac;
            end;

        fun (+) ("", s) =>   s;
            (+) (s, "") =>   s;
            (+) (x, y)  =>   ps::meld2 (x, y);
        end;


        # Concatenate a list of strings using the
        # given separator and delimiter strings, so
        #     join'  "("   " "   ")"   ["an", "example"]
        #     ->
        #     "(an example)"
        #
        fun join' _ _ _ []         =>  "";
            #
            join' start _ stop [x] =>  start + x + stop;
            #
            join' start sep stop (h ! t)
                =>
                cat (   
                    start
                    !
                    h
                    !
                    fold_backward
                        (fn (x, l) =  sep ! x ! l)
                        [ stop  ]
                        t
                );

        end;

        # Drop trailing newline on string, if present:
        #
        fun chomp ""
                =>
                "";

            chomp string
                =>
                {   len = length string;

                    if (get (string, len - 1) != '\n')   string;
                    else                                 extract (string, 0, THE (len - 1));
                    fi;
                };
        end; 
            # There's a shorter definition of this fn in   src/lib/compiler/toplevel/interact/read-eval-print-loop-g.pkg
            # -- should we use it instead?  XXX BUGGO FIXME



        to_lower =   map chr::to_lower;
        to_upper =   map chr::to_upper;

        fun to_mixed string                             # "THIS_is_tExt" -> "This_Is_Text"
            =
            to_mixed' (' ', explode string, [])
            where
                fun to_mixed' (_, [], chars)
                        =>
                        (implode (list::reverse chars));

                    to_mixed' (last, this ! rest, chars)
                        => 
                        if   (not (chr::is_alpha this))         to_mixed' (this, rest,                this ! chars);
                        elif (not (chr::is_alpha last))         to_mixed' (this, rest, chr::to_upper this ! chars);
                        else                                    to_mixed' (this, rest, chr::to_lower this ! chars);
                        fi;
                end;
            end;


        from_string =   from_string' chr::scan;
        to_string   =   translate chr::to_string;

        from_cstring =   from_string' chr::scan_c;
        to_cstring   =   translate chr::to_cstring;
    };                                                                  # package string
end;



Comments and suggestions to: bugs@mythryl.org

PreviousUpNext