PreviousUpNext

15.4.867  src/lib/regex/glue/regular-expression-matcher-g.pkg

## regular-expression-matcher-g.pkg

# Compiled by:
#     src/lib/std/standard.lib

# Generic that implements a regular expressions matcher by combining
# a surface syntax and a matching engine.

# This generic is invoked in:
#
#     src/lib/regex/regex.pkg
#     src/lib/regex/awk-nfa-regex.pkg
#     src/lib/regex/awk-dfa-regex.pkg
#     src/lib/regex/demo/demo.pkg
#     src/app/c-glue-maker/main.pkg
#     src/lib/c-glue/ml-grinder/regexp-lib.pkg
#     src/app/future-lex/src/backends/expand-file.pkg



generic package regular_expression_matcher_g (
    package p:  Regular_Expression_Parser;              # Regular_Expression_Parser     is from   src/lib/regex/front/parser.api
    package e:  Regular_Expression_Engine;              # Regular_Expression_Engine     is from   src/lib/regex/backend/regular-expression-engine.api
)
:
Regular_Expression_Matcher                              # Regular_Expression_Matcher    is from   src/lib/regex/glue/regular-expression-matcher.api
where  Compiled_Regular_Expression
       ==
       e::Compiled_Regular_Expression
=
package {

    package m = regex_match_result;                     # regex_match_result            is from   src/lib/regex/glue/regex-match-result.pkg
        
    package r =   p::r;

    Compiled_Regular_Expression =  e::Compiled_Regular_Expression;

    fun compile reader stream
        =
        case (p::scan reader stream) 
            #     
            THE (syntax, stream') => {
               v = e::compile syntax;

                   THE (v, stream');
               };

            NULL => NULL;
        esac;

                                                        # number_string                 is from   src/lib/std/src/number-string.pkg
    fun compile_string str
        =
        case (number_string::scan_string p::scan str)
            #     
#           THE r =>    e::compile r; 

            THE r =>    {  result = e::compile r; 
                           result; 
                        };
            NULL  =>    raise exception abstract_regular_expression::CANNOT_PARSE;
        esac;

    prefix = e::prefix;
    find   = e::find;

    fun stream_match l
        =
        {   fun parse (s, f)
                =
                case (number_string::scan_string p::scan s)
                    #
                    THE r =>  (r, f);
                    NULL  =>  raise exception abstract_regular_expression::CANNOT_PARSE;
                esac;

            m =  e::match (map parse l);
          
            \\ getc =   \\ stream =   m getc stream;
        };



    # The following stuff is from Allen Leung's
    # "lazy man's interface to the regex library":

    # For caching compiled regex 
    #
    package sht
        =
        typelocked_hashtable_g (
            Hash_Key = String;
            hash_value = hash_string::hash_string;
            same_key = (==) : (String, String) -> Bool;
        );
    
    cache = sht::make_hashtable
              {
                size_hint               => 16,                          # Initial-size hint.
                not_found_exception => MATCH                    # Exception to raised by 'find'.
              }
            :
            sht::Hashtable( Compiled_Regular_Expression );



    fun cached_compile regex
        =
        case (sht::find cache regex)
            #     
            THE re => re;

            NULL   => {   re = compile_string regex;
                          sht::set cache (regex, re);
                          re;
                      };
        esac;


    fun search regex text
        =
        number_string::scan_string
            (find (cached_compile regex))
            text;


    fun get_args text children
        = 
        list::cat (map walk children)
        where
            fun walk (m::REGEX_MATCH_RESULT (THE { match_position, match_length }, children))
                    => 
                    {   s =   string::substring (text, match_position, match_length);
                        s ! list::cat (map walk children);
                    };

                walk (m::REGEX_MATCH_RESULT (NULL, children))
                    =>
                    "" ! list::cat (map walk children);
            end;

        end;


    fun find_first_match_to_regex_and_return_all_groups  regex  text
        = 
        case (search regex text)
            #     
            THE (m::REGEX_MATCH_RESULT(_, children))
                =>
                THE (get_args  text  children);

            NULL => NULL;                                       # Used to:   raise exception NOT_FOUND;
        esac;


    fun find_first_match_to_ith_group  i  regex  text
        =
        case (search regex text)
            #
            THE m =>    case (m::nth (m, i))
                            #
                            THE { match_position, match_length }
                                =>
                                THE (string::substring (text, match_position, match_length));

                            NULL =>  THE "";
                        esac
                        except
                            _ =  THE "";

            NULL => NULL;                               # Used to be    raise exception NOT_FOUND;
        esac;


    fun find_first_match_to_regex  regex
        =
        find_first_match_to_ith_group 0 regex;


    fun look regex text
        = 
        {   n =   size text;

            fun getc i
                =
                if (i >= n)   NULL;
                else          THE (string::get_byte_as_char (text, i), i+1);
                fi;

            find (cached_compile regex) getc;
        };


    fun find_all_matches_to_regex_and_return_values_of_ith_group  g  regex  text
        = 
        loop 0
        where
            look =   look regex text;

            fun loop s
                = 
                case (look s)
                    #             
                    THE (m, s)
                        => 
                        case (m::nth (m, g))
                            #
                            THE { match_position, match_length }
                                =>
                                string::substring (text, match_position, match_length) ! loop s;

                            NULL => loop s;
                       esac;

                    NULL => [];
                esac;
        end;


    fun find_all_matches_to_regex_and_return_all_values_of_all_groups  regex  text
        = 
        loop 0
        where
            look =   look regex text;

            fun loop s
                = 
                case (look s)
                    #             
                    THE (m::REGEX_MATCH_RESULT(_, children), s)
                        => 
                        get_args  text  children  !  loop s;

                    NULL => [];
                esac;
        end;


    fun find_all_matches_to_regex  regex
        =
        find_all_matches_to_regex_and_return_values_of_ith_group 0 regex;


    fun matches regex text
        =
        null_or::not_null (search regex text);


    fun text =~ regex
        =
        matches regex text;

    fun regex_case  text  { cases, default }
        = 
        loop cases
        where
            fun loop [] =>  default ();
                    #
                loop ((regex, action) ! rest)
                    =>
                    case (find_first_match_to_regex_and_return_all_groups regex text)
                        #
                        THE x =>  action x;
                        NULL  =>  loop rest;
                    esac;       
            end;
        end;


    fun replace_first_via_fn  regex f text
        = 
        case (search regex text)
            #     
            THE (m::REGEX_MATCH_RESULT (THE { match_position, match_length }, children))
                =>
                {   prefix =   string::extract (text, 0, THE match_position);
                    suffix =   string::extract (text, match_position + match_length, NULL);
                    prefix + f (get_args text children) + suffix;
                };

            THE _ => text;
            NULL  => text;
        esac;


    fun replace_all_via_fn  regex f text
        = 
        {   (string::cat (loop 0))
            except
                NOT_FOUND = text;
        }
        where
            look =   look regex text;

            fun loop s
                =
                case (look s)
                    #             
                    NULL =>     [ s == 0  ??  text  ::  string::extract (text, s, NULL) ];

                    THE (m::REGEX_MATCH_RESULT (THE { match_position, match_length }, children), s')
                        =>
                        {   prefix =   string::substring (text, s, match_position - s);

                            prefix ! f (get_args text children) ! loop s';
                        };

                    THE _ => raise exception NOT_FOUND;
                esac;

       end;


   fun replace_first  regex s =   replace_first_via_fn  regex (\\ _ = s);
   fun replace_all    regex s =   replace_all_via_fn    regex (\\ _ = s);

};


## COPYRIGHT (c) 1998 Bell Labs, Lucent Technologies.
## Subsequent changes by Jeff Prothero Copyright (c) 2010-2015,
## released per terms of SMLNJ-COPYRIGHT.


Comments and suggestions to: bugs@mythryl.org

PreviousUpNext