## regular-expression-matcher-g.pkg
# Compiled by:
#
src/lib/std/standard.lib# Generic that implements a regular expressions matcher by combining
# a surface syntax and a matching engine.
# This generic is invoked in:
#
#
src/lib/regex/regex.pkg#
src/lib/regex/awk-nfa-regex.pkg#
src/lib/regex/awk-dfa-regex.pkg#
src/lib/regex/demo/demo.pkg#
src/app/c-glue-maker/main.pkg#
src/lib/c-glue/ml-grinder/regexp-lib.pkg#
src/app/future-lex/src/backends/expand-file.pkggeneric package regular_expression_matcher_g (
package p: Regular_Expression_Parser; # Regular_Expression_Parser is from
src/lib/regex/front/parser.api package e: Regular_Expression_Engine; # Regular_Expression_Engine is from
src/lib/regex/backend/regular-expression-engine.api)
:
Regular_Expression_Matcher # Regular_Expression_Matcher is from
src/lib/regex/glue/regular-expression-matcher.apiwhere Compiled_Regular_Expression
==
e::Compiled_Regular_Expression
=
package {
package m = regex_match_result; # regex_match_result is from
src/lib/regex/glue/regex-match-result.pkg
package r = p::r;
Compiled_Regular_Expression = e::Compiled_Regular_Expression;
fun compile reader stream
=
case (p::scan reader stream)
#
THE (syntax, stream') => {
v = e::compile syntax;
THE (v, stream');
};
NULL => NULL;
esac;
# number_string is from
src/lib/std/src/number-string.pkg fun compile_string str
=
case (number_string::scan_string p::scan str)
#
# THE r => e::compile r;
THE r => { result = e::compile r;
result;
};
NULL => raise exception abstract_regular_expression::CANNOT_PARSE;
esac;
prefix = e::prefix;
find = e::find;
fun stream_match l
=
{ fun parse (s, f)
=
case (number_string::scan_string p::scan s)
#
THE r => (r, f);
NULL => raise exception abstract_regular_expression::CANNOT_PARSE;
esac;
m = e::match (map parse l);
\\ getc = \\ stream = m getc stream;
};
# The following stuff is from Allen Leung's
# "lazy man's interface to the regex library":
# For caching compiled regex
#
package sht
=
typelocked_hashtable_g (
Hash_Key = String;
hash_value = hash_string::hash_string;
same_key = (==) : (String, String) -> Bool;
);
cache = sht::make_hashtable
{
size_hint => 16, # Initial-size hint.
not_found_exception => MATCH # Exception to raised by 'find'.
}
:
sht::Hashtable( Compiled_Regular_Expression );
fun cached_compile regex
=
case (sht::find cache regex)
#
THE re => re;
NULL => { re = compile_string regex;
sht::set cache (regex, re);
re;
};
esac;
fun search regex text
=
number_string::scan_string
(find (cached_compile regex))
text;
fun get_args text children
=
list::cat (map walk children)
where
fun walk (m::REGEX_MATCH_RESULT (THE { match_position, match_length }, children))
=>
{ s = string::substring (text, match_position, match_length);
s ! list::cat (map walk children);
};
walk (m::REGEX_MATCH_RESULT (NULL, children))
=>
"" ! list::cat (map walk children);
end;
end;
fun find_first_match_to_regex_and_return_all_groups regex text
=
case (search regex text)
#
THE (m::REGEX_MATCH_RESULT(_, children))
=>
THE (get_args text children);
NULL => NULL; # Used to: raise exception NOT_FOUND;
esac;
fun find_first_match_to_ith_group i regex text
=
case (search regex text)
#
THE m => case (m::nth (m, i))
#
THE { match_position, match_length }
=>
THE (string::substring (text, match_position, match_length));
NULL => THE "";
esac
except
_ = THE "";
NULL => NULL; # Used to be raise exception NOT_FOUND;
esac;
fun find_first_match_to_regex regex
=
find_first_match_to_ith_group 0 regex;
fun look regex text
=
{ n = size text;
fun getc i
=
if (i >= n) NULL;
else THE (string::get_byte_as_char (text, i), i+1);
fi;
find (cached_compile regex) getc;
};
fun find_all_matches_to_regex_and_return_values_of_ith_group g regex text
=
loop 0
where
look = look regex text;
fun loop s
=
case (look s)
#
THE (m, s)
=>
case (m::nth (m, g))
#
THE { match_position, match_length }
=>
string::substring (text, match_position, match_length) ! loop s;
NULL => loop s;
esac;
NULL => [];
esac;
end;
fun find_all_matches_to_regex_and_return_all_values_of_all_groups regex text
=
loop 0
where
look = look regex text;
fun loop s
=
case (look s)
#
THE (m::REGEX_MATCH_RESULT(_, children), s)
=>
get_args text children ! loop s;
NULL => [];
esac;
end;
fun find_all_matches_to_regex regex
=
find_all_matches_to_regex_and_return_values_of_ith_group 0 regex;
fun matches regex text
=
null_or::not_null (search regex text);
fun text =~ regex
=
matches regex text;
fun regex_case text { cases, default }
=
loop cases
where
fun loop [] => default ();
#
loop ((regex, action) ! rest)
=>
case (find_first_match_to_regex_and_return_all_groups regex text)
#
THE x => action x;
NULL => loop rest;
esac;
end;
end;
fun replace_first_via_fn regex f text
=
case (search regex text)
#
THE (m::REGEX_MATCH_RESULT (THE { match_position, match_length }, children))
=>
{ prefix = string::extract (text, 0, THE match_position);
suffix = string::extract (text, match_position + match_length, NULL);
prefix + f (get_args text children) + suffix;
};
THE _ => text;
NULL => text;
esac;
fun replace_all_via_fn regex f text
=
{ (string::cat (loop 0))
except
NOT_FOUND = text;
}
where
look = look regex text;
fun loop s
=
case (look s)
#
NULL => [ s == 0 ?? text :: string::extract (text, s, NULL) ];
THE (m::REGEX_MATCH_RESULT (THE { match_position, match_length }, children), s')
=>
{ prefix = string::substring (text, s, match_position - s);
prefix ! f (get_args text children) ! loop s';
};
THE _ => raise exception NOT_FOUND;
esac;
end;
fun replace_first regex s = replace_first_via_fn regex (\\ _ = s);
fun replace_all regex s = replace_all_via_fn regex (\\ _ = s);
};
## COPYRIGHT (c) 1998 Bell Labs, Lucent Technologies.
## Subsequent changes by Jeff Prothero Copyright (c) 2010-2015,
## released per terms of SMLNJ-COPYRIGHT.