## abstract-regular-expression.api
# Compiled by:
#     src/lib/std/standard.lib

# This is the abstract syntax tree used
# to represent regular expressions.
#
# It serves as the common language between
# different frontends (implementing different RE specification languages), and
# different backends (implementing different compilation/searching algorithms).
api Abstract_Regular_Expression {

    # Exceptions exported by this API.
    # NOTE(review): presumably raised when a frontend cannot parse its input
    # and when a backend cannot compile an expression -- confirm against the
    # implementations; nothing in this file raises them.
    exception CANNOT_PARSE;
    exception CANNOT_COMPILE;

    # Parameterized by the character representation.
    # This generalization is for unicode support.
    package char:      Char;
    package char_set:  Set                      # Set is from   src/lib/src/set.api
                       where
                           key::Key == char::Char;

    # The abstract syntax tree itself.
    Abstract_Regular_Expression
      = GROUP         Abstract_Regular_Expression
      | ALT           List( Abstract_Regular_Expression )
      | CONCAT        List( Abstract_Regular_Expression )
      | INTERVAL      ((Abstract_Regular_Expression, Int, Null_Or( Int )) )
            # INTERVAL (re, m, n):  Match re at least m times and,
            # when n is THE k, no more than k times.
            # (STAR and PLUS below use NULL for n.)
      | MATCH_SET     char_set::Set
      | NONMATCH_SET  char_set::Set
      | CHAR          char::Char
      | OPTION        Abstract_Regular_Expression
            # == INTERVAL (re, 0, THE 1)
      | STAR          Abstract_Regular_Expression
            # == INTERVAL (re, 0, NULL)
      | PLUS          Abstract_Regular_Expression
            # == INTERVAL (re, 1, NULL)
      | BEGIN
            # Matches beginning of stream
      | END
            # Matches end of stream

      # Extensions
      | ASSIGN        ((Int, (String -> String), Abstract_Regular_Expression))
            # Define a reference
      | BACK_REF      (((String -> String), Int))
            # Back references
      | GUARD         (((String -> Bool), Abstract_Regular_Expression))
            # Attach a predicate to a regular expression.
      | BOUNDARY      { prev: Null_Or( char::Char ),
                        this: Null_Or( char::Char ),
                        next: Null_Or( char::Char )
                      }
                      -> Bool;
            # Carries a predicate over the optional previous, current and next characters.
            # NOTE(review): presumably for zero-width tests such as word boundaries -- confirm.

    # Takes a set and two characters and yields a set.
    # NOTE(review): presumably adds every character between the two given
    # characters to the set -- confirm the ordering and inclusivity in the implementation.
    add_range:  (char_set::Set, char::Char, char::Char) -> char_set::Set;

    # NOTE(review): presumably the set containing every character -- confirm.
    all_chars:  char_set::Set;
};
# Specialized to the usual Char
api Char_Abstract_Regular_Expression
    =
    Abstract_Regular_Expression
    where
        char == char;           # Identify the api's `char` subpackage with the `char` package in scope here.
# Some notes on the extensions:
#
# What are ASSIGN and BACK_REF?
# -----------------------------
#
# They are used to implement perl-like back references:
# For example, the perl regexp:
#
# /(.+)\1/
#
# is compiled into the syntax
#
# CONCAT (ASSIGN (1, \\ x = x, GROUP (PLUS all_chars)), BACK_REF (\\ x = x, 1))
#
# This matches repeated strings like:
#
# xyzxyz
# abab
#
# Optionally, BACK_REF and ASSIGN can apply a function on the back reference.
#
# For example, define
#
# fun rev_string s = (string::implode o reverse o string::explode) s
#
# Then the syntax:
#
# CONCAT (ASSIGN (1, \\ s = s, GROUP (PLUS all_chars)),
# BACK_REF (rev_string, 1))
#
# matches palindromes like:
#
# xyzzyx
# abba
#
# What is GUARD?
# --------------
# GUARD allows an arbitrary predicate to be attached to a regexp.
#
# For example,
#
# CONCAT (ASSIGN (1, \\ x = x, GROUP (PLUS all_chars)),
# GUARD (char::contains 'x', BACK_REF (\\ x = x, 1)))
#
# matches repeated strings like
#
# xyzxyz
#
# but not
#
# abab
#
# because the character x has to appear in the back reference.
## COPYRIGHT (c) 1995 AT&T Bell Laboratories.
## Subsequent changes by Jeff Prothero Copyright (c) 2010-2015,
## released per terms of SMLNJ-COPYRIGHT.