module Pcre:sig..end
type error =
  | Partial
      (* String only matched the pattern partially *)
  | BadPartial
      (* Pattern contains items that cannot be used together
         with partial matching. *)
  | BadPattern of string * int
      (* BadPattern (msg, pos): regular expression is malformed.
         The reason is in msg, the position of the error in the
         pattern in pos. *)
  | BadUTF8
      (* UTF8 string being matched is invalid *)
  | BadUTF8Offset
      (* Gets raised when a UTF8 string being matched with
         offset is invalid. *)
  | MatchLimit
      (* Maximum allowed number of match attempts with
         backtracking or recursion is reached during matching.
         ALL FUNCTIONS CALLING THE MATCHING ENGINE MAY RAISE
         IT!!! *)
  | RecursionLimit
  | InternalError of string
      (* InternalError msg: C-library exhibits unknown/undefined
         behaviour. The reason is in msg. *)
exception Error of error
exception Backtrack
Backtrack is used in callout functions to force backtracking.
exception Regexp_or of string * error
Regexp_or (pat, error) gets raised for sub-pattern pat by regexp_or
if it failed to compile.
type icflag
type irflag
type cflag = [ `ANCHORED
| `AUTO_CALLOUT
| `CASELESS
| `DOLLAR_ENDONLY
| `DOTALL
| `EXTENDED
| `EXTRA
| `FIRSTLINE
| `MULTILINE
| `NO_AUTO_CAPTURE
| `NO_UTF8_CHECK
| `UNGREEDY
| `UTF8 ]
val cflags : cflag list -> icflag
cflags cflag_list converts a list of compilation flags to
their internal representation.
val cflag_list : icflag -> cflag list
cflag_list cflags converts internal representation of
compilation flags to a list.
type rflag = [ `ANCHORED | `NOTBOL | `NOTEMPTY | `NOTEOL | `PARTIAL ]
val rflags : rflag list -> irflag
rflags rflag_list converts a list of runtime flags to
their internal representation.
val rflag_list : irflag -> rflag list
rflag_list rflags converts internal representation of
runtime flags to a list.
val version : string
Version of the PCRE-C-library
val config_utf8 : bool
val config_newline : char
val config_link_size : int
val config_match_limit : int
val config_match_limit_recursion : int
val config_stackrecurse : bool
type firstbyte_info = [ `ANCHORED | `Char of char | `Start_only ]
type study_stat = [ `Not_studied | `Optimal | `Studied ]
type regexp
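val options : regexp -> icflag
options regexp returns the compilation flags of regexp.
val size : regexp -> int
size regexp returns the memory size of regexp.
val studysize : regexp -> int
studysize regexp returns the memory size of the study information of regexp.
val capturecount : regexp -> int
capturecount regexp returns the number of capturing subpatterns in regexp.
val backrefmax : regexp -> int
backrefmax regexp returns the number of the highest backreference in regexp.
val namecount : regexp -> int
namecount regexp returns the number of named subpatterns in regexp.
val names : regexp -> string array
names regexp returns the array of names of named substrings in regexp.
val nameentrysize : regexp -> int
nameentrysize regexp returns the size of the longest name of named
subpatterns in regexp + 3.
val firstbyte : regexp -> firstbyte_info
firstbyte regexp returns firstbyte info on regexp.
val firsttable : regexp -> string option
firsttable regexp returns some 256-bit (32-byte) fixed set table in
form of a string for regexp if available, None otherwise.
val lastliteral : regexp -> char option
lastliteral regexp returns some last matching character of regexp
if available, None otherwise.
val study_stat : regexp -> study_stat
study_stat regexp returns the study status of regexp.
val get_stringnumber : regexp -> string -> int
get_stringnumber rex name returns the index of the named substring
name in regular expression rex. This index can then be used with
get_substring.
Raises Invalid_argument if there is no such named substring.
val get_match_limit : regexp -> int option
get_match_limit rex returns some match limit of regular expression
rex or None.
val get_match_limit_recursion : regexp -> int option
get_match_limit_recursion rex returns some recursion match limit of
regular expression rex or None.
type chtables
val maketables : unit -> chtables
val regexp : ?study:bool ->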
?limit:int ->
?limit_recursion:int ->
?iflags:icflag ->
?flags:cflag list -> ?chtables:chtables -> string -> regexp
regexp ?study ?limit ?limit_recursion ?iflags ?flags ?chtables pattern
compiles pattern with flags when given, with iflags otherwise, and
with char tables chtables. If study is true, then the resulting regular
expression will be studied. If limit is specified, this sets a limit to
the amount of recursion and backtracking (only lower than the builtin
default!). If this limit is exceeded, MatchLimit will be raised during
matching.
For detailed documentation on how you can specify PERL-style regular
expressions (= patterns), please consult the PCRE-documentation
("man pcrepattern") or PERL-manuals.
See also www.perl.com
study : default = true
limit : default = no extra limit other than default
limit_recursion : default = no extra limit_recursion other than default
iflags : default = no extra flags
flags : default = ignored
chtables : default = builtin char tables
val regexp_or : ?study:bool ->
?limit:int ->
?limit_recursion:int ->
?iflags:icflag ->
?flags:cflag list ->
?chtables:chtables -> string list -> regexp
regexp_or ?study ?limit ?limit_recursion ?iflags ?flags ?chtables patterns
like Pcre.regexp, but combines patterns as alternatives (or-patterns) into
one regular expression.
val quote : string -> string
quote str returns the quoted string of str.
type substrings
val get_subject : substrings -> string
get_subject substrings returns the subject string of substrings.
val num_of_subs : substrings -> int
num_of_subs substrings returns the number of strings in substrings
(whole match inclusive).
val get_substring : substrings -> int -> string
get_substring substrings n returns the nth substring
(0 is whole match) of substrings.
Raises Invalid_argument if n is not in the range of the number of
substrings.
Raises Not_found if the corresponding subpattern did not capture
a substring.
val get_substring_ofs : substrings -> int -> int * int
get_substring_ofs substrings n returns the offset tuple of the
nth substring of substrings (0 is whole match).
Raises Invalid_argument if n is not in the range of the number
of substrings.
Raises Not_found if the corresponding subpattern did not capture
a substring.
val get_substrings : ?full_match:bool -> substrings -> string array
get_substrings ?full_match substrings returns the array of substrings
in substrings. It includes the full match at index 0
when full_match is true, the captured substrings only when it
is false. If a subpattern did not capture a substring, the empty
string is returned in the corresponding position instead.
full_match : default = true
val get_opt_substrings : ?full_match:bool -> substrings -> string option array
get_opt_substrings ?full_match substrings returns the array of optional
substrings in substrings. It includes Some full_match_str
at index 0 when full_match is true, Some captured_substrings
only when it is false. If a subpattern did not capture a substring,
None is returned in the corresponding position instead.
full_match : default = true
val get_named_substring : regexp -> string -> substrings -> string
get_named_substring rex name substrings returns the named substring
name in regular expression rex and substrings.
Raises Invalid_argument if there is no such named substring.
Raises Not_found if the corresponding subpattern did not capture
a substring.
val get_named_substring_ofs : regexp -> string -> substrings -> int * int
get_named_substring_ofs rex name substrings returns the offset tuple of
the named substring name in regular expression rex and
substrings.
Raises Invalid_argument if there is no such named substring.
Raises Not_found if the corresponding subpattern did not capture
a substring.
type callout_data = {
  callout_number : int;
      (* Callout number *)
  substrings : substrings;
      (* Substrings matched so far *)
  start_match : int;
      (* Subject start offset of current match attempt *)
  current_position : int;
      (* Subject offset of current match pointer *)
  capture_top : int;
      (* Number of the highest captured substring so far *)
  capture_last : int;
      (* Number of the most recently captured substring *)
  pattern_position : int;
      (* Offset of next match item in pattern string *)
  next_item_length : int;
      (* Length of next match item in pattern string *)
}
type callout = callout_data -> unit
Callouts are referred to in patterns as "(?Cn)" where "n" is a
callout_number ranging from 0 to 255. Substrings captured so far
are accessible as usual via substrings. You will have to consider
capture_top and capture_last to know about the current state of
valid substrings.
By raising exception Backtrack within a callout function, the user
can force the pattern matching engine to backtrack to other possible
solutions. Other exceptions will terminate matching immediately
and return control to OCaml.
val pcre_exec : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string -> ?pos:int -> ?callout:callout -> string -> int array
pcre_exec ?iflags ?flags ?rex ?pat ?pos ?callout subj returns an array
of offsets that describe the position of matched subpatterns in the
string subj starting at position pos with pattern pat when
given, regular expression rex otherwise. The array also contains
additional workspace needed by the match engine. Uses flags when
given, the precompiled iflags otherwise. Callouts are handled by
callout.
Raises Not_found if pattern does not match.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val exec : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string -> ?pos:int -> ?callout:callout -> string -> substrings
exec ?iflags ?flags ?rex ?pat ?pos ?callout subj returns substring
information on string subj starting at position pos with pattern
pat when given, regular expression rex otherwise. Uses flags
when given, the precompiled iflags otherwise. Callouts are handled
by callout.
Raises Not_found if pattern does not match.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val exec_all : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?callout:callout -> string -> substrings array
exec_all ?iflags ?flags ?rex ?pat ?pos ?callout subj returns an array of
substring information of all matching substrings in string
subj starting at position pos with pattern pat when
given, regular expression rex otherwise. Uses flags when given,
the precompiled iflags otherwise. Callouts are handled by callout.
Raises Not_found if pattern does not match.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val next_match : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?callout:callout -> substrings -> substrings
next_match ?iflags ?flags ?rex ?pat ?pos ?callout substrs returns
substring information on the match that follows on the last match
denoted by substrs, jumping over pos characters (also
backwards!), using pattern pat when given, regular expression
rex otherwise. Uses flags when given, the precompiled iflags
otherwise. Callouts are handled by callout.
Raises Not_found if pattern does not match.
Raises Invalid_argument if pos lets matching start outside of
the subject string.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val extract : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?full_match:bool -> ?callout:callout -> string -> string array
extract ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj returns
the array of substrings that match subj starting at
position pos, using pattern pat when given, regular expression
rex otherwise. Uses flags when given, the precompiled iflags
otherwise. It includes the full match at index 0 when full_match is
true, the captured substrings only when it is false. Callouts are
handled by callout. If a subpattern did not capture a substring,
the empty string is returned in the corresponding position instead.
Raises Not_found if pattern does not match.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
full_match : default = true
callout : default = ignore callouts
val extract_opt : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?full_match:bool -> ?callout:callout -> string -> string option array
extract_opt ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj
returns the array of optional substrings that match subj starting
at position pos, using pattern pat when given, regular expression
rex otherwise. Uses flags when given, the precompiled iflags
otherwise. It includes Some full_match_str at index 0 when
full_match is true, Some captured-substrings only when it is
false. Callouts are handled by callout. If a subpattern did
not capture a substring, None is returned in the corresponding
position instead.
Raises Not_found if pattern does not match.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
full_match : default = true
callout : default = ignore callouts
val extract_all : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?full_match:bool -> ?callout:callout -> string -> string array array
extract_all ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj
returns an array of arrays of all matched substrings that match
subj starting at position pos, using pattern pat when given,
regular expression rex otherwise. Uses flags when given, the
precompiled iflags otherwise. It includes the full match at index
0 of the extracted string arrays when full_match is true, the
captured substrings only when it is false. Callouts are handled by
callout.
Raises Not_found if pattern does not match.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
full_match : default = true
callout : default = ignore callouts
val extract_all_opt : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?full_match:bool ->
?callout:callout -> string -> string option array array
extract_all_opt
?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj
returns an array of arrays of all optional matched substrings that
match subj starting at position pos, using pattern pat when
given, regular expression rex otherwise. Uses flags when given,
the precompiled iflags otherwise. It includes Some full_match_str
at index 0 of the extracted string arrays when full_match is true,
Some captured_substrings only when it is false. Callouts are
handled by callout. If a subpattern did not capture a substring,
None is returned in the corresponding position instead.
Raises Not_found if pattern does not match.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
full_match : default = true
callout : default = ignore callouts
val pmatch : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string -> ?pos:int -> ?callout:callout -> string -> bool
pmatch ?iflags ?flags ?rex ?pat ?pos ?callout subj returns true
if subj is matched by pattern pat when given, regular expression
rex otherwise, starting at position pos. Uses flags when given,
the precompiled iflags otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
type substitution
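val subst : string -> substitution
subst str converts the string str representing a
substitution pattern to the internal representation.
The contents of the substitution string str can be normal text
mixed with any of the following (mostly as in PERL):
- $[0-9]+ - a "$" immediately followed by an arbitrary number.
  "$0" stands for the name of the executable,
  any other number for the n-th backreference.
- $& - the whole matched pattern
- $` - the text before the match
- $' - the text after the match
- $+ - the last group that matched
- $$ - a single "$"
- $! - delimiter which does not appear in the substitution.
  Can be used to part "$[0-9]+" from an immediately
  following other number.
val replace : ?iflags:irflag ->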
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?itempl:substitution ->
?templ:string -> ?callout:callout -> string -> string
replace ?iflags ?flags ?rex ?pat ?pos ?itempl ?templ ?callout subj
replaces all substrings of subj matching pattern pat when given,
regular expression rex otherwise, starting at position pos with
the substitution string templ when given, itempl otherwise. Uses
flags when given, the precompiled iflags otherwise. Callouts
are handled by callout.
Raises Failure if there are backreferences to nonexistent subpatterns.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
itempl : default = empty string
templ : default = ignored
callout : default = ignore callouts
val qreplace : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?templ:string -> ?callout:callout -> string -> string
qreplace ?iflags ?flags ?rex ?pat ?pos ?templ ?callout subj
replaces all substrings of subj matching pattern pat when given,
regular expression rex otherwise, starting at position pos
with the string templ. Uses flags when given, the precompiled
iflags otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
templ : default = ignored
callout : default = ignore callouts
val substitute_substrings : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?callout:callout ->
subst:(substrings -> string) -> string -> string
substitute_substrings ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj
replaces all substrings of subj matching pattern pat when given,
regular expression rex otherwise, starting at position pos
with the result of function subst applied to the substrings
of the match. Uses flags when given, the precompiled iflags
otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val substitute : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?callout:callout -> subst:(string -> string) -> string -> string
substitute ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj
replaces all substrings of subj matching pattern pat when given,
regular expression rex otherwise, starting at position pos with
the result of function subst applied to the match. Uses flags
when given, the precompiled iflags otherwise. Callouts are handled
by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val replace_first : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?itempl:substitution ->
?templ:string -> ?callout:callout -> string -> string
replace_first ?iflags ?flags ?rex ?pat ?pos ?itempl ?templ ?callout subj
replaces the first substring of subj matching pattern pat when
given, regular expression rex otherwise, starting at position
pos with the substitution string templ when given, itempl
otherwise. Uses flags when given, the precompiled iflags
otherwise. Callouts are handled by callout.
Raises Failure if there are backreferences to nonexistent subpatterns.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
itempl : default = empty string
templ : default = ignored
callout : default = ignore callouts
val qreplace_first : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?templ:string -> ?callout:callout -> string -> string
qreplace_first ?iflags ?flags ?rex ?pat ?pos ?templ ?callout subj
replaces the first substring of subj matching pattern pat when
given, regular expression rex otherwise, starting at position pos
with the string templ. Uses flags when given, the precompiled
iflags otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
templ : default = ignored
callout : default = ignore callouts
val substitute_substrings_first : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?callout:callout ->
subst:(substrings -> string) -> string -> string
substitute_substrings_first
?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj
replaces the first substring of subj matching pattern pat when
given, regular expression rex otherwise, starting at position
pos with the result of function subst applied to the substrings
of the match. Uses flags when given, the precompiled iflags
otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val substitute_first : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?callout:callout -> subst:(string -> string) -> string -> string
substitute_first ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj
replaces the first substring of subj matching pattern pat when
given, regular expression rex otherwise, starting at position
pos with the result of function subst applied to the match. Uses
flags when given, the precompiled iflags otherwise. Callouts
are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val split : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?max:int -> ?callout:callout -> string -> string list
split ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj splits subj
into a list of at most max strings, using as delimiter pattern
pat when given, regular expression rex otherwise, starting at
position pos. Uses flags when given, the precompiled iflags
otherwise. If max is zero, trailing empty fields are stripped. If
it is negative, it is treated as arbitrarily large. If neither pat
nor rex are specified, leading whitespace will be stripped! Should
behave exactly as in PERL. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
max : default = 0
callout : default = ignore callouts
val asplit : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?max:int -> ?callout:callout -> string -> string array
asplit ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj is the same as
Pcre.split but returns an array instead of a list.
type split_result =
  | Text of string
      (* Text part of split string *)
  | Delim of string
      (* Delimiter part of split string *)
  | Group of int * string
      (* Subgroup of matched delimiter
         (subgroup_nr, subgroup_str) *)
  | NoGroup
      (* Unmatched subgroup *)
Result of a Pcre.full_split.
val full_split : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?max:int -> ?callout:callout -> string -> split_result list
full_split ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj splits
subj into a list of at most max elements of type "split_result",
using as delimiter pattern pat when given, regular expression
rex otherwise, starting at position pos. Uses flags when given,
the precompiled iflags otherwise. If max is zero, trailing empty
fields are stripped. If it is negative, it is treated as arbitrarily
large. Should behave exactly as in PERL. Callouts are handled by
callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
max : default = 0
callout : default = ignore callouts
val foreach_line : ?ic:Pervasives.in_channel -> (string -> unit) -> unit
foreach_line ?ic f applies f to each line in inchannel ic until
the end-of-file is reached.
ic : default = stdin
val foreach_file : string list -> (string -> Pervasives.in_channel -> unit) -> unit
foreach_file filenames f opens each file in the list filenames
for input and applies f to each filename and the corresponding
channel. Channels are closed after each operation (even when
exceptions occur - they get reraised afterwards!).
val unsafe_pcre_exec : irflag ->
regexp ->
pos:int ->
subj_start:int ->
subj:string -> subgroups2:int -> int array -> callout option -> unit
unsafe_pcre_exec flags rex ~pos ~subj_start ~subj ~subgroups2
offset_vector. You should read the C-source to know what happens.
If you do not understand it - don't use this function!
val make_ovector : regexp -> int * int array
make_ovector regexp calculates the tuple (subgroups2, ovector)
which is the number of subgroup offsets and the offset array.