aboutsummaryrefslogtreecommitdiffstats
path: root/man/man3/regex.3
diff options
context:
space:
mode:
Diffstat (limited to 'man/man3/regex.3')
-rw-r--r--man/man3/regex.3376
1 files changed, 376 insertions, 0 deletions
diff --git a/man/man3/regex.3 b/man/man3/regex.3
new file mode 100644
index 0000000000..e423e442d8
--- /dev/null
+++ b/man/man3/regex.3
@@ -0,0 +1,376 @@
+.\" Copyright (C), 1995, Graeme W. Wilford. (Wilf.)
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" Wed Jun 14 16:10:28 BST 1995 Wilf. (G.Wilford@ee.surrey.ac.uk)
+.\" Tiny change in formatting - aeb, 950812
+.\" Modified 8 May 1998 by Joseph S. Myers (jsm28@cam.ac.uk)
+.\"
+.\" show the synopsis section nicely
+.TH REGEX 3 2021-03-22 "Linux man-pages (unreleased)"
+.SH NAME
+regcomp, regexec, regerror, regfree \- POSIX regex functions
+.SH LIBRARY
+Standard C library
+.RI ( libc ", " \-lc )
+.SH SYNOPSIS
+.nf
+.B #include <regex.h>
+.PP
+.BI "int regcomp(regex_t *restrict " preg ", const char *restrict " regex ,
+.BI " int " cflags );
+.BI "int regexec(const regex_t *restrict " preg \
+", const char *restrict " string ,
+.BI " size_t " nmatch ", regmatch_t " pmatch "[restrict]\
+, int " eflags );
+.PP
+.BI "size_t regerror(int " errcode ", const regex_t *restrict " preg ,
+.BI " char *restrict " errbuf ", size_t " errbuf_size );
+.BI "void regfree(regex_t *" preg );
+.fi
+.SH DESCRIPTION
+.SS POSIX regex compiling
+.BR regcomp ()
+is used to compile a regular expression into a form that is suitable
+for subsequent
+.BR regexec ()
+searches.
+.PP
+.BR regcomp ()
+is supplied with
+.IR preg ,
+a pointer to a pattern buffer storage area;
+.IR regex ,
+a pointer to the null-terminated string and
+.IR cflags ,
+flags used to determine the type of compilation.
+.PP
+All regular expression searching must be done via a compiled pattern
+buffer, thus
+.BR regexec ()
+must always be supplied with the address of a
+.BR regcomp ()-initialized
+pattern buffer.
+.PP
+.I cflags
+is the
+.RB bitwise- or
+of zero or more of the following:
+.TP
+.B REG_EXTENDED
+Use
+.B POSIX
+Extended Regular Expression syntax when interpreting
+.IR regex .
+If not set,
+.B POSIX
+Basic Regular Expression syntax is used.
+.TP
+.B REG_ICASE
+Do not differentiate case.
+Subsequent
+.BR regexec ()
+searches using this pattern buffer will be case insensitive.
+.TP
+.B REG_NOSUB
+Do not report position of matches.
+The
+.I nmatch
+and
+.I pmatch
+arguments to
+.BR regexec ()
+are ignored if the pattern buffer supplied was compiled with this flag set.
+.TP
+.B REG_NEWLINE
+Match-any-character operators don't match a newline.
+.IP
+A nonmatching list
+.RB ( [\(ha...] )
+not containing a newline does not match a newline.
+.IP
+Match-beginning-of-line operator
+.RB ( \(ha )
+matches the empty string immediately after a newline, regardless of
+whether
+.IR eflags ,
+the execution flags of
+.BR regexec (),
+contains
+.BR REG_NOTBOL .
+.IP
+Match-end-of-line operator
+.RB ( $ )
+matches the empty string immediately before a newline, regardless of
+whether
+.I eflags
+contains
+.BR REG_NOTEOL .
+.SS POSIX regex matching
+.BR regexec ()
+is used to match a null-terminated string
+against the precompiled pattern buffer,
+.IR preg .
+.I nmatch
+and
+.I pmatch
+are used to provide information regarding the location of any matches.
+.I eflags
+is the
+.RB bitwise- or
+of zero or more of the following flags:
+.TP
+.B REG_NOTBOL
+The match-beginning-of-line operator always fails to match (but see the
+compilation flag
+.B REG_NEWLINE
+above).
+This flag may be used when different portions of a string are passed to
+.BR regexec ()
+and the beginning of the string should not be interpreted as the
+beginning of the line.
+.TP
+.B REG_NOTEOL
+The match-end-of-line operator always fails to match (but see the
+compilation flag
+.B REG_NEWLINE
+above).
+.TP
+.B REG_STARTEND
+Use
+.I pmatch[0]
+on the input string, starting at byte
+.I pmatch[0].rm_so
+and ending before byte
+.IR pmatch[0].rm_eo .
+This allows matching embedded NUL bytes
+and avoids a
+.BR strlen (3)
+on large strings.
+It does not use
+.I nmatch
+on input, and does not change
+.B REG_NOTBOL
+or
+.B REG_NEWLINE
+processing.
+This flag is a BSD extension, not present in POSIX.
+.SS Byte offsets
+Unless
+.B REG_NOSUB
+was set for the compilation of the pattern buffer, it is possible to
+obtain match addressing information.
+.I pmatch
+must be dimensioned to have at least
+.I nmatch
+elements.
+These are filled in by
+.BR regexec ()
+with substring match addresses.
+The offsets of the subexpression starting at the
+.IR i th
+open parenthesis are stored in
+.IR pmatch[i] .
+The entire regular expression's match addresses are stored in
+.IR pmatch[0] .
+(Note that to return the offsets of
+.I N
+subexpression matches,
+.I nmatch
+must be at least
+.IR N+1 .)
+Any unused structure elements will contain the value \-1.
+.PP
+The
+.I regmatch_t
+structure which is the type of
+.I pmatch
+is defined in
+.IR <regex.h> .
+.PP
+.in +4n
+.EX
+typedef struct {
+ regoff_t rm_so;
+ regoff_t rm_eo;
+} regmatch_t;
+.EE
+.in
+.PP
+Each
+.I rm_so
+element that is not \-1 indicates the start offset of the next largest
+substring match within the string.
+The relative
+.I rm_eo
+element indicates the end offset of the match,
+which is the offset of the first character after the matching text.
+.SS POSIX error reporting
+.BR regerror ()
+is used to turn the error codes that can be returned by both
+.BR regcomp ()
+and
+.BR regexec ()
+into error message strings.
+.PP
+.BR regerror ()
+is passed the error code,
+.IR errcode ,
+the pattern buffer,
+.IR preg ,
+a pointer to a character string buffer,
+.IR errbuf ,
+and the size of the string buffer,
+.IR errbuf_size .
+It returns the size of the
+.I errbuf
+required to contain the null-terminated error message string.
+If both
+.I errbuf
+and
+.I errbuf_size
+are nonzero,
+.I errbuf
+is filled in with the first
+.I "errbuf_size \- 1"
+characters of the error message and a terminating null byte (\(aq\e0\(aq).
+.SS POSIX pattern buffer freeing
+Supplying
+.BR regfree ()
+with a precompiled pattern buffer,
+.I preg
+will free the memory allocated to the pattern buffer by the compiling
+process,
+.BR regcomp ().
+.SH RETURN VALUE
+.BR regcomp ()
+returns zero for a successful compilation or an error code for failure.
+.PP
+.BR regexec ()
+returns zero for a successful match or
+.B REG_NOMATCH
+for failure.
+.SH ERRORS
+The following errors can be returned by
+.BR regcomp ():
+.TP
+.B REG_BADBR
+Invalid use of back reference operator.
+.TP
+.B REG_BADPAT
+Invalid use of pattern operators such as group or list.
+.TP
+.B REG_BADRPT
+Invalid use of repetition operators such as using \(aq*\(aq
+as the first character.
+.TP
+.B REG_EBRACE
+Un-matched brace interval operators.
+.TP
+.B REG_EBRACK
+Un-matched bracket list operators.
+.TP
+.B REG_ECOLLATE
+Invalid collating element.
+.TP
+.B REG_ECTYPE
+Unknown character class name.
+.TP
+.B REG_EEND
+Nonspecific error.
+This is not defined by POSIX.2.
+.TP
+.B REG_EESCAPE
+Trailing backslash.
+.TP
+.B REG_EPAREN
+Un-matched parenthesis group operators.
+.TP
+.B REG_ERANGE
+Invalid use of the range operator; for example, the ending point of the range
+occurs prior to the starting point.
+.TP
+.B REG_ESIZE
+Compiled regular expression requires a pattern buffer larger than 64\ kB.
+This is not defined by POSIX.2.
+.TP
+.B REG_ESPACE
+The regex routines ran out of memory.
+.TP
+.B REG_ESUBREG
+Invalid back reference to a subexpression.
+.SH ATTRIBUTES
+For an explanation of the terms used in this section, see
+.BR attributes (7).
+.ad l
+.nh
+.TS
+allbox;
+lbx lb lb
+l l l.
+Interface Attribute Value
+T{
+.BR regcomp (),
+.BR regexec ()
+T} Thread safety MT-Safe locale
+T{
+.BR regerror ()
+T} Thread safety MT-Safe env
+T{
+.BR regfree ()
+T} Thread safety MT-Safe
+.TE
+.hy
+.ad
+.sp 1
+.SH STANDARDS
+POSIX.1-2001, POSIX.1-2008.
+.SH EXAMPLES
+.EX
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <regex.h>
+
+#define ARRAY_SIZE(arr) (sizeof((arr)) / sizeof((arr)[0]))
+
+static const char *const str =
+ "1) John Driverhacker;\en2) John Doe;\en3) John Foo;\en";
+static const char *const re = "John.*o";
+
+int main(void)
+{
+ static const char *s = str;
+ regex_t regex;
+ regmatch_t pmatch[1];
+ regoff_t off, len;
+
+ if (regcomp(&regex, re, REG_NEWLINE))
+ exit(EXIT_FAILURE);
+
+ printf("String = \e"%s\e"\en", str);
+ printf("Matches:\en");
+
+ for (int i = 0; ; i++) {
+ if (regexec(&regex, s, ARRAY_SIZE(pmatch), pmatch, 0))
+ break;
+
+ off = pmatch[0].rm_so + (s \- str);
+ len = pmatch[0].rm_eo \- pmatch[0].rm_so;
+ printf("#%d:\en", i);
+ printf("offset = %jd; length = %jd\en", (intmax_t) off,
+ (intmax_t) len);
+ printf("substring = \e"%.*s\e"\en", len, s + pmatch[0].rm_so);
+
+ s += pmatch[0].rm_eo;
+ }
+
+ exit(EXIT_SUCCESS);
+}
+.EE
+.SH SEE ALSO
+.BR grep (1),
+.BR regex (7)
+.PP
+The glibc manual section,
+.I "Regular Expressions"