diff options
Diffstat (limited to 'man/man3/regex.3')
| -rw-r--r-- | man/man3/regex.3 | 376 |
1 files changed, 376 insertions, 0 deletions
diff --git a/man/man3/regex.3 b/man/man3/regex.3 new file mode 100644 index 0000000000..e423e442d8 --- /dev/null +++ b/man/man3/regex.3 @@ -0,0 +1,376 @@ +.\" Copyright (C), 1995, Graeme W. Wilford. (Wilf.) +.\" +.\" SPDX-License-Identifier: Linux-man-pages-copyleft +.\" +.\" Wed Jun 14 16:10:28 BST 1995 Wilf. (G.Wilford@ee.surrey.ac.uk) +.\" Tiny change in formatting - aeb, 950812 +.\" Modified 8 May 1998 by Joseph S. Myers (jsm28@cam.ac.uk) +.\" +.\" show the synopsis section nicely +.TH REGEX 3 2021-03-22 "Linux man-pages (unreleased)" +.SH NAME +regcomp, regexec, regerror, regfree \- POSIX regex functions +.SH LIBRARY +Standard C library +.RI ( libc ", " \-lc ) +.SH SYNOPSIS +.nf +.B #include <regex.h> +.PP +.BI "int regcomp(regex_t *restrict " preg ", const char *restrict " regex , +.BI " int " cflags ); +.BI "int regexec(const regex_t *restrict " preg \ +", const char *restrict " string , +.BI " size_t " nmatch ", regmatch_t " pmatch "[restrict]\ +, int " eflags ); +.PP +.BI "size_t regerror(int " errcode ", const regex_t *restrict " preg , +.BI " char *restrict " errbuf ", size_t " errbuf_size ); +.BI "void regfree(regex_t *" preg ); +.fi +.SH DESCRIPTION +.SS POSIX regex compiling +.BR regcomp () +is used to compile a regular expression into a form that is suitable +for subsequent +.BR regexec () +searches. +.PP +.BR regcomp () +is supplied with +.IR preg , +a pointer to a pattern buffer storage area; +.IR regex , +a pointer to the null-terminated string and +.IR cflags , +flags used to determine the type of compilation. +.PP +All regular expression searching must be done via a compiled pattern +buffer, thus +.BR regexec () +must always be supplied with the address of a +.BR regcomp ()-initialized +pattern buffer. +.PP +.I cflags +is the +.RB bitwise- or +of zero or more of the following: +.TP +.B REG_EXTENDED +Use +.B POSIX +Extended Regular Expression syntax when interpreting +.IR regex . +If not set, +.B POSIX +Basic Regular Expression syntax is used. +.TP +.B REG_ICASE +Do not differentiate case. +Subsequent +.BR regexec () +searches using this pattern buffer will be case insensitive. +.TP +.B REG_NOSUB +Do not report position of matches. +The +.I nmatch +and +.I pmatch +arguments to +.BR regexec () +are ignored if the pattern buffer supplied was compiled with this flag set. +.TP +.B REG_NEWLINE +Match-any-character operators don't match a newline. +.IP +A nonmatching list +.RB ( [\(ha...] ) +not containing a newline does not match a newline. +.IP +Match-beginning-of-line operator +.RB ( \(ha ) +matches the empty string immediately after a newline, regardless of +whether +.IR eflags , +the execution flags of +.BR regexec (), +contains +.BR REG_NOTBOL . +.IP +Match-end-of-line operator +.RB ( $ ) +matches the empty string immediately before a newline, regardless of +whether +.I eflags +contains +.BR REG_NOTEOL . +.SS POSIX regex matching +.BR regexec () +is used to match a null-terminated string +against the precompiled pattern buffer, +.IR preg . +.I nmatch +and +.I pmatch +are used to provide information regarding the location of any matches. +.I eflags +is the +.RB bitwise- or +of zero or more of the following flags: +.TP +.B REG_NOTBOL +The match-beginning-of-line operator always fails to match (but see the +compilation flag +.B REG_NEWLINE +above). +This flag may be used when different portions of a string are passed to +.BR regexec () +and the beginning of the string should not be interpreted as the +beginning of the line. +.TP +.B REG_NOTEOL +The match-end-of-line operator always fails to match (but see the +compilation flag +.B REG_NEWLINE +above). +.TP +.B REG_STARTEND +Use +.I pmatch[0] +on the input string, starting at byte +.I pmatch[0].rm_so +and ending before byte +.IR pmatch[0].rm_eo . +This allows matching embedded NUL bytes +and avoids a +.BR strlen (3) +on large strings. +It does not use +.I nmatch +on input, and does not change +.B REG_NOTBOL +or +.B REG_NEWLINE +processing. +This flag is a BSD extension, not present in POSIX. +.SS Byte offsets +Unless +.B REG_NOSUB +was set for the compilation of the pattern buffer, it is possible to +obtain match addressing information. +.I pmatch +must be dimensioned to have at least +.I nmatch +elements. +These are filled in by +.BR regexec () +with substring match addresses. +The offsets of the subexpression starting at the +.IR i th +open parenthesis are stored in +.IR pmatch[i] . +The entire regular expression's match addresses are stored in +.IR pmatch[0] . +(Note that to return the offsets of +.I N +subexpression matches, +.I nmatch +must be at least +.IR N+1 .) +Any unused structure elements will contain the value \-1. +.PP +The +.I regmatch_t +structure which is the type of +.I pmatch +is defined in +.IR <regex.h> . +.PP +.in +4n +.EX +typedef struct { + regoff_t rm_so; + regoff_t rm_eo; +} regmatch_t; +.EE +.in +.PP +Each +.I rm_so +element that is not \-1 indicates the start offset of the next largest +substring match within the string. +The relative +.I rm_eo +element indicates the end offset of the match, +which is the offset of the first character after the matching text. +.SS POSIX error reporting +.BR regerror () +is used to turn the error codes that can be returned by both +.BR regcomp () +and +.BR regexec () +into error message strings. +.PP +.BR regerror () +is passed the error code, +.IR errcode , +the pattern buffer, +.IR preg , +a pointer to a character string buffer, +.IR errbuf , +and the size of the string buffer, +.IR errbuf_size . +It returns the size of the +.I errbuf +required to contain the null-terminated error message string. +If both +.I errbuf +and +.I errbuf_size +are nonzero, +.I errbuf +is filled in with the first +.I "errbuf_size \- 1" +characters of the error message and a terminating null byte (\(aq\e0\(aq). +.SS POSIX pattern buffer freeing +Supplying +.BR regfree () +with a precompiled pattern buffer, +.I preg +will free the memory allocated to the pattern buffer by the compiling +process, +.BR regcomp (). +.SH RETURN VALUE +.BR regcomp () +returns zero for a successful compilation or an error code for failure. +.PP +.BR regexec () +returns zero for a successful match or +.B REG_NOMATCH +for failure. +.SH ERRORS +The following errors can be returned by +.BR regcomp (): +.TP +.B REG_BADBR +Invalid use of back reference operator. +.TP +.B REG_BADPAT +Invalid use of pattern operators such as group or list. +.TP +.B REG_BADRPT +Invalid use of repetition operators such as using \(aq*\(aq +as the first character. +.TP +.B REG_EBRACE +Un-matched brace interval operators. +.TP +.B REG_EBRACK +Un-matched bracket list operators. +.TP +.B REG_ECOLLATE +Invalid collating element. +.TP +.B REG_ECTYPE +Unknown character class name. +.TP +.B REG_EEND +Nonspecific error. +This is not defined by POSIX.2. +.TP +.B REG_EESCAPE +Trailing backslash. +.TP +.B REG_EPAREN +Un-matched parenthesis group operators. +.TP +.B REG_ERANGE +Invalid use of the range operator; for example, the ending point of the range +occurs prior to the starting point. +.TP +.B REG_ESIZE +Compiled regular expression requires a pattern buffer larger than 64\ kB. +This is not defined by POSIX.2. +.TP +.B REG_ESPACE +The regex routines ran out of memory. +.TP +.B REG_ESUBREG +Invalid back reference to a subexpression. +.SH ATTRIBUTES +For an explanation of the terms used in this section, see +.BR attributes (7). +.ad l +.nh +.TS +allbox; +lbx lb lb +l l l. +Interface Attribute Value +T{ +.BR regcomp (), +.BR regexec () +T} Thread safety MT-Safe locale +T{ +.BR regerror () +T} Thread safety MT-Safe env +T{ +.BR regfree () +T} Thread safety MT-Safe +.TE +.hy +.ad +.sp 1 +.SH STANDARDS +POSIX.1-2001, POSIX.1-2008. +.SH EXAMPLES +.EX +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <regex.h> + +#define ARRAY_SIZE(arr) (sizeof((arr)) / sizeof((arr)[0])) + +static const char *const str = + "1) John Driverhacker;\en2) John Doe;\en3) John Foo;\en"; +static const char *const re = "John.*o"; + +int main(void) +{ + static const char *s = str; + regex_t regex; + regmatch_t pmatch[1]; + regoff_t off, len; + + if (regcomp(®ex, re, REG_NEWLINE)) + exit(EXIT_FAILURE); + + printf("String = \e"%s\e"\en", str); + printf("Matches:\en"); + + for (int i = 0; ; i++) { + if (regexec(®ex, s, ARRAY_SIZE(pmatch), pmatch, 0)) + break; + + off = pmatch[0].rm_so + (s \- str); + len = pmatch[0].rm_eo \- pmatch[0].rm_so; + printf("#%d:\en", i); + printf("offset = %jd; length = %jd\en", (intmax_t) off, + (intmax_t) len); + printf("substring = \e"%.*s\e"\en", len, s + pmatch[0].rm_so); + + s += pmatch[0].rm_eo; + } + + exit(EXIT_SUCCESS); +} +.EE +.SH SEE ALSO +.BR grep (1), +.BR regex (7) +.PP +The glibc manual section, +.I "Regular Expressions" |
