PostgreSQL Source Code git master
ts_locale.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * ts_locale.c
4 * locale compatibility layer for tsearch
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/tsearch/ts_locale.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "common/string.h"
17#include "storage/fd.h"
18#include "tsearch/ts_locale.h"
19
/* Error-context hook for the readline facility; defined later in this file. */
static void tsearch_readline_callback(void *arg);


/*
 * Size of the pg_wchar scratch arrays used by t_isalpha() and t_isalnum():
 * room for one converted character plus a trailing NUL.
 */
#define WC_BUF_LEN 2
25
26int
27t_isalpha(const char *ptr)
28{
29 pg_wchar wstr[WC_BUF_LEN];
30 int wlen pg_attribute_unused();
31
32 wlen = pg_mb2wchar_with_len(ptr, wstr, pg_mblen(ptr));
33 Assert(wlen <= 1);
34
35 /* pass single character, or NUL if empty */
36 return pg_iswalpha(wstr[0], pg_database_locale());
37}
38
39int
40t_isalnum(const char *ptr)
41{
42 pg_wchar wstr[WC_BUF_LEN];
43 int wlen pg_attribute_unused();
44
45 wlen = pg_mb2wchar_with_len(ptr, wstr, pg_mblen(ptr));
46 Assert(wlen <= 1);
47
48 /* pass single character, or NUL if empty */
49 return pg_iswalnum(wstr[0], pg_database_locale());
50}
51
52
53/*
54 * Set up to read a file using tsearch_readline(). This facility is
55 * better than just reading the file directly because it provides error
56 * context pointing to the specific line where a problem is detected.
57 *
58 * Expected usage is:
59 *
60 * tsearch_readline_state trst;
61 *
62 * if (!tsearch_readline_begin(&trst, filename))
63 * ereport(ERROR,
64 * (errcode(ERRCODE_CONFIG_FILE_ERROR),
65 * errmsg("could not open stop-word file \"%s\": %m",
66 * filename)));
67 * while ((line = tsearch_readline(&trst)) != NULL)
68 * process line;
69 * tsearch_readline_end(&trst);
70 *
71 * Note that the caller supplies the ereport() for file open failure;
72 * this is so that a custom message can be provided. The filename string
73 * passed to tsearch_readline_begin() must remain valid through
74 * tsearch_readline_end().
75 */
76bool
78 const char *filename)
79{
80 if ((stp->fp = AllocateFile(filename, "r")) == NULL)
81 return false;
82 stp->filename = filename;
83 stp->lineno = 0;
84 initStringInfo(&stp->buf);
85 stp->curline = NULL;
86 /* Setup error traceback support for ereport() */
88 stp->cb.arg = stp;
90 error_context_stack = &stp->cb;
91 return true;
92}
93
94/*
95 * Read the next line from a tsearch data file (expected to be in UTF-8), and
96 * convert it to database encoding if needed. The returned string is palloc'd.
97 * NULL return means EOF.
98 */
99char *
101{
102 char *recoded;
103
104 /* Advance line number to use in error reports */
105 stp->lineno++;
106
107 /* Clear curline, it's no longer relevant */
108 if (stp->curline)
109 {
110 if (stp->curline != stp->buf.data)
111 pfree(stp->curline);
112 stp->curline = NULL;
113 }
114
115 /* Collect next line, if there is one */
116 if (!pg_get_line_buf(stp->fp, &stp->buf))
117 return NULL;
118
119 /* Validate the input as UTF-8, then convert to DB encoding if needed */
120 recoded = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8);
121
122 /* Save the correctly-encoded string for possible error reports */
123 stp->curline = recoded; /* might be equal to buf.data */
124
125 /*
126 * We always return a freshly pstrdup'd string. This is clearly necessary
127 * if pg_any_to_server() returned buf.data, and we need a second copy even
128 * if encoding conversion did occur. The caller is entitled to pfree the
129 * returned string at any time, which would leave curline pointing to
130 * recycled storage, causing problems if an error occurs after that point.
131 * (It's preferable to return the result of pstrdup instead of the output
132 * of pg_any_to_server, because the conversion result tends to be
133 * over-allocated. Since callers might save the result string directly
134 * into a long-lived dictionary structure, we don't want it to be a larger
135 * palloc chunk than necessary. We'll reclaim the conversion result on
136 * the next call.)
137 */
138 return pstrdup(recoded);
139}
140
141/*
142 * Close down after reading a file with tsearch_readline()
143 */
144void
146{
147 /* Suppress use of curline in any error reported below */
148 if (stp->curline)
149 {
150 if (stp->curline != stp->buf.data)
151 pfree(stp->curline);
152 stp->curline = NULL;
153 }
154
155 /* Release other resources */
156 pfree(stp->buf.data);
157 FreeFile(stp->fp);
158
159 /* Pop the error context stack */
161}
162
163/*
164 * Error context callback for errors occurring while reading a tsearch
165 * configuration file.
166 */
167static void
169{
171
172 /*
173 * We can't include the text of the config line for errors that occur
174 * during tsearch_readline() itself. The major cause of such errors is
175 * encoding violations, and we daren't try to print error messages
176 * containing badly-encoded data.
177 */
178 if (stp->curline)
179 errcontext("line %d of configuration file \"%s\": \"%s\"",
180 stp->lineno,
181 stp->filename,
182 stp->curline);
183 else
184 errcontext("line %d of configuration file \"%s\"",
185 stp->lineno,
186 stp->filename);
187}
#define pg_attribute_unused()
Definition: c.h:137
ErrorContextCallback * error_context_stack
Definition: elog.c:95
#define errcontext
Definition: elog.h:198
int FreeFile(FILE *file)
Definition: fd.c:2840
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2641
Assert(PointerIsAligned(start, uint64))
unsigned int pg_wchar
Definition: mbprint.c:31
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:677
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:987
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1024
char * pstrdup(const char *in)
Definition: mcxt.c:1759
void pfree(void *pointer)
Definition: mcxt.c:1594
void * arg
static char * filename
Definition: pg_dumpall.c:120
bool pg_get_line_buf(FILE *stream, StringInfo buf)
Definition: pg_get_line.c:95
bool pg_iswalnum(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1446
pg_locale_t pg_database_locale(void)
Definition: pg_locale.c:1172
bool pg_iswalpha(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1436
@ PG_UTF8
Definition: pg_wchar.h:232
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
struct ErrorContextCallback * previous
Definition: elog.h:297
void(* callback)(void *arg)
Definition: elog.h:298
StringInfoData buf
Definition: ts_locale.h:29
ErrorContextCallback cb
Definition: ts_locale.h:32
const char * filename
Definition: ts_locale.h:27
bool tsearch_readline_begin(tsearch_readline_state *stp, const char *filename)
Definition: ts_locale.c:77
int t_isalnum(const char *ptr)
Definition: ts_locale.c:40
char * tsearch_readline(tsearch_readline_state *stp)
Definition: ts_locale.c:100
int t_isalpha(const char *ptr)
Definition: ts_locale.c:27
void tsearch_readline_end(tsearch_readline_state *stp)
Definition: ts_locale.c:145
static void tsearch_readline_callback(void *arg)
Definition: ts_locale.c:168
#define WC_BUF_LEN
Definition: ts_locale.c:24