2020#include "common/unicode_category.h"
2121#include "mb/pg_wchar.h"
2222
23+ enum CaseMapResult	/* outcome reported by casemap() for a single code point */
24+ {
25+ CASEMAP_SELF ,	/* no mapping found; caller copies the source bytes unchanged */
26+ CASEMAP_SIMPLE ,	/* one replacement code point, returned through 'simple' */
27+ CASEMAP_SPECIAL ,	/* up to MAX_CASE_EXPANSION code points, returned through 'special' */
28+ };
29+
2330static const pg_case_map * find_case_map (pg_wchar ucs );
2431static size_t convert_case (char * dst , size_t dstsize , const char * src , ssize_t srclen ,
2532 CaseKind str_casekind , bool full , WordBoundaryNext wbnext ,
2633 void * wbstate );
27- static bool check_special_conditions (int conditions , const char * str ,
28- size_t len , size_t offset );
34+ static enum CaseMapResult casemap (pg_wchar u1 , CaseKind casekind , bool full ,
35+ const char * src , size_t srclen , size_t srcoff ,
36+ pg_wchar * u2 , const pg_wchar * * special );
2937
3038pg_wchar
3139unicode_lowercase_simple (pg_wchar code )
@@ -214,8 +222,9 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
214222 {
215223 pg_wchar u1 = utf8_to_unicode ((unsigned char * ) src + srcoff );
216224 int u1len = unicode_utf8len (u1 );
217- const pg_case_map * casemap = find_case_map (u1 );
218- const pg_special_case * special = NULL ;
225+ pg_wchar simple = 0 ;
226+ const pg_wchar * special = NULL ;
227+ enum CaseMapResult casemap_result ;
219228
220229 if (str_casekind == CaseTitle )
221230 {
@@ -228,56 +237,47 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
228237 chr_casekind = CaseLower ;
229238 }
230239
231- /*
232- * Find special case that matches the conditions, if any.
233- *
234- * Note: only a single special mapping per codepoint is currently
235- * supported, though Unicode allows for multiple special mappings for
236- * a single codepoint.
237- */
238- if (full && casemap && casemap -> special_case )
239- {
240- int16 conditions = casemap -> special_case -> conditions ;
241-
242- Assert (casemap -> special_case -> codepoint == u1 );
243- if (check_special_conditions (conditions , src , srclen , srcoff ))
244- special = casemap -> special_case ;
245- }
240+ casemap_result = casemap (u1 , chr_casekind , full , src , srclen , srcoff ,
241+ & simple , & special );
246242
247- /* perform mapping, update result_len, and write to dst */
248- if (special )
243+ switch (casemap_result )
249244 {
250- for (int i = 0 ; i < MAX_CASE_EXPANSION ; i ++ )
251- {
252- pg_wchar u2 = special -> map [chr_casekind ][i ];
253- size_t u2len = unicode_utf8len (u2 );
254-
255- if (u2 == '\0' )
256- break ;
257-
258- if (result_len + u2len <= dstsize )
259- unicode_to_utf8 (u2 , (unsigned char * ) dst + result_len );
260-
261- result_len += u2len ;
262- }
263- }
264- else if (casemap )
265- {
266- pg_wchar u2 = casemap -> simplemap [chr_casekind ];
267- pg_wchar u2len = unicode_utf8len (u2 );
268-
269- if (result_len + u2len <= dstsize )
270- unicode_to_utf8 (u2 , (unsigned char * ) dst + result_len );
271-
272- result_len += u2len ;
273- }
274- else
275- {
276- /* no mapping; copy bytes from src */
277- if (result_len + u1len <= dstsize )
278- memcpy (dst + result_len , src + srcoff , u1len );
279-
280- result_len += u1len ;
245+ case CASEMAP_SELF :
246+ /* no mapping; copy bytes from src */
247+ Assert (simple == 0 );
248+ Assert (special == NULL );
249+ if (result_len + u1len <= dstsize )
250+ memcpy (dst + result_len , src + srcoff , u1len );
251+
252+ result_len += u1len ;
253+ break ;
254+ case CASEMAP_SIMPLE :
255+ {
256+ /* replace with single character */
257+ pg_wchar u2 = simple ;
258+ pg_wchar u2len = unicode_utf8len (u2 );
259+
260+ Assert (special == NULL );
261+ if (result_len + u2len <= dstsize )
262+ unicode_to_utf8 (u2 , (unsigned char * ) dst + result_len );
263+
264+ result_len += u2len ;
265+ }
266+ break ;
267+ case CASEMAP_SPECIAL :
268+ /* replace with up to MAX_CASE_EXPANSION characters */
269+ Assert (simple == 0 );
270+ for (int i = 0 ; i < MAX_CASE_EXPANSION && special [i ]; i ++ )
271+ {
272+ pg_wchar u2 = special [i ];
273+ size_t u2len = unicode_utf8len (u2 );
274+
275+ if (result_len + u2len <= dstsize )
276+ unicode_to_utf8 (u2 , (unsigned char * ) dst + result_len );
277+
278+ result_len += u2len ;
279+ }
280+ break ;
281281 }
282282
283283 srcoff += u1len ;
@@ -351,6 +351,10 @@ check_final_sigma(const unsigned char *str, size_t len, size_t offset)
351351 return true;
352352}
353353
354+ /*
355+ * Unicode allows for special casing to be applied only under certain
356+ * circumstances. The only currently-supported condition is Final_Sigma.
357+ */
354358static bool
355359check_special_conditions (int conditions , const char * str , size_t len ,
356360 size_t offset )
@@ -365,6 +369,51 @@ check_special_conditions(int conditions, const char *str, size_t len,
365369 return false;
366370}
367371
372+ /*
373+ * Map the given character to the requested case.
374+ *
375+ * If full is true, and a special case mapping is found and the conditions are
376+ * met, 'special' is set to the mapping result (which is an array of up to
377+ * MAX_CASE_EXPANSION characters) and CASEMAP_SPECIAL is returned.
378+ *
379+ * Otherwise, search for a simple mapping, and if found, set 'simple' to the
380+ * result and return CASEMAP_SIMPLE.
381+ *
382+ * If no mapping is found, return CASEMAP_SELF, and the caller should copy the
383+ * character without modification.
384+ */
385+ static enum CaseMapResult
386+ casemap (pg_wchar u1 , CaseKind casekind , bool full ,
387+ const char * src , size_t srclen , size_t srcoff ,
388+ pg_wchar * simple , const pg_wchar * * special )
389+ {
390+ const pg_case_map * map ;
391+
392+ if (u1 < 0x80 )
393+ {
394+ * simple = case_map [u1 ].simplemap [casekind ];
395+
396+ return CASEMAP_SIMPLE ;
397+ }
398+
399+ map = find_case_map (u1 );
400+
401+ if (map == NULL )
402+ return CASEMAP_SELF ;
403+
404+ if (full && map -> special_case != NULL &&
405+ check_special_conditions (map -> special_case -> conditions ,
406+ src , srclen , srcoff ))
407+ {
408+ * special = map -> special_case -> map [casekind ];
409+ return CASEMAP_SPECIAL ;
410+ }
411+
412+ * simple = map -> simplemap [casekind ];
413+
414+ return CASEMAP_SIMPLE ;
415+ }
416+
368417/* find entry in simple case map, if any */
369418static const pg_case_map *
370419find_case_map (pg_wchar ucs )
0 commit comments