@@ -457,13 +457,149 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
457457 Conf -> naffixes ++ ;
458458}
459459
460+
461+ /* Parsing states for parse_affentry() and friends */
460462#define PAE_WAIT_MASK 0
461- #define PAE_INMASK 1
463+ #define PAE_INMASK 1
462464#define PAE_WAIT_FIND 2
463- #define PAE_INFIND 3
465+ #define PAE_INFIND 3
464466#define PAE_WAIT_REPL 4
465- #define PAE_INREPL 5
467+ #define PAE_INREPL 5
468+ #define PAE_WAIT_TYPE 6
469+ #define PAE_WAIT_FLAG 7
466470
/*
 * Extract the next whitespace-delimited field from an .affix file line.
 *
 * str	- in/out: address of the scan pointer.  It is advanced as the line
 *		  is consumed; when a field is ended by whitespace the pointer is
 *		  left on that whitespace character.
 * next - out: receives the field text, null-terminated.  Must point to a
 *		  buffer of BUFSIZ bytes.  A character is copied only while it
 *		  still leaves room for the terminator; characters that do not fit
 *		  are dropped.
 *
 * A '#' seen before any field character has been found ends the scan
 * immediately with a FALSE result (nothing is written to "next" then).
 *
 * Returns TRUE if a nonempty field was found, FALSE otherwise.
 */
static bool
get_nextfield(char **str, char *next)
{
	bool		in_field = false;	/* have we seen the field's first char? */
	int			room = BUFSIZ;		/* bytes still unused in output buffer */
	char	   *out = next;

	for (; **str != '\0'; *str += pg_mblen(*str))
	{
		int			nbytes;

		if (!in_field)
		{
			/* Still looking for the start of the field */
			if (t_iseq(*str, '#'))
				return false;
			if (t_isspace(*str))
				continue;		/* skip leading whitespace */
			in_field = true;
		}
		else if (t_isspace(*str))
		{
			/* Whitespace terminates the field */
			*out = '\0';
			return true;
		}

		/* Append the current (possibly multibyte) character if it fits */
		nbytes = pg_mblen(*str);
		if (nbytes < room)
		{
			COPYCHAR(out, *str);
			out += nbytes;
			room -= nbytes;
		}
	}

	/* Hit end of line: terminate whatever was collected */
	*out = '\0';

	return in_field;			/* OK if we got a nonempty field */
}
532+
533+ /*
534+ * Parses entry of an .affix file of MySpell or Hunspell format.
535+ *
536+ * An .affix file entry has the following format:
537+ * - header
538+ * <type> <flag> <cross_flag> <flag_count>
539+ * - fields after header:
540+ * <type> <flag> <find> <replace> <mask>
541+ *
542+ * str is the input line
543+ * field values are returned to type etc, which must be buffers of size BUFSIZ.
544+ *
545+ * Returns number of fields found; any omitted fields are set to empty strings.
546+ */
547+ static int
548+ parse_ooaffentry (char * str , char * type , char * flag , char * find ,
549+ char * repl , char * mask )
550+ {
551+ int state = PAE_WAIT_TYPE ;
552+ int fields_read = 0 ;
553+ bool valid = false;
554+
555+ * type = * flag = * find = * repl = * mask = '\0' ;
556+
557+ while (* str )
558+ {
559+ switch (state )
560+ {
561+ case PAE_WAIT_TYPE :
562+ valid = get_nextfield (& str , type );
563+ state = PAE_WAIT_FLAG ;
564+ break ;
565+ case PAE_WAIT_FLAG :
566+ valid = get_nextfield (& str , flag );
567+ state = PAE_WAIT_FIND ;
568+ break ;
569+ case PAE_WAIT_FIND :
570+ valid = get_nextfield (& str , find );
571+ state = PAE_WAIT_REPL ;
572+ break ;
573+ case PAE_WAIT_REPL :
574+ valid = get_nextfield (& str , repl );
575+ state = PAE_WAIT_MASK ;
576+ break ;
577+ case PAE_WAIT_MASK :
578+ valid = get_nextfield (& str , mask );
579+ state = -1 ; /* force loop exit */
580+ break ;
581+ default :
582+ elog (ERROR , "unrecognized state in parse_ooaffentry: %d" ,
583+ state );
584+ break ;
585+ }
586+ if (valid )
587+ fields_read ++ ;
588+ else
589+ break ; /* early EOL */
590+ if (state < 0 )
591+ break ; /* got all fields */
592+ }
593+
594+ return fields_read ;
595+ }
596+
597+ /*
598+ * Parses entry of an .affix file of Ispell format
599+ *
600+ * An .affix file entry has the following format:
601+ * <mask> > [-<find>,]<replace>
602+ */
467603static bool
468604parse_affentry (char * str , char * mask , char * find , char * repl )
469605{
@@ -618,8 +754,6 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
618754 int flag = 0 ;
619755 char flagflags = 0 ;
620756 tsearch_readline_state trst ;
621- int scanread = 0 ;
622- char scanbuf [BUFSIZ ];
623757 char * recoded ;
624758
625759 /* read file to find any flag */
@@ -682,8 +816,6 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
682816 }
683817 tsearch_readline_end (& trst );
684818
685- sprintf (scanbuf , "%%6s %%%ds %%%ds %%%ds %%%ds" , BUFSIZ / 5 , BUFSIZ / 5 , BUFSIZ / 5 , BUFSIZ / 5 );
686-
687819 if (!tsearch_readline_begin (& trst , filename ))
688820 ereport (ERROR ,
689821 (errcode (ERRCODE_CONFIG_FILE_ERROR ),
@@ -692,18 +824,21 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
692824
693825 while ((recoded = tsearch_readline (& trst )) != NULL )
694826 {
827+ int fields_read ;
828+
695829 if (* recoded == '\0' || t_isspace (recoded ) || t_iseq (recoded , '#' ))
696830 goto nextline ;
697831
698- scanread = sscanf (recoded , scanbuf , type , sflag , find , repl , mask );
832+ fields_read = parse_ooaffentry (recoded , type , sflag , find , repl , mask );
699833
700834 if (ptype )
701835 pfree (ptype );
702836 ptype = lowerstr_ctx (Conf , type );
703- if (scanread < 4 || (STRNCMP (ptype , "sfx" ) && STRNCMP (ptype , "pfx" )))
837+ if (fields_read < 4 ||
838+ (STRNCMP (ptype , "sfx" ) != 0 && STRNCMP (ptype , "pfx" ) != 0 ))
704839 goto nextline ;
705840
706- if (scanread == 4 )
841+ if (fields_read == 4 )
707842 {
708843 if (strlen (sflag ) != 1 )
709844 goto nextline ;
@@ -722,9 +857,13 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
722857 if (strlen (sflag ) != 1 || flag != * sflag || flag == 0 )
723858 goto nextline ;
724859 prepl = lowerstr_ctx (Conf , repl );
725- /* affix flag */
860+ /* Find position of '/' in lowercased string "prepl" */
726861 if ((ptr = strchr (prepl , '/' )) != NULL )
727862 {
863+ /*
864+ * Here we use non-lowercased string "repl". We need position
865+ * of '/' in "repl".
866+ */
728867 * ptr = '\0' ;
729868 ptr = repl + (ptr - prepl ) + 1 ;
730869 while (* ptr )
@@ -800,11 +939,12 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
800939
801940 if (STRNCMP (pstr , "compoundwords" ) == 0 )
802941 {
942+ /* Find position in lowercased string "pstr" */
803943 s = findchar (pstr , 'l' );
804944 if (s )
805945 {
806- s = recoded + ( s - pstr ); /* we need non-lowercased
807- * string */
946+ /* Here we use non-lowercased string "recoded" */
947+ s = recoded + ( s - pstr );
808948 while (* s && !t_isspace (s ))
809949 s += pg_mblen (s );
810950 while (* s && t_isspace (s ))
0 commit comments