3232#include "snowball/libstemmer/stem_ISO_8859_1_finnish.h"
3333#include "snowball/libstemmer/stem_ISO_8859_1_french.h"
3434#include "snowball/libstemmer/stem_ISO_8859_1_german.h"
35- #include "snowball/libstemmer/stem_ISO_8859_1_hungarian.h"
35+ #include "snowball/libstemmer/stem_ISO_8859_1_indonesian.h"
36+ #include "snowball/libstemmer/stem_ISO_8859_1_irish.h"
3637#include "snowball/libstemmer/stem_ISO_8859_1_italian.h"
3738#include "snowball/libstemmer/stem_ISO_8859_1_norwegian.h"
3839#include "snowball/libstemmer/stem_ISO_8859_1_porter.h"
3940#include "snowball/libstemmer/stem_ISO_8859_1_portuguese.h"
4041#include "snowball/libstemmer/stem_ISO_8859_1_spanish.h"
4142#include "snowball/libstemmer/stem_ISO_8859_1_swedish.h"
43+ #include "snowball/libstemmer/stem_ISO_8859_2_hungarian.h"
4244#include "snowball/libstemmer/stem_ISO_8859_2_romanian.h"
4345#include "snowball/libstemmer/stem_KOI8_R_russian.h"
46+ #include "snowball/libstemmer/stem_UTF_8_arabic.h"
4447#include "snowball/libstemmer/stem_UTF_8_danish.h"
4548#include "snowball/libstemmer/stem_UTF_8_dutch.h"
4649#include "snowball/libstemmer/stem_UTF_8_english.h"
4750#include "snowball/libstemmer/stem_UTF_8_finnish.h"
4851#include "snowball/libstemmer/stem_UTF_8_french.h"
4952#include "snowball/libstemmer/stem_UTF_8_german.h"
5053#include "snowball/libstemmer/stem_UTF_8_hungarian.h"
54+ #include "snowball/libstemmer/stem_UTF_8_indonesian.h"
55+ #include "snowball/libstemmer/stem_UTF_8_irish.h"
5156#include "snowball/libstemmer/stem_UTF_8_italian.h"
57+ #include "snowball/libstemmer/stem_UTF_8_lithuanian.h"
58+ #include "snowball/libstemmer/stem_UTF_8_nepali.h"
5259#include "snowball/libstemmer/stem_UTF_8_norwegian.h"
5360#include "snowball/libstemmer/stem_UTF_8_porter.h"
5461#include "snowball/libstemmer/stem_UTF_8_portuguese.h"
5562#include "snowball/libstemmer/stem_UTF_8_romanian.h"
5663#include "snowball/libstemmer/stem_UTF_8_russian.h"
5764#include "snowball/libstemmer/stem_UTF_8_spanish.h"
5865#include "snowball/libstemmer/stem_UTF_8_swedish.h"
66+ #include "snowball/libstemmer/stem_UTF_8_tamil.h"
5967#include "snowball/libstemmer/stem_UTF_8_turkish.h"
6068
6169PG_MODULE_MAGIC ;
@@ -74,48 +82,60 @@ typedef struct stemmer_module
7482 int (* stem ) (struct SN_env * );
7583} stemmer_module ;
7684
/*
 * STEMMER_MODULE - build one initializer for the stemmer_modules[] table.
 *
 * Args: stemmer name, PG code for encoding, Snowball's name for encoding
 *
 * "name" is stringified to form the entry's name, and is also pasted together
 * with "senc" to form the three function symbols
 *		name_senc_create_env, name_senc_close_env, name_senc_stem
 * so those symbols must already be declared wherever the macro is used.
 *
 * There must be no whitespace between the macro name and the opening
 * parenthesis here; otherwise the preprocessor would treat this as an
 * object-like macro whose replacement text starts with the parameter list.
 */
#define STEMMER_MODULE(name,enc,senc) \
	{#name, enc, name##_##senc##_create_env, name##_##senc##_close_env, name##_##senc##_stem}
88+
7789static const stemmer_module stemmer_modules [] =
7890{
7991 /*
8092 * Stemmers list from Snowball distribution
8193 */
82- {"danish" , PG_LATIN1 , danish_ISO_8859_1_create_env , danish_ISO_8859_1_close_env , danish_ISO_8859_1_stem },
83- {"dutch" , PG_LATIN1 , dutch_ISO_8859_1_create_env , dutch_ISO_8859_1_close_env , dutch_ISO_8859_1_stem },
84- {"english" , PG_LATIN1 , english_ISO_8859_1_create_env , english_ISO_8859_1_close_env , english_ISO_8859_1_stem },
85- {"finnish" , PG_LATIN1 , finnish_ISO_8859_1_create_env , finnish_ISO_8859_1_close_env , finnish_ISO_8859_1_stem },
86- {"french" , PG_LATIN1 , french_ISO_8859_1_create_env , french_ISO_8859_1_close_env , french_ISO_8859_1_stem },
87- {"german" , PG_LATIN1 , german_ISO_8859_1_create_env , german_ISO_8859_1_close_env , german_ISO_8859_1_stem },
88- {"hungarian" , PG_LATIN1 , hungarian_ISO_8859_1_create_env , hungarian_ISO_8859_1_close_env , hungarian_ISO_8859_1_stem },
89- {"italian" , PG_LATIN1 , italian_ISO_8859_1_create_env , italian_ISO_8859_1_close_env , italian_ISO_8859_1_stem },
90- {"norwegian" , PG_LATIN1 , norwegian_ISO_8859_1_create_env , norwegian_ISO_8859_1_close_env , norwegian_ISO_8859_1_stem },
91- {"porter" , PG_LATIN1 , porter_ISO_8859_1_create_env , porter_ISO_8859_1_close_env , porter_ISO_8859_1_stem },
92- {"portuguese" , PG_LATIN1 , portuguese_ISO_8859_1_create_env , portuguese_ISO_8859_1_close_env , portuguese_ISO_8859_1_stem },
93- {"spanish" , PG_LATIN1 , spanish_ISO_8859_1_create_env , spanish_ISO_8859_1_close_env , spanish_ISO_8859_1_stem },
94- {"swedish" , PG_LATIN1 , swedish_ISO_8859_1_create_env , swedish_ISO_8859_1_close_env , swedish_ISO_8859_1_stem },
95- {"romanian" , PG_LATIN2 , romanian_ISO_8859_2_create_env , romanian_ISO_8859_2_close_env , romanian_ISO_8859_2_stem },
96- {"russian" , PG_KOI8R , russian_KOI8_R_create_env , russian_KOI8_R_close_env , russian_KOI8_R_stem },
97- {"danish" , PG_UTF8 , danish_UTF_8_create_env , danish_UTF_8_close_env , danish_UTF_8_stem },
98- {"dutch" , PG_UTF8 , dutch_UTF_8_create_env , dutch_UTF_8_close_env , dutch_UTF_8_stem },
99- {"english" , PG_UTF8 , english_UTF_8_create_env , english_UTF_8_close_env , english_UTF_8_stem },
100- {"finnish" , PG_UTF8 , finnish_UTF_8_create_env , finnish_UTF_8_close_env , finnish_UTF_8_stem },
101- {"french" , PG_UTF8 , french_UTF_8_create_env , french_UTF_8_close_env , french_UTF_8_stem },
102- {"german" , PG_UTF8 , german_UTF_8_create_env , german_UTF_8_close_env , german_UTF_8_stem },
103- {"hungarian" , PG_UTF8 , hungarian_UTF_8_create_env , hungarian_UTF_8_close_env , hungarian_UTF_8_stem },
104- {"italian" , PG_UTF8 , italian_UTF_8_create_env , italian_UTF_8_close_env , italian_UTF_8_stem },
105- {"norwegian" , PG_UTF8 , norwegian_UTF_8_create_env , norwegian_UTF_8_close_env , norwegian_UTF_8_stem },
106- {"porter" , PG_UTF8 , porter_UTF_8_create_env , porter_UTF_8_close_env , porter_UTF_8_stem },
107- {"portuguese" , PG_UTF8 , portuguese_UTF_8_create_env , portuguese_UTF_8_close_env , portuguese_UTF_8_stem },
108- {"romanian" , PG_UTF8 , romanian_UTF_8_create_env , romanian_UTF_8_close_env , romanian_UTF_8_stem },
109- {"russian" , PG_UTF8 , russian_UTF_8_create_env , russian_UTF_8_close_env , russian_UTF_8_stem },
110- {"spanish" , PG_UTF8 , spanish_UTF_8_create_env , spanish_UTF_8_close_env , spanish_UTF_8_stem },
111- {"swedish" , PG_UTF8 , swedish_UTF_8_create_env , swedish_UTF_8_close_env , swedish_UTF_8_stem },
112- {"turkish" , PG_UTF8 , turkish_UTF_8_create_env , turkish_UTF_8_close_env , turkish_UTF_8_stem },
94+ STEMMER_MODULE (danish , PG_LATIN1 , ISO_8859_1 ),
95+ STEMMER_MODULE (dutch , PG_LATIN1 , ISO_8859_1 ),
96+ STEMMER_MODULE (english , PG_LATIN1 , ISO_8859_1 ),
97+ STEMMER_MODULE (finnish , PG_LATIN1 , ISO_8859_1 ),
98+ STEMMER_MODULE (french , PG_LATIN1 , ISO_8859_1 ),
99+ STEMMER_MODULE (german , PG_LATIN1 , ISO_8859_1 ),
100+ STEMMER_MODULE (indonesian , PG_LATIN1 , ISO_8859_1 ),
101+ STEMMER_MODULE (irish , PG_LATIN1 , ISO_8859_1 ),
102+ STEMMER_MODULE (italian , PG_LATIN1 , ISO_8859_1 ),
103+ STEMMER_MODULE (norwegian , PG_LATIN1 , ISO_8859_1 ),
104+ STEMMER_MODULE (porter , PG_LATIN1 , ISO_8859_1 ),
105+ STEMMER_MODULE (portuguese , PG_LATIN1 , ISO_8859_1 ),
106+ STEMMER_MODULE (spanish , PG_LATIN1 , ISO_8859_1 ),
107+ STEMMER_MODULE (swedish , PG_LATIN1 , ISO_8859_1 ),
108+ STEMMER_MODULE (hungarian , PG_LATIN2 , ISO_8859_2 ),
109+ STEMMER_MODULE (romanian , PG_LATIN2 , ISO_8859_2 ),
110+ STEMMER_MODULE (russian , PG_KOI8R , KOI8_R ),
111+ STEMMER_MODULE (arabic , PG_UTF8 , UTF_8 ),
112+ STEMMER_MODULE (danish , PG_UTF8 , UTF_8 ),
113+ STEMMER_MODULE (dutch , PG_UTF8 , UTF_8 ),
114+ STEMMER_MODULE (english , PG_UTF8 , UTF_8 ),
115+ STEMMER_MODULE (finnish , PG_UTF8 , UTF_8 ),
116+ STEMMER_MODULE (french , PG_UTF8 , UTF_8 ),
117+ STEMMER_MODULE (german , PG_UTF8 , UTF_8 ),
118+ STEMMER_MODULE (hungarian , PG_UTF8 , UTF_8 ),
119+ STEMMER_MODULE (indonesian , PG_UTF8 , UTF_8 ),
120+ STEMMER_MODULE (irish , PG_UTF8 , UTF_8 ),
121+ STEMMER_MODULE (italian , PG_UTF8 , UTF_8 ),
122+ STEMMER_MODULE (lithuanian , PG_UTF8 , UTF_8 ),
123+ STEMMER_MODULE (nepali , PG_UTF8 , UTF_8 ),
124+ STEMMER_MODULE (norwegian , PG_UTF8 , UTF_8 ),
125+ STEMMER_MODULE (porter , PG_UTF8 , UTF_8 ),
126+ STEMMER_MODULE (portuguese , PG_UTF8 , UTF_8 ),
127+ STEMMER_MODULE (romanian , PG_UTF8 , UTF_8 ),
128+ STEMMER_MODULE (russian , PG_UTF8 , UTF_8 ),
129+ STEMMER_MODULE (spanish , PG_UTF8 , UTF_8 ),
130+ STEMMER_MODULE (swedish , PG_UTF8 , UTF_8 ),
131+ STEMMER_MODULE (tamil , PG_UTF8 , UTF_8 ),
132+ STEMMER_MODULE (turkish , PG_UTF8 , UTF_8 ),
113133
114134 /*
115135 * Stemmer with PG_SQL_ASCII encoding should be valid for any server
116136 * encoding
117137 */
118- { " english" , PG_SQL_ASCII , english_ISO_8859_1_create_env , english_ISO_8859_1_close_env , english_ISO_8859_1_stem } ,
138+ STEMMER_MODULE ( english , PG_SQL_ASCII , ISO_8859_1 ) ,
119139
120140 {NULL , 0 , NULL , NULL , NULL } /* list end marker */
121141};
0 commit comments