@@ -96,6 +96,7 @@ typedef struct
9696 } key ;
9797 } data ;
9898 uint8 wclass ;
99+ float4 idf ;
99100 int32 pos ;
100101} DocRepresentation ;
101102
@@ -106,20 +107,13 @@ typedef struct
106107}
107108QueryRepresentationOperand ;
108109
109- typedef struct
110- {
111- float4 idf ;
112- bool idfloaded ;
113- } QueryRepresentationIDF ;
114-
/*
 * State describing one tsquery while a document is being scored.
 * calc_score() fills an instance on the stack and hands it to get_docrep().
 */
115110typedef struct
116111{
/* The query being evaluated; calc_score() stores its TSQuery argument here. */
117112 TSQuery query ;
118113 /* Used in rum_tsquery_distance() */
/* calc_score() leaves this NULL. */
119114 int * map_item_operand ;
120115
/*
 * Array allocated by calc_score() with query->size zeroed entries; the cover
 * scoring loop reads operandData[i].operandexist for i in [0, length).
 */
121116 QueryRepresentationOperand * operandData ;
122- QueryRepresentationIDF * operandIdf ;
/* Number of entries in operandData; calc_score() sets it to query->size. */
123117 int length ;
124118} QueryRepresentation ;
125119
@@ -1098,7 +1092,7 @@ find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
10981092}
10991093
11001094static DocRepresentation *
1101- get_docrep (TSVector txt , QueryRepresentation * qr , uint32 * doclen )
1095+ get_docrep (TSVector txt , QueryRepresentation * qr , uint32 * doclen , bool load_idf )
11021096{
11031097 QueryItem * item = GETQUERY (qr -> query );
11041098 WordEntry * entry ,
@@ -1134,6 +1128,8 @@ get_docrep(TSVector txt, QueryRepresentation *qr, uint32 *doclen)
11341128
11351129 while (entry - firstentry < nitem )
11361130 {
1131+ float4 idf ;
1132+
11371133 if (entry -> haspos )
11381134 {
11391135 dimt = POSDATALEN (txt , entry );
@@ -1187,12 +1183,18 @@ get_docrep(TSVector txt, QueryRepresentation *qr, uint32 *doclen)
11871183
11881184 }
11891185 }
1186+
1187+ if (load_idf )
1188+ idf = estimate_idf (STRPTR (txt ) + entry -> pos , entry -> len );
1189+ else
1190+ idf = 1.0f ;
11901191 }
11911192 else
11921193 {
11931194 doc [cur ].data .item .nitem = doc [cur - 1 ].data .item .nitem ;
11941195 doc [cur ].data .item .item = doc [cur - 1 ].data .item .item ;
11951196 }
1197+ doc [cur ].idf = idf ;
11961198 doc [cur ].pos = WEP_GETPOS (post [j ]);
11971199 doc [cur ].wclass = WEP_GETWEIGHT (post [j ]);
11981200 cur ++ ;
@@ -1256,6 +1258,7 @@ calc_score_docr(float4 *arrdata, DocRepresentation *doc, uint32 doclen,
12561258 /* For rum_tsquery_distance() */
12571259 else
12581260 new_cover_key += (int )(uintptr_t )ptr -> data .key .item_first ;
1261+ Idf += ptr -> idf ;
12591262 ptr ++ ;
12601263 }
12611264
@@ -1287,43 +1290,16 @@ calc_score_docr(float4 *arrdata, DocRepresentation *doc, uint32 doclen,
12871290
12881291 /* Compute the number of query terms in the cover */
12891292 for (i = 0 ; i < qr -> length ; i ++ )
1290- {
12911293 if (qr -> operandData [i ].operandexist )
1292- {
1293- if (method & RANK_NORM_IDF )
1294- {
1295- if (!qr -> operandIdf [i ].idfloaded )
1296- {
1297- QueryOperand * oper = (QueryOperand * ) (GETQUERY (qr -> query ) + i );
1298- qr -> operandIdf [i ].idf =
1299- estimate_idf (
1300- GETOPERAND (qr -> query ) + oper -> distance ,
1301- oper -> length
1302- );
1303- qr -> operandIdf [i ].idfloaded = true;
1304- }
1305-
1306- Idf += qr -> operandIdf [i ].idf ;
1307- }
1308- else
1309- {
1310- nitems ++ ;
1311- }
1312- }
1313- }
1294+ nitems ++ ;
13141295
13151296 Cpos = ((double ) (ext .end - ext .begin + 1 )) / InvSum ;
13161297
1298+ if (nitems > 0 )
1299+ Cpos *= nitems ;
1300+
13171301 if (method & RANK_NORM_IDF )
1318- {
1319- if (Idf >= 1.0 )
1320- Cpos *= Idf ;
1321- }
1322- else
1323- {
1324- if (nitems > 0 )
1325- Cpos *= nitems ;
1326- }
1302+ Cpos *= Idf ;
13271303
13281304 /*
13291305 * if doc are big enough then ext.q may be equal to ext.p due to limit
@@ -1408,11 +1384,9 @@ calc_score(float4 *arrdata, TSVector txt, TSQuery query, int method)
14081384 qr .query = query ;
14091385 qr .map_item_operand = NULL ;
14101386 qr .operandData = palloc0 (sizeof (qr .operandData [0 ]) * query -> size );
1411- if (method & RANK_NORM_IDF )
1412- qr .operandIdf = palloc0 (sizeof (qr .operandIdf [0 ]) * query -> size );
14131387 qr .length = query -> size ;
14141388
1415- doc = get_docrep (txt , & qr , & doclen );
1389+ doc = get_docrep (txt , & qr , & doclen , ( method & RANK_NORM_IDF ) ? true : false );
14161390 if (!doc )
14171391 {
14181392 pfree (qr .operandData );
0 commit comments