@@ -259,6 +259,48 @@ typedef struct BloomFilter
259259 char data [FLEXIBLE_ARRAY_MEMBER ];
260260} BloomFilter ;
261261
262+ /*
263+ * bloom_filter_size
264+ * Calculate Bloom filter parameters (nbits, nbytes, nhashes).
265+ *
266+ * Given expected number of distinct values and desired false positive rate,
267+ * calculates the optimal parameters of the Bloom filter.
268+ *
269+ * The resulting parameters are returned through nbytesp (number of bytes),
270+ * nbitsp (number of bits) and nhashesp (number of hash functions). If a
271+ * pointer is NULL, the parameter is not returned.
272+ */
273+ static void
274+ bloom_filter_size (int ndistinct , double false_positive_rate ,
275+ int * nbytesp , int * nbitsp , int * nhashesp )
276+ {
277+ double k ;
278+ int nbits ,
279+ nbytes ;
280+
281+ /* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
282+ nbits = ceil (- (ndistinct * log (false_positive_rate )) / pow (log (2.0 ), 2 ));
283+
284+ /* round m to whole bytes */
285+ nbytes = ((nbits + 7 ) / 8 );
286+ nbits = nbytes * 8 ;
287+
288+ /*
289+ * round(log(2.0) * m / ndistinct), but assume round() may not be
290+ * available on Windows
291+ */
292+ k = log (2.0 ) * nbits / ndistinct ;
293+ k = (k - floor (k ) >= 0.5 ) ? ceil (k ) : floor (k );
294+
295+ if (nbytesp )
296+ * nbytesp = nbytes ;
297+
298+ if (nbitsp )
299+ * nbitsp = nbits ;
300+
301+ if (nhashesp )
302+ * nhashesp = (int ) k ;
303+ }
262304
263305/*
264306 * bloom_init
@@ -275,19 +317,15 @@ bloom_init(int ndistinct, double false_positive_rate)
275317
276318 int nbits ; /* size of filter / number of bits */
277319 int nbytes ; /* size of filter / number of bytes */
278-
279- double k ; /* number of hash functions */
320+ int nhashes ; /* number of hash functions */
280321
281322 Assert (ndistinct > 0 );
282323 Assert ((false_positive_rate >= BLOOM_MIN_FALSE_POSITIVE_RATE ) &&
283324 (false_positive_rate < BLOOM_MAX_FALSE_POSITIVE_RATE ));
284325
285- /* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
286- nbits = ceil (- (ndistinct * log (false_positive_rate )) / pow (log (2.0 ), 2 ));
287-
288- /* round m to whole bytes */
289- nbytes = ((nbits + 7 ) / 8 );
290- nbits = nbytes * 8 ;
326+ /* calculate bloom filter size / parameters */
327+ bloom_filter_size (ndistinct , false_positive_rate ,
328+ & nbytes , & nbits , & nhashes );
291329
292330 /*
293331 * Reject filters that are obviously too large to store on a page.
@@ -310,13 +348,6 @@ bloom_init(int ndistinct, double false_positive_rate)
310348 elog (ERROR , "the bloom filter is too large (%d > %zu)" , nbytes ,
311349 BloomMaxFilterSize );
312350
313- /*
314- * round(log(2.0) * m / ndistinct), but assume round() may not be
315- * available on Windows
316- */
317- k = log (2.0 ) * nbits / ndistinct ;
318- k = (k - floor (k ) >= 0.5 ) ? ceil (k ) : floor (k );
319-
320351 /*
321352 * We allocate the whole filter. Most of it is going to be 0 bits, so the
322353 * varlena is easy to compress.
@@ -326,7 +357,7 @@ bloom_init(int ndistinct, double false_positive_rate)
326357 filter = (BloomFilter * ) palloc0 (len );
327358
328359 filter -> flags = 0 ;
329- filter -> nhashes = ( int ) k ;
360+ filter -> nhashes = nhashes ;
330361 filter -> nbits = nbits ;
331362
332363 SET_VARSIZE (filter , len );
0 commit comments