@@ -103,18 +103,22 @@ const uint8 pg_number_of_ones[256] = {
103103 4 , 5 , 5 , 6 , 5 , 6 , 6 , 7 , 5 , 6 , 6 , 7 , 6 , 7 , 7 , 8
104104};
105105
106- static int pg_popcount32_slow (uint32 word );
107- static int pg_popcount64_slow (uint64 word );
106+ static inline int pg_popcount32_slow (uint32 word );
107+ static inline int pg_popcount64_slow (uint64 word );
108+ static uint64 pg_popcount_slow (const char * buf , int bytes );
108109
109110#ifdef TRY_POPCNT_FAST
110111static bool pg_popcount_available (void );
111112static int pg_popcount32_choose (uint32 word );
112113static int pg_popcount64_choose (uint64 word );
113- static int pg_popcount32_fast (uint32 word );
114- static int pg_popcount64_fast (uint64 word );
114+ static uint64 pg_popcount_choose (const char * buf , int bytes );
115+ static inline int pg_popcount32_fast (uint32 word );
116+ static inline int pg_popcount64_fast (uint64 word );
117+ static uint64 pg_popcount_fast (const char * buf , int bytes );
115118
116119int (* pg_popcount32 ) (uint32 word ) = pg_popcount32_choose ;
117120int (* pg_popcount64 ) (uint64 word ) = pg_popcount64_choose ;
121+ uint64 (* pg_popcount ) (const char * buf , int bytes ) = pg_popcount_choose ;
118122#endif /* TRY_POPCNT_FAST */
119123
120124#ifdef TRY_POPCNT_FAST
@@ -151,11 +155,13 @@ pg_popcount32_choose(uint32 word)
151155 {
152156 pg_popcount32 = pg_popcount32_fast ;
153157 pg_popcount64 = pg_popcount64_fast ;
158+ pg_popcount = pg_popcount_fast ;
154159 }
155160 else
156161 {
157162 pg_popcount32 = pg_popcount32_slow ;
158163 pg_popcount64 = pg_popcount64_slow ;
164+ pg_popcount = pg_popcount_slow ;
159165 }
160166
161167 return pg_popcount32 (word );
@@ -168,21 +174,42 @@ pg_popcount64_choose(uint64 word)
168174 {
169175 pg_popcount32 = pg_popcount32_fast ;
170176 pg_popcount64 = pg_popcount64_fast ;
177+ pg_popcount = pg_popcount_fast ;
171178 }
172179 else
173180 {
174181 pg_popcount32 = pg_popcount32_slow ;
175182 pg_popcount64 = pg_popcount64_slow ;
183+ pg_popcount = pg_popcount_slow ;
176184 }
177185
178186 return pg_popcount64 (word );
179187}
180188
189+ static uint64
190+ pg_popcount_choose (const char * buf , int bytes )
191+ {
192+ if (pg_popcount_available ())
193+ {
194+ pg_popcount32 = pg_popcount32_fast ;
195+ pg_popcount64 = pg_popcount64_fast ;
196+ pg_popcount = pg_popcount_fast ;
197+ }
198+ else
199+ {
200+ pg_popcount32 = pg_popcount32_slow ;
201+ pg_popcount64 = pg_popcount64_slow ;
202+ pg_popcount = pg_popcount_slow ;
203+ }
204+
205+ return pg_popcount (buf , bytes );
206+ }
207+
181208/*
182209 * pg_popcount32_fast
183210 * Return the number of 1 bits set in word
184211 */
185- static int
212+ static inline int
186213pg_popcount32_fast (uint32 word )
187214{
188215#ifdef _MSC_VER
@@ -199,7 +226,7 @@ __asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
199226 * pg_popcount64_fast
200227 * Return the number of 1 bits set in word
201228 */
202- static int
229+ static inline int
203230pg_popcount64_fast (uint64 word )
204231{
205232#ifdef _MSC_VER
@@ -212,14 +239,60 @@ __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
212239#endif
213240}
214241
242+ /*
243+ * pg_popcount_fast
244+ * Returns the number of 1-bits in buf
245+ */
246+ static uint64
247+ pg_popcount_fast (const char * buf , int bytes )
248+ {
249+ uint64 popcnt = 0 ;
250+
251+ #if SIZEOF_VOID_P >= 8
252+ /* Process in 64-bit chunks if the buffer is aligned. */
253+ if (buf == (const char * ) TYPEALIGN (8 , buf ))
254+ {
255+ const uint64 * words = (const uint64 * ) buf ;
256+
257+ while (bytes >= 8 )
258+ {
259+ popcnt += pg_popcount64_fast (* words ++ );
260+ bytes -= 8 ;
261+ }
262+
263+ buf = (const char * ) words ;
264+ }
265+ #else
266+ /* Process in 32-bit chunks if the buffer is aligned. */
267+ if (buf == (const char * ) TYPEALIGN (4 , buf ))
268+ {
269+ const uint32 * words = (const uint32 * ) buf ;
270+
271+ while (bytes >= 4 )
272+ {
273+ popcnt += pg_popcount32_fast (* words ++ );
274+ bytes -= 4 ;
275+ }
276+
277+ buf = (const char * ) words ;
278+ }
279+ #endif
280+
281+ /* Process any remaining bytes */
282+ while (bytes -- )
283+ popcnt += pg_number_of_ones [(unsigned char ) * buf ++ ];
284+
285+ return popcnt ;
286+ }
287+
215288#endif /* TRY_POPCNT_FAST */
216289
217290
218291/*
219292 * pg_popcount32_slow
220293 * Return the number of 1 bits set in word
221294 */
222- static int
295+ static inline int
223296pg_popcount32_slow (uint32 word )
224297{
225298#ifdef HAVE__BUILTIN_POPCOUNT
@@ -241,7 +314,7 @@ pg_popcount32_slow(uint32 word)
241314 * pg_popcount64_slow
242315 * Return the number of 1 bits set in word
243316 */
244- static int
317+ static inline int
245318pg_popcount64_slow (uint64 word )
246319{
247320#ifdef HAVE__BUILTIN_POPCOUNT
@@ -265,35 +338,12 @@ pg_popcount64_slow(uint64 word)
265338#endif /* HAVE__BUILTIN_POPCOUNT */
266339}
267340
268- #ifndef TRY_POPCNT_FAST
269-
270341/*
271- * When the POPCNT instruction is not available, there's no point in using
272- * function pointers to vary the implementation between the fast and slow
273- * method. We instead just make these actual external functions when
274- * TRY_POPCNT_FAST is not defined. The compiler should be able to inline
275- * the slow versions here.
276- */
277- int
278- pg_popcount32 (uint32 word )
279- {
280- return pg_popcount32_slow (word );
281- }
282-
283- int
284- pg_popcount64 (uint64 word )
285- {
286- return pg_popcount64_slow (word );
287- }
288-
289- #endif /* !TRY_POPCNT_FAST */
290-
291- /*
292- * pg_popcount
342+ * pg_popcount_slow
293343 * Returns the number of 1-bits in buf
294344 */
295- uint64
296- pg_popcount (const char * buf , int bytes )
345+ static uint64
346+ pg_popcount_slow (const char * buf , int bytes )
297347{
298348 uint64 popcnt = 0 ;
299349
@@ -305,7 +355,7 @@ pg_popcount(const char *buf, int bytes)
305355
306356 while (bytes >= 8 )
307357 {
308- popcnt += pg_popcount64 (* words ++ );
358+ popcnt += pg_popcount64_slow (* words ++ );
309359 bytes -= 8 ;
310360 }
311361
@@ -319,7 +369,7 @@ pg_popcount(const char *buf, int bytes)
319369
320370 while (bytes >= 4 )
321371 {
322- popcnt += pg_popcount32 (* words ++ );
372+ popcnt += pg_popcount32_slow (* words ++ );
323373 bytes -= 4 ;
324374 }
325375
@@ -333,3 +383,36 @@ pg_popcount(const char *buf, int bytes)
333383
334384 return popcnt ;
335385}
386+
387+ #ifndef TRY_POPCNT_FAST
388+
389+ /*
390+ * When the POPCNT instruction is not available, there's no point in using
391+ * function pointers to vary the implementation between the fast and slow
392+ * method. We instead just make these actual external functions when
393+ * TRY_POPCNT_FAST is not defined. The compiler should be able to inline
394+ * the slow versions here.
395+ */
396+ int
397+ pg_popcount32 (uint32 word )
398+ {
399+ return pg_popcount32_slow (word );
400+ }
401+
402+ int
403+ pg_popcount64 (uint64 word )
404+ {
405+ return pg_popcount64_slow (word );
406+ }
407+
408+ /*
409+ * pg_popcount
410+ * Returns the number of 1-bits in buf
411+ */
412+ uint64
413+ pg_popcount (const char * buf , int bytes )
414+ {
415+ return pg_popcount_slow (buf , bytes );
416+ }
417+
418+ #endif /* !TRY_POPCNT_FAST */
0 commit comments