112112 * of identical bytes like trailing spaces) and for bigger ones
113113 * our 4K maximum look-back distance is too small.
114114 *
115- * The compressor creates a table for 8192 lists of positions.
115+ * The compressor creates a table for lists of positions.
116116 * For each input position (except the last 3), a hash key is
117117 * built from the 4 next input bytes and the position remembered
118118 * in the appropriate list. Thus, the table points to linked
119119 * lists of likely to be at least in the first 4 characters
120120 * matching strings. This is done on the fly while the input
121121 * is compressed into the output area. Table entries are only
122122 * kept for the last 4096 input positions, since we cannot use
123- * back-pointers larger than that anyway.
123+ * back-pointers larger than that anyway. The size of the hash
124+ * table is chosen based on the size of the input - a larger table
125+ * has a larger startup cost, as it needs to be initialized to
126+ * zero, but reduces the number of hash collisions on long inputs.
124127 *
125- * For each byte in the input, it's hash key (built from this
128+ * For each byte in the input, its hash key (built from this
126129 * byte and the next 3) is used to find the appropriate list
127130 * in the table. The lists remember the positions of all bytes
128131 * that had the same hash key in the past in increasing backward
180183 * Local definitions
181184 * ----------
182185 */
183- #define PGLZ_HISTORY_LISTS 8192 /* must be power of 2 */
184- #define PGLZ_HISTORY_MASK (PGLZ_HISTORY_LISTS - 1)
186+ #define PGLZ_MAX_HISTORY_LISTS 8192 /* must be power of 2 */
185187#define PGLZ_HISTORY_SIZE 4096
186188#define PGLZ_MAX_MATCH 273
187189
@@ -241,9 +243,15 @@ const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data;
241243 * Statically allocated work arrays for history
242244 * ----------
243245 */
244- static PGLZ_HistEntry * hist_start [PGLZ_HISTORY_LISTS ];
245- static PGLZ_HistEntry hist_entries [PGLZ_HISTORY_SIZE ];
246+ static int16 hist_start [PGLZ_MAX_HISTORY_LISTS ];
247+ static PGLZ_HistEntry hist_entries [PGLZ_HISTORY_SIZE + 1 ];
246248
249+ /*
250+ * Element 0 in hist_entries is unused, and means 'invalid'. Likewise,
251+ * INVALID_ENTRY_PTR in next/prev pointers mean 'invalid'.
252+ */
253+ #define INVALID_ENTRY 0
254+ #define INVALID_ENTRY_PTR (&hist_entries[INVALID_ENTRY])
247255
248256/* ----------
249257 * pglz_hist_idx -
@@ -257,10 +265,10 @@ static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE];
257265 * hash keys more.
258266 * ----------
259267 */
260- #define pglz_hist_idx (_s ,_e ) ( \
268+ #define pglz_hist_idx (_s ,_e , _mask ) ( \
261269 ((((_e) - (_s)) < 4) ? (int) (_s)[0] : \
262- (((_s)[0] << 9 ) ^ ((_s)[1] << 6 ) ^ \
263- ((_s)[2] << 3 ) ^ (_s)[3])) & (PGLZ_HISTORY_MASK ) \
270+ (((_s)[0] << 6 ) ^ ((_s)[1] << 4 ) ^ \
271+ ((_s)[2] << 2 ) ^ (_s)[3])) & (_mask ) \
264272 )
265273
266274
@@ -276,28 +284,35 @@ static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE];
276284 * _hn and _recycle are modified in the macro.
277285 * ----------
278286 */
279- #define pglz_hist_add (_hs ,_he ,_hn ,_recycle ,_s ,_e ) \
287+ #define pglz_hist_add (_hs ,_he ,_hn ,_recycle ,_s ,_e , _mask ) \
280288do { \
281- int __hindex = pglz_hist_idx((_s),(_e)); \
282- PGLZ_HistEntry ** __myhsp = &(_hs)[__hindex]; \
289+ int __hindex = pglz_hist_idx((_s),(_e), (_mask)); \
290+ int16 * __myhsp = &(_hs)[__hindex]; \
283291 PGLZ_HistEntry *__myhe = &(_he)[_hn]; \
284292 if (_recycle) { \
285293 if (__myhe->prev == NULL) \
286- (_hs)[__myhe->hindex] = __myhe->next; \
294+ (_hs)[__myhe->hindex] = __myhe->next - (_he); \
287295 else \
288296 __myhe->prev->next = __myhe->next; \
289297 if (__myhe->next != NULL) \
290298 __myhe->next->prev = __myhe->prev; \
291299 } \
292- __myhe->next = *__myhsp; \
300+ __myhe->next = &(_he)[ *__myhsp]; \
293301 __myhe->prev = NULL; \
294302 __myhe->hindex = __hindex; \
295303 __myhe->pos = (_s); \
296- if (*__myhsp != NULL) \
297- (*__myhsp)->prev = __myhe; \
298- *__myhsp = __myhe; \
299- if (++(_hn) >= PGLZ_HISTORY_SIZE) { \
300- (_hn) = 0; \
304+ /* If there was an existing entry in this hash slot, link */ \
305+ /* this new entry to it. However, the 0th entry in the */ \
306+ /* entries table is unused, so we can freely scribble on it. */ \
307+ /* So don't bother checking if the slot was used - we'll */ \
308+ /* scribble on the unused entry if it was not, but that's */ \
309+ /* harmless. Avoiding the branch in this critical path */ \
310+ /* speeds this up a little bit. */ \
311+ /* if (*__myhsp != INVALID_ENTRY) */ \
312+ (_he )[(* __myhsp )].prev = __myhe ; \
313+ * __myhsp = _hn ; \
314+ if (++ (_hn ) >= PGLZ_HISTORY_SIZE + 1 ) { \
315+ (_hn ) = 1 ; \
301316 (_recycle ) = true; \
302317 } \
303318} while (0 )
@@ -372,18 +387,20 @@ do { \
372387 * ----------
373388 */
374389static inline int
375- pglz_find_match (PGLZ_HistEntry * * hstart , const char * input , const char * end ,
376- int * lenp , int * offp , int good_match , int good_drop )
390+ pglz_find_match (int16 * hstart , const char * input , const char * end ,
391+ int * lenp , int * offp , int good_match , int good_drop , int mask )
377392{
378393 PGLZ_HistEntry * hent ;
394+ int16 hentno ;
379395 int32 len = 0 ;
380396 int32 off = 0 ;
381397
382398 /*
383399 * Traverse the linked history list until a good enough match is found.
384400 */
385- hent = hstart [pglz_hist_idx (input , end )];
386- while (hent )
401+ hentno = hstart [pglz_hist_idx (input , end , mask )];
402+ hent = & hist_entries [hentno ];
403+ while (hent != INVALID_ENTRY_PTR )
387404 {
388405 const char * ip = input ;
389406 const char * hp = hent -> pos ;
@@ -484,7 +501,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
484501{
485502 unsigned char * bp = ((unsigned char * ) dest ) + sizeof (PGLZ_Header );
486503 unsigned char * bstart = bp ;
487- int hist_next = 0 ;
504+ int hist_next = 1 ;
488505 bool hist_recycle = false;
489506 const char * dp = source ;
490507 const char * dend = source + slen ;
@@ -500,6 +517,8 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
500517 int32 result_size ;
501518 int32 result_max ;
502519 int32 need_rate ;
520+ int hashsz ;
521+ int mask ;
503522
504523 /*
505524 * Our fallback strategy is the default.
@@ -555,11 +574,29 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
555574 else
556575 result_max = (slen * (100 - need_rate )) / 100 ;
557576
577+ /*
578+ * Experiments suggest that these hash sizes work pretty well. A large
579+ * hash table minimizes collision, but has a higher startup cost. For
580+ * a small input, the startup cost dominates. The table size must be
581+ * a power of two.
582+ */
583+ if (slen < 128 )
584+ hashsz = 512 ;
585+ else if (slen < 256 )
586+ hashsz = 1024 ;
587+ else if (slen < 512 )
588+ hashsz = 2048 ;
589+ else if (slen < 1024 )
590+ hashsz = 4096 ;
591+ else
592+ hashsz = 8192 ;
593+ mask = hashsz - 1 ;
594+
558595 /*
559596 * Initialize the history lists to empty. We do not need to zero the
560597 * hist_entries[] array; its entries are initialized as they are used.
561598 */
562- memset (hist_start , 0 , sizeof (hist_start ));
599+ memset (hist_start , 0 , hashsz * sizeof (int16 ));
563600
564601 /*
565602 * Compress the source directly into the output buffer.
@@ -589,7 +626,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
589626 * Try to find a match in the history
590627 */
591628 if (pglz_find_match (hist_start , dp , dend , & match_len ,
592- & match_off , good_match , good_drop ))
629+ & match_off , good_match , good_drop , mask ))
593630 {
594631 /*
595632 * Create the tag and add history entries for all matched
@@ -600,7 +637,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
600637 {
601638 pglz_hist_add (hist_start , hist_entries ,
602639 hist_next , hist_recycle ,
603- dp , dend );
640+ dp , dend , mask );
604641 dp ++ ; /* Do not do this ++ in the line above! */
605642 /* The macro would do it four times - Jan. */
606643 }
@@ -614,7 +651,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
614651 pglz_out_literal (ctrlp , ctrlb , ctrl , bp , * dp );
615652 pglz_hist_add (hist_start , hist_entries ,
616653 hist_next , hist_recycle ,
617- dp , dend );
654+ dp , dend , mask );
618655 dp ++ ; /* Do not do this ++ in the line above! */
619656 /* The macro would do it four times - Jan. */
620657 }
0 commit comments