|
35 | 35 | * receive a different case-normalization mapping. |
36 | 36 | */ |
37 | 37 | int |
38 | | -ScanKeywordLookup(const char *text, |
| 38 | +ScanKeywordLookup(const char *str, |
39 | 39 | const ScanKeywordList *keywords) |
40 | 40 | { |
41 | | - int len, |
42 | | - i; |
43 | | - char word[NAMEDATALEN]; |
44 | | - const char *kw_string; |
45 | | - const uint16 *kw_offsets; |
46 | | - const uint16 *low; |
47 | | - const uint16 *high; |
48 | | - |
49 | | - len = strlen(text); |
| 41 | + size_t len; |
| 42 | + int h; |
| 43 | + const char *kw; |
50 | 44 |
|
| 45 | + /* |
| 46 | + * Reject immediately if too long to be any keyword. This saves useless |
| 47 | + * hashing and downcasing work on long strings. |
| 48 | + */ |
| 49 | + len = strlen(str); |
51 | 50 | if (len > keywords->max_kw_len) |
52 | | - return -1; /* too long to be any keyword */ |
53 | | - |
54 | | - /* We assume all keywords are shorter than NAMEDATALEN. */ |
55 | | - Assert(len < NAMEDATALEN); |
| 51 | + return -1; |
56 | 52 |
|
57 | 53 | /* |
58 | | - * Apply an ASCII-only downcasing. We must not use tolower() since it may |
59 | | - * produce the wrong translation in some locales (eg, Turkish). |
| 54 | + * Compute the hash function. We assume it was generated to produce |
| 55 | + * case-insensitive results. Since it's a perfect hash, we need only |
| 56 | + * match to the specific keyword it identifies. |
60 | 57 | */ |
61 | | - for (i = 0; i < len; i++) |
62 | | - { |
63 | | - char ch = text[i]; |
| 58 | + h = keywords->hash(str, len); |
64 | 59 |
|
65 | | - if (ch >= 'A' && ch <= 'Z') |
66 | | - ch += 'a' - 'A'; |
67 | | - word[i] = ch; |
68 | | - } |
69 | | - word[len] = '\0'; |
| 60 | + /* An out-of-range result implies no match */ |
| 61 | + if (h < 0 || h >= keywords->num_keywords) |
| 62 | + return -1; |
70 | 63 |
|
71 | 64 | /* |
72 | | - * Now do a binary search using plain strcmp() comparison. |
| 65 | + * Compare character-by-character to see if we have a match, applying an |
| 66 | + * ASCII-only downcasing to the input characters. We must not use |
| 67 | + * tolower() since it may produce the wrong translation in some locales |
| 68 | + * (eg, Turkish). |
73 | 69 | */ |
74 | | - kw_string = keywords->kw_string; |
75 | | - kw_offsets = keywords->kw_offsets; |
76 | | - low = kw_offsets; |
77 | | - high = kw_offsets + (keywords->num_keywords - 1); |
78 | | - while (low <= high) |
| 70 | + kw = GetScanKeyword(h, keywords); |
| 71 | + while (*str != '\0') |
79 | 72 | { |
80 | | - const uint16 *middle; |
81 | | - int difference; |
| 73 | + char ch = *str++; |
82 | 74 |
|
83 | | - middle = low + (high - low) / 2; |
84 | | - difference = strcmp(kw_string + *middle, word); |
85 | | - if (difference == 0) |
86 | | - return middle - kw_offsets; |
87 | | - else if (difference < 0) |
88 | | - low = middle + 1; |
89 | | - else |
90 | | - high = middle - 1; |
| 75 | + if (ch >= 'A' && ch <= 'Z') |
| 76 | + ch += 'a' - 'A'; |
| 77 | + if (ch != *kw++) |
| 78 | + return -1; |
91 | 79 | } |
| 80 | + if (*kw != '\0') |
| 81 | + return -1; |
92 | 82 |
|
93 | | - return -1; |
| 83 | + /* Success! */ |
| 84 | + return h; |
94 | 85 | } |
0 commit comments