I am a C++ programmer trying to learn C. Please critique my C code here. I am trying to make a small "hash table" that's \$O(n)\$ lookup. But, it is stack-based and so should be no slower than a properly done hash table if the CAPACITY is set to something small if it can fit inside CPU cache, right? By the way, I wish there's an easy way to verify that.
.h file:
/* This is a stack based data structure that has hash table interface and requires no heap allocation.
* Constraints: Caller must not fill the more than CAPACITY number of kv pairs, otherwise
* the program crashes via assert(). The key can be anything but INT_MIN or INT_MIN+1.
*/
#include <limits.h>
#define CAPACITY 10001
#define EMPTY INT_MIN
typedef struct {
int key;
int value;
} entry;
typedef struct {
entry data[CAPACITY];
} on_hash_table;
void on_hash_table_init(on_hash_table* ht);
// Return NOT_FOUND if not found.
int on_hash_table_get(on_hash_table* ht, int key);
// Set value. Grow the table if needed.
void on_hash_table_set(on_hash_table* ht, int key, int value);
// Removes the entry.
void on_hash_table_remove(on_hash_table*ht, int key);
.c file:
#include "on_hash_table.h"
#include <stdlib.h>
#include <assert.h>
#define TOMBSTONE (INT_MIN + 1)
void on_hash_table_init(on_hash_table* ht) {
//ht->data = (entry*) malloc(sizeof(entry) * 128);
for (int i = 0; i < CAPACITY; ++i) {
ht->data[i].key = EMPTY;
}
}
// Return EMPTY if not found.
int on_hash_table_get(on_hash_table* ht, int key) {
for (int i = 0;; ++i) {
int k = ht->data[i].key;
if (k == key) {
return ht->data[i].value;
} else if (k == EMPTY) {
return EMPTY;
}
}
// Never reach here!
assert(0);
}
// Set value.
void on_hash_table_set(on_hash_table* ht, int key, int value) {
for (int i = 0;; ++i) {
assert(i < CAPACITY);
int k = ht->data[i].key;
if (k == key || k==TOMBSTONE || k==EMPTY) {
ht->data[i].key = key;
ht->data[i].value = value;
return;
}
}
assert(0);
}
// Removes the entry.
void on_hash_table_remove(on_hash_table* ht, int key) {
for (int i = 0;; ++i) {
assert(i < CAPACITY);
int k = ht->data[i].key;
if (k == EMPTY) {
return;
}
if (k == key) {
ht->data[i].key = TOMBSTONE;
return;
}
}
assert(0);
}
test file: On my PC, my tests results are:
"Time taken: 191.189 microseconds" for TEST_SZ 1
"Time taken: 207.105 microseconds" for TEST_SZ 10
"Time taken: 441.190 microseconds" for TEST_SZ 100
"Time taken: 7250.318 microseconds" for TEST_SZ 1000
"Time taken: 458796.948 microseconds" for TEST_SZ 10000
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <stdbool.h>
#include <assert.h>
#include "on_hash_table.h"
#define TEST_SZ 1000
typedef struct {
int key;
int value;
} test_case ;
void test(test_case* tc) {
on_hash_table ht;
on_hash_table_init(&ht);
for (int i = 0; i<TEST_SZ; ++i) {
on_hash_table_set(&ht, tc[i].key, tc[i].value);
}
// Verify lookup works.
for (int i = 0; i<TEST_SZ; ++i) {
bool passed = tc[i].value == on_hash_table_get(&ht, tc[i].key);
assert(passed);
}
// Remove everything
for (int i = 0; i<TEST_SZ; ++i) {
on_hash_table_remove(&ht, tc[i].key);
}
// Nothing is found after deletion.
for (int i = 0; i<TEST_SZ; ++i) {
bool passed = EMPTY == on_hash_table_get(&ht, tc[i].key);
assert(passed);
}
}
int main() {
test_case test_cases[TEST_SZ] = {0};
for (int i = 0; i<TEST_SZ; ++i) {
test_case tc;
int r = rand();
tc.key = r;
tc.value = rand();
test_cases[i] = tc;
}
struct timespec start, end;
clock_gettime(CLOCK_MONOTONIC, &start); // Start timing
test(test_cases);
clock_gettime(CLOCK_MONOTONIC, &end); // End timing
// Compute time difference in microseconds
long seconds = end.tv_sec - start.tv_sec;
long nanoseconds = end.tv_nsec - start.tv_nsec;
double elapsed = seconds * 1e6 + nanoseconds / 1e3;
printf("Time taken: %.3f microseconds\n", elapsed);
return 0;
}