1
\$\begingroup\$

I was looking for comments (on performance and general implementation) on the following Linked HashMap implementation in C:

typedef int (*cmp)(void *a, void *b);
typedef size_t (*hash_fn)(void *val, int bound);

typedef struct {
    int index;
    size_t hash;
    void *key;
    void *val;
    struct Entry *next;
    struct Entry *neighbor;
} Entry;

typedef struct {
    Entry *head;
    Entry *tail;
    int size;
    int capacity;
    Entry **table;
    int threshold;
    float loadFactor;
    cmp key_cmp;
    hash_fn hash;
} HashMap;

void resize(HashMap **map);

void internal_init(HashMap **map, cmp key_cmp, hash_fn hash, int capacity) {
    (*map) = (HashMap *) calloc(1, sizeof(HashMap));
    (*map)->head = (*map)->tail = NULL;
    (*map)->size = 0;
    (*map)->capacity = capacity;
    (*map)->key_cmp = key_cmp;
    (*map)->hash = hash;

    (*map)->loadFactor = 0.75f;
    (*map)->threshold = (int) ((*map)->capacity * (*map)->loadFactor);

    (*map)->table = calloc(capacity, sizeof(Entry *));
    for (int i = 0; i < capacity; i++) {
        (*map)->table[i] = NULL;
    }
}

void init(HashMap **map, cmp key_cmp, hash_fn hash) {
    internal_init(map, key_cmp, hash, 16);
}

int indexOf(HashMap *map, void *key) {
    int ret = 0;
    for (Entry *e = map->head; e != NULL; e = (Entry *) e->next) {
        if (map->key_cmp(e->key, key)) {
            return ret;
        }
        ret++;
    }
    return -1;
}

int internal_put(HashMap **map, void *key, void *val) {
    // 1. Create entry
    Entry *entry = calloc(1, sizeof(Entry));
    entry->key = key;
    entry->val = val;
    entry->next = NULL;

    // Calculate hash and get entry
    size_t key_hash = (*map)->hash(key, (*map)->capacity);
    Entry *tableEntry = (*map)->table[key_hash];

    // If bucket has at least one element
    if (tableEntry != NULL) {
        while (tableEntry->neighbor != NULL) {
            if ((*map)->key_cmp(tableEntry->key, key)) {
                // Key already exists
                tableEntry->val = val;
                return tableEntry->index;
            }
            tableEntry = (Entry *) tableEntry->neighbor;
        }
        if ((*map)->key_cmp(tableEntry->key, key)) {
            // Key already exists
            tableEntry->val = val;
            return tableEntry->index;
        }
        // Key was not found in the bucket
        entry->neighbor = (struct Entry *) (*map)->table[key_hash];
    }
    entry->hash = key_hash;
    (*map)->table[key_hash] = entry;

    // Append to list
    if ((*map)->head == NULL && (*map)->tail == NULL) {
        (*map)->head = (*map)->tail = entry;
        entry->index = 1;
    } else {
        entry->index = (*map)->tail->index + 1;
        (*map)->tail->next = (struct Entry *) entry;
        (*map)->tail = (Entry *) (*map)->tail->next;
    }

    (*map)->size++;
    return entry->index;
}

int put(HashMap **map, void *key, void *val) {
    int ret = internal_put(map, key, val);
    if ((*map)->size >= (*map)->threshold) {
        resize(map);
    }
    return ret;
}

void *get(HashMap *map, void *key) {
    size_t key_hash = map->hash(key, map->capacity);
    if (map->table[key_hash] != NULL) {
        Entry *el = map->table[key_hash];
        while (el != NULL) {
            if (map->key_cmp(el->key, key)) {
                return el->val;
            }
            el = (Entry *) el->neighbor;
        }
    }
    return NULL;
}

void resize(HashMap **map) {
    HashMap *newMap;
    internal_init(&newMap, (*map)->key_cmp, (*map)->hash, (*map)->capacity * 2);

    Entry *e = (*map)->head;
    while (e != NULL) {
        internal_put(&newMap, e->key, e->val);
        e = (Entry *) e->next;
    }
    (*map) = newMap;
}

int containsKey(HashMap *map, void *key) {
    return get(map, key) != NULL;
}

I maintain a linked list in each bucket as well as a central linked list (starting at “head”). On insertion, I add into the correct bucket as well as the tail in order to maintain the insertion order. Please let me know if you need any more information.

\$\endgroup\$
1
  • \$\begingroup\$ This would be a better question if all included header files were shown and if the code that creates the the hashmap and uses the hasmap were shown. It is not currently clear that the code works. \$\endgroup\$ Commented Jul 22, 2019 at 15:23

1 Answer 1

2
\$\begingroup\$

We need to include <stdlib.h>, to have a prototype for calloc().


There's no definition of struct Entry here; this isn't valid C:

typedef struct {
    struct Entry *next;
    struct Entry *neighbor;
} Entry;

Let's have a look at how we create a HashMap object:

   (*map) = (HashMap *) calloc(1, sizeof(HashMap));

It's not necessary to cast the result of the malloc() family of functions, and can be considered harmful to do so.

Something that is necessary, but is absent, is a test that the return value is non-null before it's used.

It's better to return the map pointer than to assign it via pointer to pointer.

It also helps to use the size of *map rather than of (HashMap), as it's then easier to visually match that the code is correct.

There's no benefit to using calloc() rather than malloc(), as we're initializing all members programmatically. Using calloc() just wastes the processor's time.

Those changes lead to

     map = malloc(sizeof *map);
     if (!map) { return map; }

The function to free a hash map seems to be completely missing.

It seems strange to use int rather than size_t for size and capacity members.

There's more unnecessary casts in many places:

for (Entry *e = map->head; e != NULL; e = (Entry *) e->next) {
    entry->neighbor = (struct Entry *) (*map)->table[key_hash];
    (*map)->tail = (Entry *) (*map)->tail->next;
\$\endgroup\$
0

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.