Associative arrays/Creation/C
There are no associative arrays in the C language. Some libraries provide hash tables, red-black trees, or other data structures that can become associative arrays. Some of these libraries are awful. There are too many easy ways to get a compiler error or a segfault.
Libraries
Judy
Example using Judy.
<lang c>#include <stdio.h>
- include <Judy.h>
int main() {
Pvoid_t assarray = (Pvoid_t) NULL; PWord_t value; int retval;
/* populating */ JSLI(value, assarray, "red"); *value = 0xff0000; JSLI(value, assarray, "green"); *value = 0x00ff00; JSLI(value, assarray, "blue"); *value = 0x0000ff;
/* retrieving existent */ JSLG(value, assarray, "blue"); printf("blue is #%06lx\n", (unsigned long)*value);
/* unknown key */ JSLG(value, assarray, "nonexistingkey"); if ( value == NULL ) { fprintf(stderr, "key 'nonexistingkey' does not exists\n"); }
/* deleting */ JSLD(retval, assarray, "red"); JSLG(value, assarray, "red"); if ( value == NULL ) { fprintf(stderr, "key red does not exist anymore\n"); }
JudySLFreeArray(&assarray, PJE0);
return 0;
}</lang>
We can easily iterate over pair of keys (indexes) and values.
<lang c>#include <stdio.h>
- include <Judy.h>
- define MAXLINELEN 256
int main() {
Pvoid_t assoc_arr = (Pvoid_t) NULL; PWord_t val;
uint8_t idx[MAXLINELEN];
// insert some values JSLI(val, assoc_arr, "hello"); *val = 4; JSLI(val, assoc_arr, "world"); *val = 8; JSLI(val, assoc_arr, "!"); *val = 16;
// iterate over indexes-values idx[0] = '\0';
JSLF(val, assoc_arr, idx); while(val != NULL) { printf("'%s' -> %d\n", idx, *val); JSLN(val, assoc_arr, idx); }
JudySLFreeArray(&assoc_arr, PJE0); return 0;
}</lang>
POSIX hcreate()
POSIX defines hcreate(), hdestroy() and hsearch() to manage a hash table. If you have a Unix system or clone, then your libc probably has these functions, so there is no extra library to install.
- You can only have one hash table, in the entire program!
- With GNU libc, you can call hcreate_r() to have more than one hash table.
- There is no way to delete an entry from the table!
- There is no way to iterate all keys in the table!
- Every key must be a NUL-terminated string.
- Every value is a void *, needs a cast to the correct type.
- The table might become full, and refuse to accept more entries. If you use hcreate(50) then the table might hold at most 50 entries.
- hdestroy() is almost impossible to use. Many programs keep the table and never call hdestroy().
- With BSD libc, hdestroy() will call free() with each key in the table.
- Else the program leaks memory, because there is no way to iterate the keys to free them.
The Linux manual page hcreate(3) has a short example.
The next example shows how to use hdestroy(), how to use intptr_t as a value, how to use double as a key, and how to pretend to delete an entry from the table! POSIX hash table is awful, and the code is too long, but the program does run.
<lang c>#include <inttypes.h> /* PRIxPTR */
- include <search.h> /* hcreate(), hsearch(), hdestroy() */
- include <stdint.h> /* intptr_t */
- include <stdio.h> /* perror(), printf(), puts(), snprintf() */
- include <stdlib.h> /* calloc(), exit(), free() */
- include <string.h> /* strdup() */
void fail(char *message) { perror(message); exit(1); }
/*
* color_example: * Maps color strings to integer values, like "red" => 0xff0000. * * Because the values (ENTRY data) are pointers, I use casts to and from * intptr_t to store integers instead of pointers. */
void color_example(void) { ENTRY e, *ep; int i;
puts("color_example:");
/* Create an empty table. */ if (hcreate(50) == 0) fail("hcreate");
/* Add keys => values to table. */ { char *keys[] = { "red", "green", "blue" }; intptr_t values[] = { 0xff0000, 0x00ff00, 0x0000ff };
for (i = 0; i < 3; i++) { /* Need strdup() when using ENTER. */ if ((e.key = strdup(keys[i])) == NULL) fail("strdup"); e.data = (void *)values[i];
/* Insert e into table. */ if (hsearch(e, ENTER) == NULL) fail("hsearch"); } }
/* Check if keys exist. */ { char *keys[] = { "blue", "green", "orange", "red" };
for (i = 0; i < 4; i++) { /* Not using ENTER, so no strdup(). */ e.key = keys[i]; ep = hsearch(e, FIND); if (ep) { printf("\t'%s' has value %06" PRIxPTR "\n", ep->key, (intptr_t)ep->data); } else printf("\t'%s' is not in table\n", e.key); } }
/* * Destroy table. On my machine, hdestroy() will free() all the * keys in the table. This is why I had to strdup() my keys. */ hdestroy(); }
/*
* number_example: * Maps numbers to strings or numbers, like 2 => 2.71828 or 4 => "four". * * First I need a VALUE that can either be a string or a number. */
typedef struct value { int v_type;
- define T_NUM 0x1 /* use v_num */
- define T_STR 0x2 /* use v_str */
- define T_DEL 0x4 /* pretend to delete from table */
union { double u_num; char *u_str; } v_union;
- define v_num v_union.u_num
- define v_str v_union.u_str
struct value *next; /* link all VALUEs in a list */ } VALUE;
void number_example(void) { ENTRY e, *ep; VALUE *all = NULL, *vp; int i, s; char buf[16];
puts("number_example:");
if (hcreate(50) == 0) fail("hcreate");
/* Add numeric values. */ { double keys[] = { 2, 3, 4, 5.6 }; double values[] = { 2.71828, 3.14159, 4.47214, 7.8 };
for (i = 0; i < 4; i++) { /* Must convert keys to strings. */ snprintf(buf, sizeof buf, "%.6g", keys[i]); if ((e.key = strdup(buf)) == NULL) fail("strdup");
/* Allocate a new VALUE. */ if ((vp = calloc(1, sizeof vp[0])) == NULL) fail("calloc"); vp->v_type = T_NUM; vp->v_num = values[i]; e.data = vp;
/* Remember to free it. */ vp->next = all; all = vp;
if (hsearch(e, ENTER) == NULL) fail("hsearch"); } }
/* * Add string values. * * For this example, all of my values will be static string * constants. This removes the need to free(vp->v_str) * when I replace or delete a value. */ { double keys[] = { 4, 8, 10 }; char *values[] = { "four", "eight", "ten" };
for (i = 0; i < 3; i++) { /* Must convert keys to strings. */ snprintf(buf, sizeof buf, "%.6g", keys[i]);
/* * This shows how to add or replace a value * in the table (so I can change an entry * from 4 => 4.47214 to 4 => "four"). */ e.key = buf; ep = hsearch(e, FIND); if (ep) { /* Already have key. Change value. */ vp = (VALUE *)ep->data; vp->v_type = T_STR; vp->v_str = values[i]; } else { /* Need to add key. */ if ((e.key = strdup(buf)) == NULL) fail("strdup");
if ((vp = calloc(1, sizeof vp[0])) == NULL) fail("calloc"); vp->v_type = T_STR; vp->v_str = values[i]; e.data = vp;
/* Remember to free it. */ vp->next = all; all = vp;
if (hsearch(e, ENTER) == NULL) fail("hsearch"); } } }
/* Delete key 8. */ snprintf(buf, sizeof buf, "%.6g", (double)8); e.key = buf; ep = hsearch(e, FIND); if (ep) { vp = (VALUE *)ep->data; vp->v_type = T_DEL; }
/* Check if keys exist. */ { double keys[] = { 2, 3, 4, 5.6, 7, 8, 10 };
for (i = 0; i < 7; i++) { snprintf(buf, sizeof buf, "%.6g", keys[i]); e.key = buf; ep = hsearch(e, FIND);
if (ep == NULL || (vp = (VALUE *)ep->data)->v_type & T_DEL) { printf("\t%s is not in the table\n", buf); } else if (vp->v_type & T_NUM) { printf("\t%s has value %g\n", buf, vp->v_num); } else if (vp->v_type & T_STR) { printf("\t%s has value '%s'\n", buf, vp->v_str); } else { printf("\t%s has invalid value\n", buf); } } }
/* Destroy table and keys. */ hdestroy();
/* Free all values. */ while (all != NULL) { vp = all->next; free(all); all = vp; } }
int main() { color_example(); number_example(); return 0; }</lang>
Output:
color_example: 'blue' has value 0000ff 'green' has value 00ff00 'orange' is not in table 'red' has value ff0000 number_example: 2 has value 2.71828 3 has value 3.14159 4 has value 'four' 5.6 has value 7.8 7 is not in the table 8 is not in the table 10 has value 'ten'
BSD dbopen()
BSD provides dbopen() in <db.h>. This is Berkeley DB 1.85. Because dbopen() often puts a database on disk, one easily forgets that dbopen(NULL, ...) can put a small database in memory. When the type is DB_BTREE or DB_HASH, then the database is an associative array.
- Warning: some GNU/Linux systems have a dbopen(3) manual page without a real dbopen() function. See Debian bug #337581.
The next example translates the previous example to dbopen(). Every key and value is a *void, needs a cast to the correct type. Because BSD also has <err.h>, I remove fail() and use err().
<lang c>#include <sys/types.h>
- include <err.h> /* err() */
- include <fcntl.h>
- include <limits.h>
- include <stdio.h> /* printf(), puts() */
- include <string.h> /* memset() */
- include <db.h> /* dbopen() */
/*
* color_example: * Maps color strings to integer values, like "red" => 0xff0000. */
void color_example(void) { DB *db; DBT key, value; int i, r;
puts("color_example:");
/* Create an empty table. */ db = dbopen(NULL, O_RDWR, 0777, DB_HASH, NULL); if (db == NULL) err(1, "dbopen");
/* Add keys => values to table. */ { char *keys[] = { "red", "green", "blue" }; int values[] = { 0xff0000, 0x00ff00, 0x0000ff };
for (i = 0; i < 3; i++) { /* key.size must count the '\0' byte */ key.data = keys[i]; key.size = strlen(keys[i]) + 1; value.data = &values[i]; value.size = sizeof values[i];
/* * Insert key, value into table. DB will * allocate its own storage for key, value. */ if (db->put(db, &key, &value, 0) < 0) err(1, "db->put"); } }
/* Check if keys exist. */ { char *keys[] = { "blue", "green", "orange", "red" };
for (i = 0; i < 4; i++) { key.data = (void *)keys[i]; key.size = strlen(keys[i]) + 1;
r = db->get(db, &key, &value, 0); if (r < 0) err(1, "db->get"); else if (r > 0) { printf("\t'%s' is not in table\n", (char *)key.data); } else { printf("\t'%s' has value %06x\n", (char *)key.data, *(int *)value.data); } } }
/* Destroy table. */ db->close(db); }
/*
* number_example: * Maps numbers to strings or numbers, like 2 => 2.71828 or 4 => "four". * * First I need a VALUE that can either be a string or a number. */
typedef struct value { int v_type;
- define T_NUM 0x1 /* use v_num */
- define T_STR 0x2 /* use v_str */
union { double u_num; char *u_str; } v_union;
- define v_num v_union.u_num
- define v_str v_union.u_str
} VALUE;
void number_example(void) { DB *db; DBT key, value; VALUE v, *vp; double d; int i, r;
puts("number_example:");
db = dbopen(NULL, O_RDWR, 0777, DB_HASH, NULL); if (db == NULL) err(1, "dbopen");
/* Add numeric values. */ { double keys[] = { 2, 3, 4, 5.6 }; double values[] = { 2.71828, 3.14159, 4.47214, 7.8 };
for (i = 0; i < 4; i++) { memset(&v, 0, sizeof v); v.v_type = T_NUM; v.v_num = values[i];
key.data = &keys[i]; key.size = sizeof keys[i]; value.data = &v; value.size = sizeof v;
if (db->put(db, &key, &value, 0) < 0) err(1, "db->put"); } }
/* * Add string values. * * For this example, all of my values will be static string * constants. This removes the need to free(vp->v_str) * when I replace or delete a value. */ { double keys[] = { 4, 8, 10 }; char *values[] = { "four", "eight", "ten" };
for (i = 0; i < 3; i++) { memset(&v, 0, sizeof v); v.v_type = T_STR; v.v_str = values[i];
key.data = &keys[i]; key.size = sizeof keys[i]; value.data = &v; value.size = sizeof v;
/* * db->put can replace an entry (so I can change * it from 4 => 4.47214 to 4 => "four"). * * I am storing the strings outside the DB. * To put them inside the DB, I would need to * remove the v.v_str pointer and append each * string data to value.data. */ if (db->put(db, &key, &value, 0) < 0) err(1, "db->put"); } }
/* Delete key 8. */ d = 8; key.data = &d; key.size = sizeof d; r = db->del(db, &key, 0); if (r < 0) err(1, "db->del");
/* Iterate all keys. */ r = db->seq(db, &key, &value, R_FIRST); if (r < 0) err(1, "db->seq"); else if (r > 0) puts("\tno keys!"); else { printf("\tall keys: %g", *(double *)key.data); while ((r = db->seq(db, &key, &value, R_NEXT)) == 0) printf(", %g", *(double *)key.data); if (r < 0) err(1, "db->seq"); puts(""); }
/* Check if keys exist. */ { double keys[] = { 2, 3, 4, 5.6, 7, 8, 10 };
for (i = 0; i < 7; i++) { key.data = &keys[i]; key.size = sizeof keys[i];
r = db->get(db, &key, &value, 0); if (r < 0) err(1, "db->get"); else if (r > 0) { printf("\t%g is not in the table\n", keys[i]); continue; }
vp = (VALUE *)value.data; if (vp->v_type & T_NUM) { printf("\t%g has value %g\n", keys[i], vp->v_num); } else if (vp->v_type & T_STR) { printf("\t%g has value '%s'\n", keys[i], vp->v_str); } else { printf("\t%g has invalid value\n", keys[i]); } } }
db->close(db); }
int main() { color_example(); number_example(); return 0; }</lang>
Output:
color_example: 'blue' has value 0000ff 'green' has value 00ff00 'orange' is not in table 'red' has value ff0000 number_example: all keys: 2, 3, 5.6, 4, 10 2 has value 2.71828 3 has value 3.14159 4 has value 'four' 5.6 has value 7.8 7 is not in the table 8 is not in the table 10 has value 'ten'
BSD sys/tree.h
<lang c>#include <sys/tree.h>
- include <err.h> /* err() */
- include <stdio.h> /* printf(), puts() */
- include <stdlib.h> /* calloc(), free() */
- include <string.h> /* strcmp() */
/*
* color_example: * Maps color strings to integer values, like "red" => 0xff0000. * * I will use a red-black tree of type 'struct ctree', which contains * nodes of type 'struct cnode'. */
struct cnode { RB_ENTRY(cnode) entry; char *key; int value; };
int cnodecmp(struct cnode *a, struct cnode *b) { return strcmp(a->key, b->key); }
RB_HEAD(ctree, cnode); RB_GENERATE(ctree, cnode, entry, cnodecmp)
void color_example(void) { struct ctree head = RB_INITIALIZER(&head); /* an empty tree */ struct cnode n, *np, *op; int i;
puts("color_example:");
/* Add keys => values to tree. */ { char *keys[] = { "red", "green", "blue" }; int values[] = { 0xff0000, 0x00ff00, 0x0000ff };
for (i = 0; i < 3; i++) { if ((np = calloc(1, sizeof np[0])) == NULL) err(1, "calloc"); np->key = keys[i]; np->value = values[i]; RB_INSERT(ctree, &head, np); } }
/* Check if keys exist. */ { char *keys[] = { "blue", "green", "orange", "red" };
for (i = 0; i < 4; i++) { n.key = keys[i]; np = RB_FIND(ctree, &head, &n);
if (np) { printf("\t'%s' has value %06x\n", np->key, np->value); } else { printf("\t'%s' is not in tree\n", n.key); } } }
/* Free tree. */ for (np = RB_MIN(ctree, &head); np != NULL; np = op) { op = RB_NEXT(ctree, &head, np); RB_REMOVE(ctree, &head, np); free(np); } }
/*
* number_example: * Maps numbers to strings or numbers, like 2 => 2.71828 or 4 => "four". * * This node has a value that can either be a string or a number. */
struct dnode { RB_ENTRY(dnode) entry; double key; int v_type;
- define T_NUM 0x1 /* use v_num */
- define T_STR 0x2 /* use v_str */
union { double u_num; char *u_str; } v_union;
- define v_num v_union.u_num
- define v_str v_union.u_str
};
int dnodecmp(struct dnode *a, struct dnode *b) { double aa = a->key; double bb = b->key; return (aa < bb) ? -1 : (aa > bb); }
RB_HEAD(dtree, dnode); RB_GENERATE(dtree, dnode, entry, dnodecmp)
void number_example(void) { struct dtree head = RB_INITIALIZER(&head); struct dnode n, *np, *op; int i;
puts("number_example:");
/* Add numeric values. */ { double keys[] = { 2, 3, 4, 5.6 }; double values[] = { 2.71828, 3.14159, 4.47214, 7.8 };
for (i = 0; i < 4; i++) { if ((np = calloc(1, sizeof np[0])) == NULL) err(1, "calloc"); np->key = keys[i]; np->v_type = T_NUM; np->v_num = values[i]; RB_INSERT(dtree, &head, np); } }
/* * Add string values. * * For this example, all of my values will be static string * constants. This removes the need to free(np->v_str) * when I replace or delete a value. */ { double keys[] = { 4, 8, 10 }; char *values[] = { "four", "eight", "ten" };
for (i = 0; i < 3; i++) { /* * This shows how to add or replace a value * in the tree (so I can change an entry * from 4 => 4.47214 to 4 => "four"). */ n.key = keys[i]; if (np = RB_FIND(dtree, &head, &n)) { np->v_type = T_STR; np->v_str = values[i]; } else if (np = calloc(1, sizeof np[0])) { np->key = keys[i]; np->v_type = T_STR; np->v_str = values[i]; RB_INSERT(dtree, &head, np); } else err(1, "calloc"); } }
/* Delete key 8. */ n.key = 8; if (np = RB_FIND(dtree, &head, &n)) RB_REMOVE(dtree, &head, np);
/* Iterate all keys. */ i = 1; RB_FOREACH(np, dtree, &head) { if (i) { printf("\tall keys: %g", np->key); i = 0; } else printf(", %g", np->key); } if (i) puts("\tno keys!"); else puts("");
/* Check if keys exist. */ { double keys[] = { 2, 3, 4, 5.6, 7, 8, 10 };
for (i = 0; i < 7; i++) { n.key = keys[i]; np = RB_FIND(dtree, &head, &n);
if (np == NULL) { printf("\t%g is not in the tree\n", keys[i]); } else if (np->v_type & T_NUM) { printf("\t%g has value %g\n", keys[i], np->v_num); } else if (np->v_type & T_STR) { printf("\t%g has value '%s'\n", keys[i], np->v_str); } else { printf("\t%g has invalid value\n", keys[i]); } } }
/* Free tree. */ for (np = RB_MIN(dtree, &head); np != NULL; np = op) { op = RB_NEXT(dtree, &head, np); RB_REMOVE(dtree, &head, np); free(np); } }
int main() { color_example(); number_example(); return 0; }</lang>
Output:
color_example: 'blue' has value 0000ff 'green' has value 00ff00 'orange' is not in tree 'red' has value ff0000 number_example: all keys: 2, 3, 4, 5.6, 10 2 has value 2.71828 3 has value 3.14159 4 has value 'four' 5.6 has value 7.8 7 is not in the tree 8 is not in the tree 10 has value 'ten'