Question

I am trying to write a function to clean up the hash table that is generated by this code

/*
* Markov chain random text generator.
*/

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include "eprintf.h"

enum {
NPREF   = 2,    /* number of prefix words */
NHASH   = 4093, /* size of state hash table array */
MAXGEN  = 10000 /* maximum words generated */
};

typedef struct State State;
typedef struct Suffix Suffix;

struct State {  /* prefix + suffix list */
char*   pref[NPREF];    /* prefix words */
Suffix* suf;            /* list of suffixes */
State*  next;           /* next in hash table */
};

struct Suffix { /* list of suffixes */
char*   word;           /* suffix */
Suffix* next;           /* next in list of suffixes */
};

State* lookup(char *prefix[], int create);
void build(char *prefix[], FILE*);
void generate(int nwords);
void add(char *prefix[], char *word);

State* statetab[NHASH]; /* hash table of states */

char NONWORD[] = "\n";  /* cannot appear as real word */

/* markov main: markov-chain random text generation */
int main(void)
{
int i, nwords = MAXGEN;
char *prefix[NPREF];        /* current input prefix */

int c;
long seed;

setProgName("markov");
seed = time(NULL);

srand(seed);
for (i = 0; i < NPREF; i++) /* set up initial prefix */
    prefix[i] = NONWORD;
build(prefix, stdin);
add(prefix, NONWORD);
generate(nwords);
return 0;
}

const int MULTIPLIER = 31;  /* for hash() */

/* hash: compute hash value for array of NPREF strings */
unsigned int hash(char* s[NPREF])
{
unsigned int h;
unsigned char *p;
int i;

h = 0;
for (i = 0; i < NPREF; i++)
    for (p = (unsigned char *) s[i]; *p != '\0'; p++)
        h = MULTIPLIER * h + *p;
return h % NHASH;
}


/* lookup: search for prefix; create if requested. */
/*  returns pointer if present or created; NULL if not. */
/*  creation doesn't strdup so strings mustn't change later. */
State* lookup(char *prefix[NPREF], int create)
{
int i, h;
State *sp;

h = hash(prefix);
for (sp = statetab[h]; sp != NULL; sp = sp->next) {
    for (i = 0; i < NPREF; i++)
        if (strcmp(prefix[i], sp->pref[i]) != 0)
            break;
    if (i == NPREF)     /* found it */
        return sp;
}
if (create) {
    sp = (State *) emalloc(sizeof(State));
    for (i = 0; i < NPREF; i++)
        sp->pref[i] = prefix[i];
    sp->suf = NULL;
    sp->next = statetab[h];
    statetab[h] = sp;
}
return sp;
}

/* addsuffix: add to state. suffix must not change later */
void addsuffix(State *sp, char *suffix)
{
Suffix *suf;

suf = (Suffix *) emalloc(sizeof(Suffix));
suf->word = suffix;
suf->next = sp->suf;
sp->suf = suf;
}

/* add: add word to suffix list, update prefix */
void add(char *prefix[NPREF], char *suffix)
{
State *sp;

sp = lookup(prefix, 1);  /* create if not found */
addsuffix(sp, suffix);
/* move the words down the prefix */
memmove(prefix, prefix+1, (NPREF-1)*sizeof(prefix[0]));
prefix[NPREF-1] = suffix;
}

/* build: read input, build prefix table */
void build(char *prefix[NPREF], FILE *f)
{
char buf[100], fmt[10];

/* create a format string; %s could overflow buf */
sprintf(fmt, "%%%ds", sizeof(buf)-1);
while (fscanf(f, fmt, buf) != EOF)
    add(prefix, estrdup(buf));
}

/* generate: produce output, one word per line */
void generate(int nwords)
{
State *sp;
Suffix *suf;
char *prefix[NPREF], *w;
int i, nmatch;

for (i = 0; i < NPREF; i++) /* reset initial prefix */
    prefix[i] = NONWORD;

for (i = 0; i < nwords; i++) {
    sp = lookup(prefix, 0);
    if (sp == NULL)
        eprintf("internal error: lookup failed");
    nmatch = 0;
    for (suf = sp->suf; suf != NULL; suf = suf->next)
        if (rand() % ++nmatch == 0) /* prob = 1/nmatch */
            w = suf->word;
    if (nmatch == 0)
        eprintf("internal error: no suffix %d %s", i, prefix[0]);
    if (strcmp(w, NONWORD) == 0)
        break;
    printf("%s\n", w);
    memmove(prefix, prefix+1, (NPREF-1)*sizeof(prefix[0]));
    prefix[NPREF-1] = w;
}
}

Here is what I have so far for my clean function

/*Clean Function*/
void clean_up(State *sp)
{
State *temp;
Suffix *temp2, temp3;

for(int h = 0; h < NHASH; h++)
{
    for (sp = statetab[h]; sp != NULL; sp = sp->next)
    {
        while(sp->suf != NULL) 
        {
            temp2= sp->suf;
            temp3= *temp2->next;
            free(temp2);
            sp->suf= &temp3;
        }

    }
}
}

I think im on the right track, I'm going through each index in the hash table, then going from state to state and freeing the suffixes. I'm not sure what to do about the prefixes, because I have to free them before I can free each state. Any help would be greatly appreciated.

Was it helpful?

Solution

In your code, you are copying into a temp3 node, which lives in automatic memory ("on the stack") pointing sp->suf to this memory will (on the next iteration of the loop) cause free to be called with the address of this object (which has not been obtained by malloc, and thus cannot be freed by free() )

void clean_up(State *sp)
{
State *temp;
Suffix *temp2, **pp;

for(int h = 0; h < NHASH; h++)
{
    for (sp = statetab[h]; sp != NULL; sp = sp->next)
    {
        for (pp = &sp->suf; *pp; *pp = temp2) 
        {
            temp2 = (*pp)->next;     
            free(*pp);
        }

    }
}
}

OTHER TIPS

The example code is derived from the Markov program in The Practice of Programming by Kernighan and Pike, a most excellent book.

Given that you are trying to clean up the statetab, the main clean-up function doesn't need any argument. You do have to be careful not to free the states directly in statetab, but you do need to release auxilliary states chained off statetab[i].next.

typedef struct State State;
typedef struct Suffix Suffix;

struct State {  /* prefix + suffix list */
char*   pref[NPREF];    /* prefix words */
Suffix* suf;            /* list of suffixes */
State*  next;           /* next in hash table */
};

struct Suffix { /* list of suffixes */
char*   word;           /* suffix */
Suffix* next;           /* next in list of suffixes */
};

State* statetab[NHASH]; /* hash table of states */

static void free_state(State *state);
static void free_suffix(Suffix *suffix);

static void cleanup(void)
{
   for (int i = 0; i < NHASH; i++)
      free_state(statetab[i]);
}

static void free_state(State *state)
{
    if (state != 0)
    {
        for (int i = 0; i < NPREF; i++)
            free(state->pref[i]);
        free_suffix(state->suf);
        if (state->next != 0)
        {
            free_state(state->next);
            free(state->next);
        }
    }
}

static void free_suffix(Suffix *suffix)
{
    if (suffix != 0)
    {
        free(suffix->word);
        free_suffix(suffix->next);
        free(suffix);
    }
}

Do you see how I've designed the free_xxxx() code based on the design of the xxxx structure?

Caveat Lector: uncompiled code, much less tested code.


I dug up the code from the TPOP site, and tried to apply it. I made some fixes to the freeing code above (syntax error fixed, the null checks in free_state() and free_suffix()), but the code as a whole was not designed to allow the data to be freed.

There are a couple of problems. First, a few of the prefixes are not allocated (NONWORD). It might be possible to avoid releasing those by testing whether a prefix is NONWORD, but that's nasty. It might be possible to allocate those prefixes too (replace NONWORD by estrdup(NONWORD)). I think there's another place, somewhere, that a non-allocated pointer is being stashed in a prefix in the state table; I'm getting crashes in malloc() complaining of 'freeing non-allocated memory' (which is distinct from 'double freeing allocated memory', I believe), but I've not managed to resolve that.

However, that then changes to another problem; the prefixes are reused. That is, almost every prefix in the system is used as the the second word of one prefix, then as the first word of the next prefix. Thus, you can't readily free the prefixes.

If you were to design this so that the memory could be released, then you'd probably design it so that there was a system of 'atoms' (immutable strings) such that each word was allocated once and reused as often as necessary (see C Interfaces and Implementations: Techniques for Creating Reusable Code by D Hanson for the source of the term). The code freeing the state table would then concentrate only on the non-word data. There'd be code to release the complete set of atoms as well.

I ran the Markov program under valgrind without the cleanup; there are no memory access problems and no leaked data; it is all still accessible at program exit. I was using a data file of about 15,000 words (and about 2900 distinct words), and the statistics were:

==9610== HEAP SUMMARY:
==9610==     in use at exit: 695,269 bytes in 39,567 blocks
==9610==   total heap usage: 39,567 allocs, 0 frees, 695,269 bytes allocated
==9610== 
==9610== LEAK SUMMARY:
==9610==    definitely lost: 0 bytes in 0 blocks
==9610==    indirectly lost: 0 bytes in 0 blocks
==9610==      possibly lost: 0 bytes in 0 blocks
==9610==    still reachable: 695,269 bytes in 39,567 blocks

So, you set yourself an interesting exercise. However, I think it is not achievable without reworking some of the memory allocation mechanism so that the data can be freed cleanly.

(On BSD, and hence on Mac OS X too, there are a pair of functions in <stdlib.h> called setprogname() and getprogname(). On BSD, setprogname() is called automatically before the main() gets going (with argv[0], I believe). The declaration in eprintf.h conflicts with the declaration in <stdlib.h>, which may be why the code in the question uses setProgName() instead of the original setprogname(). I chose to fix setprogname() in eprintf.h so that it took a const char * argument and therefore matched the declaration in <stdlib.h>.)


TPOP was previously at http://plan9.bell-labs.com/cm/cs/tpop and http://cm.bell-labs.com/cm/cs/tpop but both are now (2015-08-10) broken. See also Wikipedia on TPOP.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top