Question

I'm trying to write a cross-referencer that prints a list of words and the line numbers in which they occur, ignoring "noise" words such as a, an, the, etc. (Kernighan and Ritchie, ANSI edition p 143 question 6-3). The following is the code :

#include<stdio.h>

#include<stdlib.h>

#include<string.h>

#include"getch.h"

#include"search_tools.h" /*this contains the binsearch function*/

#include"getword.h"

#define MAXWORD 100

char* noise[] = {"a","and","if","is","the"}; /*noise words that we need to exclude*/

int linecount = 1;

struct tnode {

        int line_number;

        char* word;

        struct tnode* left; /*for all those words lexicographically less than word*/

        struct tnode* middle; /*for all those words lexicographically equal to word*/

        struct tnode* right; /*for all those words lexicographically greater than word*/

              };

/*this function tells you where you should put your new word*/

struct tnode* addtree (struct tnode* p, char* w) {

        int cond;

        if (p == NULL) {

                p = (struct tnode*)malloc(sizeof(struct tnode));

                p -> word = strdup(w);

                p -> line_number = linecount;

                p -> left = p -> right = p -> middle = NULL;

                        }

        else if ((cond = strcmp(w,p->word)) == 0) {

                p -> middle = addtree(p -> middle,w);


                                                   }

        else if (cond > 0) {

                p -> right = addtree(p -> right,w);

                            }

        else if (cond < 0) {

                p -> left = addtree(p -> left, w);

                            }

        else {

                ;

             }

        return p;
                                                   }

void treeprint (struct tnode* p) {

        struct tnode* q;

        if (p != NULL) {

                treeprint(p->left);

                printf("%s occurs in the following lines:\n",p -> word);

                for (q = p; q != NULL; q = q->middle)

                        printf("%4d ",q -> line_number);

                printf("\n\n");

                treeprint(p->right);
                        }


                                  }

int main (int argc, char* argv[]) {

        struct tnode* root;

        char word[MAXWORD];

        root = NULL;

        int c;

        while ((c = getword(word,MAXWORD)) != EOF) {

                if (isalpha(word[0]) && binsearch(word,noise,5) == -1)

                        root = addtree(root,word);

                else if (c == '\n')

                        linecount++;




                                                    }

        treeprint(root);

        return 0;

                                   }

This is the getword function that I used :

int getword (char* word, int lim) {

    int c;

    char* w;

    w = word;      

    while (isspace(c = getch())) /*skip white spaces*/

            ;

    if (c != EOF)

            *w++ = c;

    if (!isalpha(c)) { /*the first character in a word can be a #, as in #define or   #include*/

            *w = '\0';

            return c;

    }

    while(isalnum(c = getch()) && --lim > 0)

            *w++ = c;

    ungetch(c);

    *w = '\0';

    return word[0]; /*signal that a word has been collected*/
}

And the following is binsearch for searching char pointers:

int binsearch (char * word, struct key tab[], int n) {

        int cond;

        int low, high, mid;

        low = 0;

        high = n -1;

        while (low <= high) {

                mid = (low+high)/2;

                if ((cond = strcmp(word,tab[mid].word)) < 0)

                        high = mid - 1;

                else if (cond > 0)

                        low = mid + 1;

                else /*found it*/

                        return mid;
                            }

        return -1;

}

If getword comes across a non alphabetic (first character of word) or non-numeric character, it's supposed to return it. So I set lincecount to increment when getword returns a '\n'. But this doesn't seem to happen.

Was it helpful?

Solution

You have a problem here in the getword function.

while (isspace(c = getch())) /*skip white spaces*/

        ;

because '\n' is among the whitespace characters that will be skipped. So the code that tries to count lines will never see it.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top