Exercise 6.4 - Words and Frequency¶
Question¶
Write a program that prints the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count.
/*
* Write a program that prints the distinct words in its input sorted into
* decreasing order of frequency of occurrence. Precede each word by its count.
*/
/*
* Create a Tree with word and count, just like tnode.
* Parse the tree and create a new tree with count and list of words in the
* node. Print the new tree in-order traversal.
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXWORD 1000
struct tnode {
char *word;
int count;
struct tnode *left;
struct tnode *right;
};
struct bynumbernode {
int number;
struct words *wordlist;
struct bynumbernode *left;
struct bynumbernode *right;
};
struct words {
char *word;
struct words *nextword;
};
struct tnode *addtree(struct tnode *p, char *w);
struct bynumbernode *addnumtree(struct bynumbernode *, int, char *);
struct words *addwordtolist(struct words *, char *);
void printwords(const struct words *, const int);
struct bynumbernode *traverse(const struct tnode *, struct bynumbernode *);
void treeprint(const struct bynumbernode *);
int mgetword(char *, int);
struct tnode *talloc(void) {
return (struct tnode *) malloc(sizeof(struct tnode));
};
struct bynumbernode *bynumbernodealloc(void) {
return (struct bynumbernode *) malloc(sizeof(struct bynumbernode));
};
struct words *wordsalloc(void) {
return (struct words *) malloc(sizeof(struct words));
};
#define BUFSIZE 100
char buf[BUFSIZE];
int bufp = 0;
int getch(void) { return (bufp > 0) ? buf[--bufp] : getchar(); }
void ungetch(int c) {
if (bufp >= BUFSIZE) {
printf("ungetch: too many characters\n");
} else
buf[bufp++] = c;
return;
}
char *mstrdup(char *s) {
char *p;
p = (char *) malloc(strlen(s) + 1);
if (p != NULL) {
strcpy(p, s);
}
return p;
}
int getword(char *word, int lim) {
int c, getch(void);
void ungetch(int);
char *w = word;
while (isspace(c = getch()) || c == '_' || c == '/' || c == '#' ||
c == '*' || c == '"');
if (c != EOF) {
*w++ = c;
}
if (!isalpha(c)) {
*w = '\0';
return c;
}
for (; --lim > 0; w++) {
if (!isalnum(*w = getch())) {
ungetch(*w);
break;
}
}
*w = '\0';
return word[0];
}
/* addtree : add a node with w at or below p */
struct tnode *addtree(struct tnode *p, char *w) {
int cond;
if (p == NULL) { /* new word */
p = talloc(); // make new node
p->word = mstrdup(w);
p->count = 1;
p->left = p->right = NULL;
} else if ((cond = strcmp(w, p->word)) == 0) {
p->count++;
} else if (cond < 0) {
p->left = addtree(p->left, w);
} else {
p->right = addtree(p->right, w);
}
return p;
}
// treeprint: in-order print of tree p
void treeprint(const struct bynumbernode *p) {
if (p != NULL) {
treeprint(p->left);
printwords(p->wordlist, p->number);
treeprint(p->right);
}
}
void printwords(const struct words *w, const int count) {
if (w != NULL) {
printf("%d->%s", count, w->word);
w = w->nextword;
}
while (w != NULL) {
printf(", %s", w->word);
w = w->nextword;
}
printf("\n");
}
struct words *addwordtolist(struct words *list, char *w) {
if (list == NULL) {
list = wordsalloc();
list->word = mstrdup(w);
list->nextword = NULL;
} else {
list->nextword = addwordtolist(list->nextword, w);
}
return list;
}
struct bynumbernode *addnumtree(struct bynumbernode *n, int i, char *w) {
if (n == NULL) {
n = bynumbernodealloc();
n->number = i;
n->wordlist = NULL;
n->wordlist = addwordtolist(n->wordlist, w);
} else if (n->number == i) {
addwordtolist(n->wordlist, w);
} else if (n->number < i) {
n->left = addnumtree(n->left, i, w);
} else {
n->right = addnumtree(n->right, i, w);
}
return n;
}
struct bynumbernode *traverse(const struct tnode *p, struct bynumbernode *q) {
if (p != NULL) {
q = traverse(p->left, q);
q = addnumtree(q, p->count, p->word);
q = traverse(p->right, q);
}
return q;
}
void main() {
struct tnode *root;
char word[MAXWORD];
struct bynumbernode *nroot;
root = NULL;
nroot = NULL;
while (getword(word, MAXWORD) != EOF) {
if (isalpha(word[0])) {
root = addtree(root, word);
}
}
nroot = traverse(root, nroot);
printf("Words by frequency:\n");
treeprint(nroot);
return;
}
Explanation¶
This program prints the distinct words in its input sorted into decreasing order of frequency of occurrence. Each word is preceded by its count.
This works by creating a Tree with word and count, just like tnode. Parse the tree and create a new tree with count and list of words in the node. Print the new tree in-order traversal.
ab
ab
bc
cd
ef
gh
ab
x
Words and their frequencies:
bc->1
cd->1
ef->1
gh->1
ab->3