Exercise 6.1 - getword
Question
Our version of getword does not properly handle underscores, string constants, comments, or preprocessor control lines. Write a better version.
#include <ctype.h>
#include <stdio.h>
#include <string.h>
/* key: one C keyword paired with the number of times it has been seen. */
struct key {
  char *word; /* keyword spelling */
  int count;  /* occurrences counted so far */
} keytab[] = {
    /* Entries must stay in ascending strcmp() order: binsearch() depends
     * on it. */
    {"auto", 0},     {"break", 0},    {"case", 0},     {"char", 0},
    {"const", 0},    {"continue", 0}, {"default", 0},  {"do", 0},
    {"double", 0},   {"else", 0},     {"enum", 0},     {"extern", 0},
    {"float", 0},    {"for", 0},      {"goto", 0},     {"if", 0},
    {"int", 0},      {"long", 0},     {"register", 0}, {"return", 0},
    {"short", 0},    {"signed", 0},   {"sizeof", 0},   {"static", 0},
    {"struct", 0},   {"switch", 0},   {"typedef", 0},  {"union", 0},
    {"unsigned", 0}, {"void", 0},     {"volatile", 0}, {"while", 0}};
/* mygetword: read the next token from input into the caller's buffer */
int mygetword(char *, int);
/* binsearch: look a word up in an array of struct key; -1 if absent */
int binsearch(char *, struct key *, int);
/* NKEYS: number of entries in keytab */
#define NKEYS (sizeof(keytab) / sizeof(keytab[0]))
/* MAXWORD: size of the word buffer that main passes to mygetword */
#define MAXWORD 100
/* main: count the keywords from keytab that appear in the input and print
 * every keyword seen at least once together with its count.
 * Command-line arguments are not used. Returns 0. */
int main(int argc, char *argv[]) {
  char word[MAXWORD];
  size_t i;

  (void)argc;
  (void)argv;

  while (mygetword(word, MAXWORD) != EOF) {
    /* <ctype.h> functions need a value representable as unsigned char;
     * a plain char may be negative for bytes above 0x7f. */
    if (isalpha((unsigned char)word[0])) {
      int n = binsearch(word, keytab, (int)NKEYS);

      if (n >= 0)
        keytab[n].count++;
    }
  }

  /* NKEYS has type size_t, so index with the same type. */
  for (i = 0; i < NKEYS; i++)
    if (keytab[i].count > 0)
      printf("%4d %s\n", keytab[i].count, keytab[i].word);
  return 0;
}
/* binsearch: binary search for word in lkeytab[0] .. lkeytab[n-1].
 * The entries have to be in ascending strcmp() order.
 * Returns the index of the matching entry, or -1 when there is none. */
int binsearch(char *word, struct key lkeytab[], int n) {
  int lo = 0;
  int hi = n - 1;

  while (lo <= hi) {
    int mid = (lo + hi) / 2;
    int cmp = strcmp(word, lkeytab[mid].word);

    if (cmp == 0)
      return mid;
    if (cmp < 0)
      hi = mid - 1; /* word sorts before the probe: keep the lower half */
    else
      lo = mid + 1; /* word sorts after the probe: keep the upper half */
  }
  return -1;
}
int getch(void);
void ungetch(int);

/* skip_line: discard input through the next newline that is not preceded
 * by a backslash (line continuation); stops early at EOF. */
static void skip_line(void) {
  int c;

  while ((c = getch()) != EOF && c != '\n')
    if (c == '\\' && getch() == EOF) /* drop the escaped character too */
      break;
}

/* skip_block_comment: discard input through the closing star-slash of a
 * block comment whose opener has already been read; stops early at EOF. */
static void skip_block_comment(void) {
  int c, prev = 0;

  while ((c = getch()) != EOF) {
    if (prev == '*' && c == '/')
      return;
    prev = c;
  }
}

/* skip_quoted: discard the rest of a string or character constant whose
 * opening delimiter `quote` has already been read. Backslash escapes are
 * honoured, so an escaped delimiter does not end the constant. An
 * unterminated constant ends at the newline or at EOF. */
static void skip_quoted(int quote) {
  int c;

  while ((c = getch()) != EOF && c != quote && c != '\n')
    if (c == '\\' && getch() == EOF)
      break;
}

/* mygetword: fetch the next token from the input.
 *
 * word - buffer that receives the token as a NUL-terminated string
 * lim  - size of that buffer in bytes; at most lim - 1 characters are stored
 *
 * White space and comments (both forms) are skipped. Tokens are:
 *   - an identifier or keyword (letters, digits and underscores, not
 *     starting with a digit): stored in word; characters that do not fit
 *     are read and dropped so they cannot show up as a separate word;
 *   - a string or character constant: consumed entirely, word holds only
 *     the opening quote;
 *   - a preprocessor line starting with '#': consumed through the end of
 *     the (possibly continued) line, word holds "#";
 *   - a numeric constant: consumed entirely, word holds its first digit;
 *   - any other character: stored on its own.
 *
 * Returns the first character of the token, or EOF at end of input (word
 * is then the empty string). EOF is also returned, without reading any
 * input, when lim is too small to hold one character plus the terminator. */
int mygetword(char *word, int lim) {
  char *w = word;
  char *end;
  int c, first;

  if (lim < 2) {
    if (lim == 1)
      *word = '\0';
    return EOF;
  }
  end = word + lim - 1; /* last slot, reserved for the terminator */

  /* Skip white space and comments until a token starts. */
  for (;;) {
    int next;

    do {
      c = getch();
    } while (c != EOF && isspace((unsigned char)c));
    if (c != '/')
      break;
    next = getch();
    if (next == '/') {
      skip_line();
    } else if (next == '*') {
      skip_block_comment();
    } else {
      /* A lone '/' is an operator: give back what follows it, once. */
      if (next != EOF)
        ungetch(next);
      break;
    }
  }

  if (c == EOF) {
    *w = '\0';
    return EOF;
  }

  first = c;
  *w++ = (char)c;

  if (isalpha((unsigned char)c) || c == '_') {
    while ((c = getch()) != EOF && (isalnum((unsigned char)c) || c == '_'))
      if (w < end)
        *w++ = (char)c;
    /* EOF is never pushed back: the next read reports it again. */
    if (c != EOF)
      ungetch(c);
  } else if (c == '"' || c == '\'') {
    skip_quoted(c);
  } else if (c == '#') {
    skip_line();
  } else if (isdigit((unsigned char)c)) {
    /* Swallow hex digits, exponents and suffixes so that the letters in
     * a constant such as 0x1f or 10L are not read as a word. */
    while ((c = getch()) != EOF &&
           (isalnum((unsigned char)c) || c == '.' || c == '_'))
      ;
    if (c != EOF)
      ungetch(c);
  }

  *w = '\0';
  return first;
}
#define BUFSIZE 100
/* Pushback stack shared by getch and ungetch. The elements are int, not
 * char, so that every value getchar() can return, EOF included, survives
 * a round trip unchanged. */
int buf[BUFSIZE];
int bufp = 0; /* next free position in buf */

/* getch: return the most recently pushed-back character if there is one,
 * otherwise read a character from standard input. */
int getch(void) { return (bufp > 0) ? buf[--bufp] : getchar(); }

/* ungetch: push c back so that the next getch() returns it. When the
 * stack is full, c is dropped and a diagnostic is written to stderr so it
 * does not mix with the program's regular output. */
void ungetch(int c) {
  if (bufp >= BUFSIZE)
    fprintf(stderr, "ungetch: too many characters\n");
  else
    buf[bufp++] = c;
}
This is the program from Section 6.3, which implements getword.
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#define BUFSIZE 100 /* capacity of the pushback buffer */
#define MAXWORD 100 /* size of the word buffer used by main */
#define NKEYS (sizeof keytab / sizeof(struct key)) /* entries in keytab */
/* buffer for ungetch; int rather than char so that EOF and bytes above
 * 0x7f are stored without being altered */
int buf[BUFSIZE];
int bufp = 0; /* next free position in buf */
/* key: a word to look for, paired with the number of times it was seen.
 *
 * keytab must be kept in ascending strcmp() order because binsearch()
 * does a binary search over it. "printf" is a library function rather
 * than a keyword; like any added word it has to sit at its sorted
 * position. Appended after "while" it could never be found.
 * Only a subset of the C keywords is listed here. */
struct key {
  char *word; /* spelling of the word */
  int count;  /* occurrences counted so far */
} keytab[] = {
    {"auto", 0},
    {"break", 0},
    {"case", 0},
    {"char", 0},
    {"const", 0},
    {"continue", 0},
    {"default", 0},
    {"printf", 0},
    {"unsigned", 0},
    {"void", 0},
    {"volatile", 0},
    {"while", 0},
};
/* binsearch: look a word up in an array of struct key; -1 if absent */
int binsearch(char *, struct key *, int);
/* getch: get a (possibly pushed back) character */
int getch(void);
/* mgetword: read the next word or single character from input */
int mgetword(char *, int);
/* ungetch: push a character back on input */
void ungetch(int c);
int getch(void) /* next character: pushed-back ones first, then stdin */
{
  if (bufp > 0)
    return buf[--bufp]; /* most recently pushed character comes out first */
  return getchar();
}
void ungetch(int c) /* make c the next character getch() returns */
{
  if (bufp < BUFSIZE) {
    buf[bufp++] = c;
    return;
  }
  /* buffer full: the character is dropped and a message is printed */
  printf("ungetch: too many characters \n");
}
/* binsearch: search tab[0]...tab[n-1] for word by repeated halving;
 * returns the index of the match, or -1 if word is not in the table */
int binsearch(char *word, struct key tab[], int n) {
  int first, last;

  for (first = 0, last = n - 1; first <= last;) {
    int probe = (first + last) / 2;
    int order = strcmp(word, tab[probe].word);

    if (order < 0) {
      last = probe - 1;
    } else if (order > 0) {
      first = probe + 1;
    } else {
      return probe;
    }
  }
  return -1;
}
/* getword: get next word or character from input
 *
 * word - buffer that receives the token as a NUL-terminated string
 * lim  - size of that buffer in bytes; at most lim - 1 characters are stored
 *
 * Leading white space is skipped. A word is a letter followed by letters
 * and digits; any other character is returned on its own. When a word is
 * longer than lim - 1 characters, the remainder is left in the input for
 * the next call.
 *
 * Returns the first character of the token, or EOF at end of input (word
 * is then the empty string). EOF is also returned, without reading any
 * input, when lim is too small to hold one character plus the terminator. */
int mgetword(char *word, int lim) {
  int c;
  char *w = word;
  char *end;

  if (lim < 2) {
    if (lim == 1)
      *word = '\0';
    return EOF;
  }
  end = word + lim - 1; /* last slot, reserved for the terminator */

  /* Characters are kept in an int and cast to unsigned char for the
   * <ctype.h> calls, which are undefined for negative non-EOF values. */
  do {
    c = getch();
  } while (c != EOF && isspace((unsigned char)c));

  if (c == EOF) {
    *w = '\0';
    return EOF;
  }
  *w++ = (char)c;
  if (!isalpha((unsigned char)c)) {
    *w = '\0';
    return c;
  }
  while (w < end) {
    c = getch();
    if (c == EOF || !isalnum((unsigned char)c)) {
      /* EOF is never pushed back: the next read reports it again. */
      if (c != EOF)
        ungetch(c);
      break;
    }
    *w++ = (char)c;
  }
  *w = '\0';
  return word[0];
}
/* count C keywords: read words from the input, tally those found in
 * keytab, then print every entry seen at least once with its count.
 * Command-line arguments are not used. Returns 0. */
int main(int argc, char *argv[]) {
  size_t k;
  char word[MAXWORD];

  (void)argc;
  (void)argv;

  while (mgetword(word, MAXWORD) != EOF) {
    int n;

    /* cast: <ctype.h> functions are undefined for negative char values */
    if (!isalpha((unsigned char)word[0]))
      continue;
    n = binsearch(word, keytab, (int)NKEYS);
    if (n >= 0)
      keytab[n].count++;
  }

  /* NKEYS has type size_t, so the index uses the same type */
  for (k = 0; k < NKEYS; k++)
    if (keytab[k].count > 0)
      printf("%4d %s\n", keytab[k].count, keytab[k].word);
  return 0;
}
Explanation
This program counts the C keywords that appear in its input and prints each keyword that occurred at least once, preceded by its count.
$ ./ex_6.1_getword
this is a short sentence.
1 short