/* ***** Template for the ISAM disk structure of words assignment ***** */ #include #include #include #include #define MAXWLEN 20 #define CLSIZE 1024 #define EPERCLUST 42 /* pack 42 elements/cluster using 1008/1024 bytes */ #define NLEVELS 3 /* allow 2 index levels 0, 1 plus the word level 2 */ #define WORDLEV (NLEVELS-1) /* actual words are on level 2 */ /* these are the individual records to pack EPERCLUST in each cluster */ struct element { /* each element is a word and count or a word index */ int count; /* a dual purpose field - cluster index or word count */ char word[MAXWLEN]; }; struct cluster { /* use the same cluster struct for index or words */ int isindexcluster; /* set to 1 for index clust or 0 for words */ struct element elem[EPERCLUST]; } clust[NLEVELS]; /* allow 2 index levels 0, 1 plus the word level 2 */ /* these are the major variables you will need as globals for convenience */ int fd, clust_pos, nincluster[NLEVELS], location[NLEVELS]; void isam_output(int lev, char *word, int count) { /* ***** BUILD AND WRITE INDEX AND DATA CLUSTERS FOR WORDS ***** */ } void isam_end() { int lev; /* flush residual data and index clusters */ for(lev = 0; lev < NLEVELS; lev++) { /* ***** YOUR WRITES OF THE REMAINING INFORMATION ***** */ } fprintf(stderr, "Total of %d clusters written\n", clust_pos); } /* ***** ALL OF THE ISAM STUFF IS LOCATED ABOVE THIS POINT ***** */ /* LEAVE THE REST OF THIS CODE AS IS - IT'S MOSTLY THE BINARY TREE SOLN */ struct tnode { /* specify the "shape" of a tnode structure ... */ struct tnode *brnch[2]; /* the left and right branch pointers */ int count; /* the word count as before */ char *word; /* a pointer to the word which is located elsewhere */ } *root; /* declare the root pointer variable */ char word_buff[100]; /* make this global to avoid needless arg passing */ char *file_name = "isamfile"; void tree_walk(struct tnode *); struct tnode **nr_tree_search(struct tnode **); int get_word(char *); int main(void) { struct tnode **tpp; unlink(file_name); fd = open(file_name, O_CREAT|O_RDWR|O_EXCL, 0666); /* file creation */ while(get_word(word_buff)) { tpp = nr_tree_search(&root); if((*tpp) == NULL) { /* create a new node */ (*tpp) = malloc(sizeof(struct tnode)); (*tpp)->count = 0; (*tpp)->brnch[0] = NULL; (*tpp)->brnch[1] = NULL; (*tpp)->word = strdup(word_buff); } (*tpp)->count++;/* do this for all cases including new nodes */ } tree_walk(root); /* this will send the words to isam_output() */ isam_end(); return(0); } /* nonrecursive version returning a pointer to pointer leading to the node */ struct tnode **nr_tree_search(struct tnode **tpp) { while(*tpp) { int cmp = strcmp((*tpp)->word, word_buff); if(cmp > 0) tpp = &(*tpp)->brnch[0]; else if(cmp < 0) tpp = &(*tpp)->brnch[1]; else break; /* return the pointer to the matching node */ } return(tpp); } /* this is a recursive treewalk to print in alphabetically sorted order */ void tree_walk(struct tnode *tp) { if(tp == NULL) return; tree_walk(tp->brnch[0]); /* YES, we are cheating with isam_output by knowing to go to level 2 */ /* isam_output is called in alphabetic sequence once per unique word */ isam_output(2, tp->word, tp->count); /* this is your ISAM linkage */ //printf("%7d\t%s\n", tp->count, tp->word); /* Linux uniq format */ tree_walk(tp->brnch[1]); } #include /* * Leave this routine EXACTLY as it stands */ int get_word(char *s) { int c; do { c = getchar(); if(c == EOF) return(0); } while(!isalpha(c) && !isdigit(c)); do { if(isupper(c)) c = tolower(c); *s++ = c; c = getchar(); } while(isalpha(c) || isdigit(c)); *s = 0; return(1); }