Sunday, November 17, 2013
Elite programmers: This is a c programme to count the frequency of ea...
Elite programmers: This is a c programme to count the frequency of ea...:
/*Author : FE12A061 - FEMENCHA AZOMBO FABRICE
Title : A program to count the occurrences of each word in a file
Date : 10 November, 2013 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
/*
 * One node of the binary tree that tallies words.
 * Each node carries a word, the number of times it was seen, and links to
 * its two child subtrees.
 */
struct words
{
    char *word;          /* text of the word stored in this node */
    int count;           /* how many times this word has been counted */
    struct words *left;  /* left child subtree, NULL when there is none */
    struct words *right; /* right child subtree, NULL when there is none */
};
/* bintree: records one occurrence of `word` in the tree whose root pointer is
   *head -- a new node is created for an unseen word, otherwise the matching
   node's count is incremented. Returns *head. */
struct words *bintree(struct words **head,char *word);
/* print_tree: prints one "| word | count |" row per node, visiting the right
   subtree, then the node itself, then the left subtree. */
void print_tree(struct words *head);
/* readfile: reads the contents of the named text file into a buffer obtained
   from malloc and returns it; returns a null pointer when the file cannot be
   opened. */
char *readfile(char *filename);
/* total: running number of words counted so far (repeats included); reset by
   main and incremented by bintree for every word it records. */
int total;
/*
 * Entry point: counts how often each word occurs in a text file and prints a
 * frequency table.
 *
 * argv[1] must name the file to read. The file is loaded with readfile(),
 * split into words with strtok(), and every word is recorded in the tree via
 * bintree(). The total word count and one row per distinct word are then
 * printed.
 *
 * Returns 0 on success, EXIT_FAILURE when no file name was given or the file
 * could not be read.
 */
int
main (int argc, char *argv[])
{
    /* Every character that separates two words: whitespace, punctuation and digits. */
    const char *delim = " ,\n\r\t.;:_-()~`|!?><%^&*$#@{}[]0123456789\'\"/";
    struct words *head = NULL;
    char *word = NULL;
    char *str = NULL;

    total = 0;

    /* Without this check a missing argument would hand NULL to fopen(). */
    if (argc < 2 || argv[1] == NULL) {
        fprintf(stderr, "Usage: %s <text-file>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "wordcount");
        return EXIT_FAILURE;
    }

    str = readfile(argv[1]);
    if (str == NULL) {
        return EXIT_FAILURE;
    }

    /* A single loop covers the first token too, so a file that contains no
       words at all never passes NULL on to bintree(). */
    for (word = strtok(str, delim); word != NULL; word = strtok(NULL, delim)) {
        head = bintree(&head, word);
    }

    /* bintree() stores its own copy of each word, so the raw file buffer is
       no longer needed once tokenising is finished. */
    free(str);

    printf("Total = %d\n", total);
    printf("|%-20s | %-5s |\n%25s\n", "WORDS", "freq", "|=============================");
    print_tree(head);

    return 0;
}
/*
 * Prints one table row ("|word | count |") for every node of the tree rooted
 * at head. An empty tree (head == NULL) prints nothing.
 *
 * Nodes are visited right subtree first, then the node itself, then the left
 * subtree (a reverse in-order walk, not a post-order one). Because bintree()
 * files words that compare greater than a node under that node's left link,
 * this order lists the words in ascending case-insensitive order.
 */
void print_tree(struct words *head){
    /* The walk down the left links is done by the loop instead of a trailing
       recursive call; only the right subtrees are visited recursively. */
    while (head != NULL) {
        print_tree(head->right);
        printf("|%-20s | %-5d |\n", head->word, head->count);
        head = head->left;
    }
}
/*
 * Reads the whole of a text file into a newly allocated, NUL-terminated
 * string.
 *
 * filename: path of the file to read; a null pointer is treated like a file
 *           that cannot be opened.
 *
 * Returns the buffer (the caller must free() it), or NULL when the file
 * cannot be opened, its size cannot be determined, memory runs out or a read
 * error occurs. An empty file yields an empty string.
 *
 * Note: the result is used as a C string, so a file containing NUL bytes
 * appears truncated at the first one.
 */
char *readfile(char *filename){
    FILE *fp = NULL;
    char *str = NULL;
    long size = 0;
    size_t got = 0;

    if (filename != NULL) {
        fp = fopen(filename, "r");
    }
    if (fp == NULL) {
        printf("Cannot open file.Please make sure the file is in the current directory and try again\n") ;
        return NULL;
    }

    /* Seek to the end so ftell() reports the file size, then go back to the start. */
    if (fseek(fp, 0, SEEK_END) != 0 || (size = ftell(fp)) < 0) {
        fprintf(stderr, "Cannot determine the size of the file\n");
        fclose(fp);
        return NULL;
    }
    rewind(fp);

    /* One extra byte for the terminating NUL that strtok() and friends rely on. */
    str = malloc((size_t)size + 1);
    if (str == NULL) {
        fprintf(stderr, "Out of memory while reading the file\n");
        fclose(fp);
        return NULL;
    }

    /* Read byte-wise so the return value is the number of bytes actually
       stored; in text mode that can be less than the size ftell() reported. */
    got = fread(str, 1, (size_t)size, fp);
    if (ferror(fp)) {
        fprintf(stderr, "Error while reading the file\n");
        free(str);
        fclose(fp);
        return NULL;
    }
    str[got] = '\0';

    fclose(fp);
    return str;
}
struct words *bintree(struct words **head,char *word){
if(*head == NULL){ //checks if the tree is empty
*head = malloc(sizeof(struct words *)); //allocate space for the words.I.E initialise the head
(*head)->word = malloc(sizeof(char) * strlen(word) + 1); //compute the size of the word
strcpy((*head)->word,word); //copy the word into the word part of the tree. I.E word
(*head)->count = 1; //initialise the count for that word
(*head)->left = (*head)->right = NULL; //declare new nodes to store the next words.I.E left and right subtrees
total++; //total is just a variable that keep track of the number of words in the file
}
else if(strcasecmp((*head)->word,word) < 0){ //strcasecmp compares incoming word with the existing word without considering the cases
bintree(&(*head)->left,word); //if the incomming word is less than the word already found in the tree, store it in the left subtree
}
else if(strcasecmp((*head)->word,word) > 0){
bintree(&(*head)->right,word); //else store it in the right subtree
}
else{
(*head)->count++; //if the words are the some then increment the count part of the tree(the word)
total++;
}
return *head; //return the head of the tree(the parent) when all the words are in the tree
}
/*How to run the program*/
/*Compile the program, then run it with the name of the text file as the first command-line argument, e.g. ./a.out input.txt*/
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment