Sunday, November 17, 2013

Elite programmers: This is a c programme to count the frequency of ea...


Elite programmers: This is a c programme to count the frequency of ea...:

/*Author : FE12A061 - FEMENCHA AZOMBO FABRICE
  Title  : A program to count the occurrences of each word in a file
  Date   : 10 November, 2013 */
 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
/* One node of the binary tree of words: a word, its count, and two children. */
struct words {
    char *word;             /* text of the word stored at this node */
    int count;              /* number of times this word has been counted */
    struct words *left;     /* left child, or NULL */
    struct words *right;    /* right child, or NULL */
};
/* Adds `word` to the tree whose root pointer is *head (or bumps its count)
   and returns *head. */
struct words *bintree(struct words **head,char *word);

/* Prints one table row per node of the tree rooted at `head`. */
void print_tree(struct words *head);

/* Reads the text file `filename` into a heap-allocated string. */
char *readfile(char *filename);

/* Running total of words counted; incremented by bintree for every word. */
int total;
/*
 * main - count how often each word occurs in the file named by argv[1].
 *
 * The file is read into memory, split into words on whitespace, digits and
 * punctuation, and every word is inserted into a binary tree that keeps a
 * per-word count. The total and a "word | frequency" table are then printed.
 *
 * Returns 0 on success, EXIT_FAILURE when no file name was given or the
 * file could not be read.
 */
int
main (int argc, char *argv[])
{
    /* Without this check argv[1] is NULL (or absent) when no file is given. */
    if(argc < 2){
        fprintf(stderr,"Usage: %s <file>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "wordcount");
        return EXIT_FAILURE;
    }

    char *str = readfile(argv[1]);
    if(str == NULL)
        return EXIT_FAILURE;

    /* Every character that separates two words from each other. */
    const char* delim = " ,\n\r\t.;:_-()~`|!?><%^&*$#@{}[]0123456789\'\"/";
    struct words *head = NULL;
    total = 0;

    /* strtok returns NULL straight away for a file with no words, so the
       loop body never sees a NULL word. */
    for(char *word = strtok(str,delim); word != NULL; word = strtok(NULL,delim)){
        head = bintree(&head,word);
    }

    printf("Total = %d\n",total);
    printf("|%-20s | %-5s |\n%25s\n","WORDS","freq","|=============================");
    print_tree(head);

    /* bintree stores its own copy of each word, so the file buffer can be
       released here; the tree nodes themselves are left to process exit. */
    free(str);
    return 0;
}


/*
 * print_tree - print one "|word | count |" row for every node of the tree.
 *
 * Each node's right subtree is printed first, then the node itself, then its
 * left subtree (a right-node-left walk). An empty tree prints nothing.
 */
void print_tree(struct words *head){
    /* The trailing descent into the left subtree is expressed as a loop;
       only the right subtree needs a recursive call. */
    for(struct words *node = head; node != NULL; node = node->left){
        print_tree(node->right);
        printf("|%-20s | %-5d |\n",node->word,node->count);
    }
}
/*
 * readfile - load a whole text file into a freshly allocated C string.
 *
 * filename: path of the file to read; NULL is treated like a file that
 *           cannot be opened.
 *
 * Returns a NUL-terminated buffer holding the file's contents, which the
 * caller must free(). Returns NULL, after printing a message, when the file
 * cannot be opened, its size cannot be determined, reading fails, or memory
 * runs out.
 */
char *readfile(char *filename){
    FILE *fp = (filename != NULL) ? fopen(filename,"r") : NULL;
    if(fp == NULL){
        printf("Cannot open file.Please make sure the file is in the current directory and try again\n") ;
        return NULL;
    }

    char *str = NULL;
    long int size = -1;

    /* Seek to the end to learn the size, then back to the start to read. */
    if(fseek(fp,0,SEEK_END) == 0)
        size = ftell(fp);
    if(size >= 0 && fseek(fp,0,SEEK_SET) == 0)
        str = malloc((size_t)size + 1);   /* +1 for the terminating NUL */

    if(str != NULL){
        /* Read byte-wise so the return value is the number of bytes actually
           stored; text-mode translation may yield fewer than `size`. */
        size_t got = fread(str,1,(size_t)size,fp);
        if(ferror(fp)){
            free(str);
            str = NULL;
        }
        else{
            str[got] = '\0';   /* callers such as strtok need a terminated string */
        }
    }

    if(str == NULL)
        fprintf(stderr,"Cannot read file: size unknown, read error or out of memory\n");

    fclose(fp);
    return str;
}

struct words *bintree(struct words **head,char *word){
    if(*head == NULL){    //checks if the tree is empty
        *head = malloc(sizeof(struct words *));    //allocate space for the words.I.E initialise the head
        (*head)->word = malloc(sizeof(char) * strlen(word) + 1); //compute the size of the word
        strcpy((*head)->word,word); //copy the word into the word part of the tree. I.E word
        (*head)->count = 1;  //initialise the count for that word
        (*head)->left = (*head)->right = NULL; //declare new nodes to store the next words.I.E left and right subtrees
        total++; //total is just a variable that keep track of the number of words in the file
    }
    else if(strcasecmp((*head)->word,word) < 0){   //strcasecmp compares incoming word with the existing word without considering the cases
        bintree(&(*head)->left,word); //if the incomming word is less than the word already found in the tree, store it in the left subtree
    }
    else if(strcasecmp((*head)->word,word) > 0){
        bintree(&(*head)->right,word); //else store it in the right subtree
    }
    else{
        (*head)->count++; //if the words are the some then increment the count part of the tree(the word)
        total++;
    }
   
    return *head; //return the head of the tree(the parent) when all the words are in the tree
}


/*How to run the program*/

/*Compile the program, then run it with the name of the input file as its first command-line argument.*/




No comments:

Post a Comment