1

I'm having problem with pointers in a C program that count the occurrences of a string or more in a bunch of file. The program take in input a file which contains the paths of the files in which search the occurrences. All the files that i will mention are contained in the same folder of the project, whose name is "find". In my case, the input file is "path.txt":

C:\Users\Utente\Desktop\find\try.txt
C:\Users\Utente\Desktop\find\try1.txt

The try.txt content is:

abc
abc
abc
ac
ac
ac
ac

The try1.txt content is:

ac
ac
ac
ac
abc
abc
abc

My program is composed by 4 files, two header-files and two source files:

find.c:

#include "find.h"

int main(int argc, char * argv[]){

FILE *fInput = NULL;  
FILE *fp = NULL;
char *line1; 
char *line2;
int endOfLineDetected = 0;
size_t nrOfCharRead = 0;
char ch;

fWord *w = NULL;
fWord *start = NULL;
fWord *tail = NULL;

fPath *head = NULL;
fPath *current = NULL;


fInput = fopen(argv[1], "r"); //the file that contains the path of the file in which search.

if(fInput == NULL){
    fprintf(stderr, "Cannot open %s, exiting. . .\n", argv[1]);
    exit(1);
}

while(!endOfLineDetected){ //read line by line the input file in order to save the path in a structure
    line1 = getLineOfAnySize(fInput,128,&endOfLineDetected,&nrOfCharRead);
    fPath *node = malloc (sizeof(fPath));
    node->path = line1;
    node->fileOccurrences = 0;
    node->position = NULL;
    node->next = NULL;

    if(head == NULL){
        current = head = node;
    }else{
        current = current->next = node;
    }
}

fclose(fInput);

//create a linked list of the type fWord, one structure for each word.
do{
    fWord *app = malloc(sizeof(fWord));
    printf("Insert the word to search: ");
    scanf("%s", app->word);
    app->totalOccurences = 0;
    app->p = head;
    app->next = NULL;

    if(start == NULL){
        tail = start = app;
    }else{
        tail = tail->next = app;
    }
    printf("Do you want to insert another word? (Y/N): ");
    scanf(" %c", &ch);
}while(ch == 'y' || ch == 'Y');

w = start; //pointer back to the top of the fWord structure

//traverse all the structure and execute the algorithm
while(w != NULL){
    while(w->p != NULL){
        fp = fopen(w->p->path, "r");
        if(fp == NULL){
            fprintf(stderr, "Cannot open %s, exiting. . .\n", w->p->path);
            exit(1);
        }

        int countLine = 0;
        w->p->fileOccurrences = 0;
        endOfLineDetected = 0;

        while(!endOfLineDetected){
            line2 = getLineOfAnySize(fp,128,&endOfLineDetected,&nrOfCharRead);
            int n = strlen(line2);
            int m = strlen(w->word);
            w->p->fileOccurrences = w->p->fileOccurrences + KMP(line2, w->word, n, m, countLine, w->p);
            countLine = countLine + 1;
        }   

        w->totalOccurences = w->totalOccurences + w->p->fileOccurrences;
        w->p->position = getHead(); // //pointer back to the top of the  fPosition structure
        w->p = w->p->next;
        fclose(fp);
    }
    w->p = head; //pointer back to the top of the fPath structure
}

w = start; //pointer back to the top of the fWord structure

//traverse all the structure and print out the occurrences and their position
while(w != NULL){
    w->p = head;
    printf("WORD %s \r\n", w->word);
    printf("TOTAL %d \r\n", w->totalOccurences);
    while(w->p != NULL){
        printf("FILE %s \r\n", w->p->path);
        printf("OCCURENCES %d   \r\n", w->p->fileOccurrences);
        while (w->p->position != NULL){
            printf("%d %d\r\n", w->p->position->line, w->p->position->character);
            w->p->position = w->p->position->next;
        }
        w->p = w->p->next;
    }
    w = w->next;
}

printf("\r\n"); //the file ends with an empty line

return 0;
}

//method used for read line by line a file
char * getLineOfAnySize(FILE* fp, size_t typicalSize, int *endOfLineDetected,size_t *nrOfCharRead){ 
char *line;       // buffer for our string
int ch;           // we will read line character by character
size_t len = 0;   // number of characters read (character counter)
size_t lineSize = typicalSize;  // initial size of the buffer allocated for the line
*nrOfCharRead = 0;

if(!fp) return NULL; // protection

// allocating the buffer
line = realloc(NULL, sizeof(char)*lineSize); // expected size of the line is up to typicalSize

if (!line) return line; // protection, if we fail to allocate the memory we will return NULL

while (1) { // loop forever     
    ch = fgetc(fp);       // getting character by character from file

    if (ch == '\n') break; // end of line detected - breaking the loop 
    if( ch == EOF)  {
        *endOfLineDetected = 1;
        break; // end of file detected - breaking the loop
     }

    line[len++] = ch;     // store the character in the line buffer, increase character counter

    if (len == lineSize){ // we reached the end of line buffer (no more room)

        lineSize = lineSize + 64; // we have to increase the line size 
        line = realloc(line, sizeof(char)*(lineSize)); // line buffer has new size now

        if (!line) return line; // if we fail to allocate memory we will return NULL
    }
    if( (len == 0) && *endOfLineDetected){ // empty file
        *endOfLineDetected = 1;
        break; 
    } 
}


line[len++] ='\0';  // ending the string (notice there is no '\n' in the string)
*nrOfCharRead = len;

return line;       // return the string
}

find.h:

#include "kmp.h"

char * getLineOfAnySize(FILE* fp, size_t typicalSize, int *endOfLineDetected,size_t *nrOfCharRead);

kmp.c:

#include "kmp.h"

fPosition *head = NULL;
fPosition *current = NULL;

// Function to implement KMP algorithm
int KMP(const char* X, const char* Y, int m, int n, int line, fPath *app){

int count = 0;

// next[i] stores the index of next best partial match
int next[n + 1];

for (int i = 0; i < n + 1; i++)
    next[i] = 0;

for (int i = 1; i < n; i++){
    int j = next[i + 1];

    while (j > 0 && Y[j] != Y[i])
        j = next[j];

    if (j > 0 || Y[j] == Y[i])
        next[i + 1] = j + 1;
}

for (int i = 0, j = 0; i < m; i++){
    if (*(X + i) == *(Y + j)){
        if (++j == n){
            count = count + 1; //count the occurrences of the string in this file   
            fPosition *node = malloc (sizeof(fPosition));
            node->line = line; //the current line
            node->character = i - j + 1; //the shift in which occurs
            node->next = NULL;

            if(head == NULL){
                current = head = node;
            }else{
                current = current->next = node;
            }

            app->position = current;

        }
    }
    else if (j > 0) {
        j = next[j];
        i--;    // since i will be incremented in next iteration
    }
}

    return count; //return the number of occurences found
}

//take the pointer back to the top of fPosition
fPosition * getHead(){ 
     fPosition *app = head;
     head = NULL;
     return app;
}

kmp.h:

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

struct filePath{
char *path; //the file path
struct filePath *next;
};

struct OccurrencesPosition{
int line; //line in which an occurrence is founded
int character; //shift at which the occurrences comes

struct filePath pathInfo;

struct OccurrencesPosition *next; //pointer to the next occurrences
};

struct fileWord{
char word[50]; //the string to search
int totalOccurences; //the total occurrences of the string

int fileOccurrences; //the occurrences of each file
struct OccurrencesPosition *position; //pointer to the linked list which tracks all the occurrences and their positions

struct fileWord *next; //pointer to the next word
};

typedef struct filePath fPath; 
typedef struct fileWord fWord;
typedef struct OccurrencesPosition fPosition;

fPosition * getHead();


int KMP(const char* X, const char* Y, int m, int n, int line, fPath *app);

The problem is that when i run my program passing in input "abc" and "ac" it returns wrong value. More precisely, returns the value corresponding to "ac" in both cases. Here's the execution:

PS C:\Users\Utente\Desktop\find> gcc find.c kmp.c -o "find.exe"
PS C:\Users\Utente\Desktop\find> .\find.exe "path.txt"
Insert the word to search: abc
Do you want to insert another word? (Y/N): Y
Insert the word to search: ac
Do you want to insert another word? (Y/N): N
WORD abc 
TOTAL 6 
FILE C:\Users\Utente\Desktop\find\try.txt 
OCCURENCES 4    
3 0
4 0
5 0
6 0
FILE C:\Users\Utente\Desktop\find\try1.txt 
OCCURENCES 4    
0 0
1 0
2 0
3 0
WORD ac 
TOTAL 8 
FILE C:\Users\Utente\Desktop\find\try.txt 
OCCURENCES 4    
FILE C:\Users\Utente\Desktop\find\try1.txt 
OCCURENCES 4

As you can see, the WORD and TOTAL are correct in both cases, but the occurrences not. They correspond to "ac" in both cases. The correct output should be:

WORD abc 
TOTAL 6 
FILE C:\Users\Utente\Desktop\find\try.txt 
OCCURENCES 3    
0 0
0 1
0 2
FILE C:\Users\Utente\Desktop\find\try1.txt 
OCCURENCES 3   
4 0
5 0
6 0
WORD ac 
TOTAL 8 
FILE C:\Users\Utente\Desktop\find\try.txt 
OCCURENCES 4
3 0
4 0
5 0
6 0    
FILE C:\Users\Utente\Desktop\find\try1.txt 
OCCURENCES 4
0 0
1 0
2 0
3 0

I think that the problem is with the fPosition pointers. Thanks to anyone who helps.

6
  • Some slight nitpicking on your terminology: C doesn't have "classes". What you seem to be calling a class is a file. You have two source files (find.c and kmp.c) and two header files (find.h and kmp.h). Commented Nov 11, 2019 at 12:23
  • Edited correctly, thanks! Commented Nov 11, 2019 at 12:34
  • 1
    It is too much code to debug without any meaningful names to variables. Commented Nov 11, 2019 at 12:35
  • 1
    It is not clear what is not correct. Please show how the expected output would look like for your example input. Commented Nov 11, 2019 at 12:36
  • As you can see in the input files. The "abc" are in total 6, 3 for file, but the program print out the "ac" occurrences in both cases. Commented Nov 11, 2019 at 12:38

1 Answer 1

2

You have design issue.

The problem is occurrences info you are maintaining as part of filePath list.

struct filePath{
    char *path; //the file path
    int fileOccurrences; //the occurrences of each file
    struct OccurrencesPosition *position;  // here *****************
    struct filePath *next;
};

And file path info you are maintaining as part of fileWord list.

struct fileWord{
    char word[50]; //the string to search
    int totalOccurences; //the total occurrences of the string
    struct filePath *p; //pointer to the linked list of all the files
    struct fileWord *next; //pointer to the next word
};

Since you only have one file path list, each word in fileWord list is actually pointing to same filepath list.


Every word is pointing to same file path list

fWord *app = malloc(sizeof(fWord));
printf("Insert the word to search: ");
scanf("%s", app->word);

app->p = head; //here

and you are updating the position info inside the filepath for every word.

      w->p->position = getHead(); // //pointer back to the top of the  fPosition structure

Thus filePath list is holding position info only for the latest word you search.


Update:

Your design should look as below.

struct filePath{
    char *path; //the file path
    struct filePath *next;
};

struct OccurrencesPosition{
    int line; //line in which an occurrences is founded
    int character; //shift at which the occurrences comes

    struct filePath pathInfo;

    struct OccurrencesPosition *next; //pointer to the next occurrences
};

struct fileWord{
    char word[50]; //the string to search
    int totalOccurences; //the total occurrences of the string

    int fileOccurrences; //the occurrences of each file
    struct OccurrencesPosition *position; //pointer to the linked list which tracks all the occurrences and their positions

    struct fileWord *next; //pointer to the next word
};
Sign up to request clarification or add additional context in comments.

5 Comments

Thanks, how should i change my code in order to solve the problem?
@gianfranco Maintain the position info as part of fword.
Could you please show me briefly how shoul i change my program?
@gianfranco I have given you the how design should look, please update your code accordingly.
I will do it asap. Thank you so much

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.