0

RHEL6

I'm trying to implement a perl split funciton in a C subroutine which dynamically builds the array of strings. My attempt fails with a segfault. But it does not fail if I comment out the printf statement in the for loop (perhaps implying that the segfault is in where its getting built as opposed to how)

Here it is...

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int split(char *s, char **arr);


void main(int argc, char* argv[])
{
  int x;
  int arrsz;
  char str[]="aaa:bbb:ccc"; 
  char **arr;


  arrsz=split(str,arr);


  for(x=0;x<arrsz;x++) {
    printf("%s\n",arr[x]);
  }

  exit(0);
}

/***********************************/
int split(char *str, char **arr) {

  int arrsz=0;
  char delim[2] = ":";
  char *tok;

  arr = malloc(sizeof(char **));
  arr[0] = malloc(1);
  arr[0] = '\0';

  tok = strtok(str,delim);
  while(tok != NULL) {
    arrsz++;
    arr = (char **)realloc(arr,(arrsz*sizeof(char *))+1);
    arr[arrsz-1] = malloc((sizeof(char)*strlen(tok))+1);
    strcpy(arr[arrsz-1],tok);
    arr[arrsz]=malloc(1);
    arr[arrsz]='\0';

    tok = strtok(NULL,delim);
  }

  return(arrsz);
}

I think the problem is in how I'm passing "arr" to the split function or how it's being received and used in the function. I say this because if I move the body of the function to main, it works there.

I tried dealing with arr inside the functions as it it was a (char ***), but that didn't work.

Can a C expert out there set me straight ?

9
  • 2
    regarding: void main(int argc, char* argv[]) regardless of what visual studio will allow, all valid signatures for the main() function have a return type of int Commented Apr 6, 2018 at 16:34
  • 2
    arr[0] = malloc(1); arr[0] = '\0'; looks highly suspect. Commented Apr 6, 2018 at 16:35
  • when the parameters to main() are not going to be used, the correct signature is: int main( void ) Commented Apr 6, 2018 at 16:35
  • regarding: ` arr[0] = malloc(1); arr[0] = '\0';` this creates a pointer to a one byte allocation then immediately overlays that pointer with a NUL byte. Probably not what you want Commented Apr 6, 2018 at 16:40
  • when calling any of the heap allocation functions (malloc calloc realloc), 1) the return type is void* so can be assigned to any pointer. Casting just clutters the code, making it more difficult to understand, debug, etc 2) always check (!=NULL) the returned value to assure the operation was successful. When calling realloc() always assign to a 'temp pointer and check for NULL. Assigning directory to the target variable will result in a memory leak when realloc fails Commented Apr 6, 2018 at 16:44

5 Answers 5

2

The main error is that you should pass a pointer to the strings list to the split function, not the strings list itself, so you should use an ***arr:

int split(char *str, char ***arr);

And you should use & to pass the pointer in main:

...
arrsz=split(str,&arr);
...

In the function you could use a double pointer to avoid confusion and at the end assign that pointer to the parameter:

int split(char *str, char ***arrreturn) {
     char **arr;    //Use this strings list to add the strings

     ...

     *arreturn = arr;
     return(arrsz);
}

-You should not call realloc anytime you need to insert a string, but you could oversize it and increment its dimension if you need.

-I cannot see the need of assign '\0' at the end of the list if you have a variable with the length

-You can use strdup instead of malloc-strcpy funcs:

char *first = "ciao";
char *str = malloc(strlen(first) * sizeof(char));
strcpy(str, first);

Is equal to:

char *first = "ciao";
char *str = strdup(first);

I corrected your code:

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int split(char *str, char ***arrreturn);


void main(int argc, char *argv[]) {
    int x;
    int arrsz;
    char str[] = "aaa:bbb:ccc";
    char **arr;


    arrsz = split(str, &arr);


    for (x = 0; x < arrsz; x++) {
        printf("%s\n", arr[x]);
    }

    exit(0);
}

/***********************************/
int split(char *str, char ***arrreturn) {

    int arrsz = 1;
    int len = 0;
    char delim[2] = ":";
    char *tok;
    char **arr;

    arr = malloc(sizeof(char **));

    tok = strtok(str, delim);
    while (tok != NULL) {
        len++;
        if (len >= arrsz) {
            arrsz *= 2;
            arr = realloc(arr, arrsz * sizeof(char **));
        }
        arr[len - 1] = strdup(tok);
        tok = strtok(NULL, delim);
    }
    *arrreturn = arr;
    return (len);
}
Sign up to request clarification or add additional context in comments.

1 Comment

char **arr; arr = malloc(sizeof(char **));... realloc(arr, arrsz * sizeof(char **)) is not using the matching type to determine size (One too many *). Use char **arr = malloc(sizeof *arr);. It is easier to code correctly, review and maintain.
2

There are a few bugs. I've annotated and [partially] fixed bugs. It will still segfault. I added a refactored version that will work correctly.

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int split(char *s, char **arr);

void main(int argc, char* argv[])
{
  int x;
  int arrsz;
  char str[]="aaa:bbb:ccc";
  char **arr;

#if 1
#endif

  arrsz=split(str,arr);

  for(x=0;x<arrsz;x++) {
    printf("%s\n",arr[x]);
  }

  exit(0);
}

/***********************************/
int split(char *str, char **arr) {

  int arrsz=0;
  char delim[2] = ":";
  char *tok;

  // NOTE/BUG: this function only changes arr within the function and does
  // _not_ propagate it to the caller

  arr = malloc(sizeof(char **));

  // NOTE/BUG: this is replaced in the loop and leaks memory
#if 0
  arr[0] = malloc(1);
  arr[0] = '\0';
#endif

  tok = strtok(str,delim);
  while(tok != NULL) {
    arrsz++;

    // NOTE/BUG: this is incorrect -- it only adds a byte instead of another
    // pointer (i.e. it doesn't allocate enough)
#if 0
    arr = (char **)realloc(arr,(arrsz*sizeof(char *))+1);
#else
    arr = (char **)realloc(arr,sizeof(char *) * (arrsz + 1));
#endif

#if 0
    arr[arrsz-1] = malloc((sizeof(char)*strlen(tok))+1);
    strcpy(arr[arrsz-1],tok);
#else
    arr[arrsz-1] = strdup(tok);
#endif

    // NOTE/BUG: this is wrong and leaks memory
#if 0
    arr[arrsz]=malloc(1);
    arr[arrsz]='\0';
#endif

    tok = strtok(NULL,delim);
  }

#if 1
  arr[arrsz] = NULL;
#endif

  return(arrsz);
}

But, as written, your function doesn't update caller's value of arr.

To fix your function, split would need arr to be defined as a "three star" pointer (e.g. char ***arr) which is considered cumbersome and very bad practice.

So, a better/simpler solution is to refactor the function and pass back arr as return (e.g. char **split(char *str,int *sizrtn):

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char **split(char *s, int *arsiz);

int main(int argc, char* argv[])
{
  int x;
  int arrsz;
  char str[]="aaa:bbb:ccc";
  char **arr;

  arrsz = 0;
  arr = split(str,&arrsz);

  for(x=0;x<arrsz;x++) {
    printf("%s\n",arr[x]);
  }

  return 0;
}

/***********************************/
char **split(char *str, int *sizrtn)
{
  int arrsz=0;
  const char *delim = ":";
  char *tok;
  char **arr = NULL;

  tok = strtok(str,delim);
  while (tok != NULL) {
    arrsz++;
    arr = realloc(arr,sizeof(char *) * (arrsz + 1));

    arr[arrsz - 1] = strdup(tok);

    tok = strtok(NULL,delim);
  }

  if (arr == NULL)
    arr = malloc(sizeof(*arr));

  arr[arrsz] = NULL;

  *sizrtn = arrsz;

  return arr;
}

Comments

0

To modify an object in the caller's scope you must pass a pointer to the object - so you need one more level of indirection. There is also at least one semantic error in the implementation - assigning '\0' to the pointer returned by malloc(), will both invalidate the pointer and cause a memory leak.

Change split() prototype to:

int split( char* s, char*** arr ) ;

Then call it thus:

arrsz = split( str, &arr ) ; 

And change the implementation:

int split( char* str, char*** arr ) 
{
  int arrsz = 0 ;
  char delim[2] = ":" ;
  char* tok ;

  *arr = malloc(sizeof(char**));
  *arr[0] = malloc(1);
  **arr[0] = '\0';         // <<< This is fixed too

  tok = strtok( str, delim ) ;
  while( tok != NULL ) 
  {
    arrsz++;
    *arr = (char **)realloc(*arr,(arrsz*sizeof(char *))+1);
    *arr[arrsz-1] = malloc((sizeof(char)*strlen(tok))+1);
    strcpy(*arr[arrsz-1],tok);
    *arr[arrsz]=malloc(1);
    *arr[arrsz]='\0';

    tok = strtok(NULL,delim);
  }

  return(arrsz);
}

There may be other errors I have not spotted, but that is fundamental. Best from hereon debugged using a debugger rather then Q&A.

3 Comments

I still get a segfault, only this time I get it even if I comment out the printf. IOW, seems like its in the function this time.
@daveg : As I said, the code has multiple issues, but how to return an array to a caller was the only issue you have asked about. To debug the code I'd have to build and run it, but rather then using SO as a debugging service, you should learn to debug. If I did it for you, I'd use a debugger - you can do that, and it would serve you well to learn how. Moreover, to see why the code now fails we'd need your modified code - which may differ from mine - best to post a new question - this one is answered.
Ok, I took a look at it in a debugger, mu conclusion is that it is so broken, I'd suggest starting again. Since that is not what the is question is about, I am nor suggesting anything (though other answers have done so). A solution using strdup() might help simplify things.
0

the following proposed code:

  1. cleanly compiles
  2. performs the desired functionality
  3. properly checks for errors from system functions
  4. eliminates any need to use a *** parameter -- google three star programer as to why that is bad
  5. does not include header files those contents are not used

and now, the proposed code:

//#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char ** split(char *str, size_t *arrsz);


int main( void )
{
    size_t x;
    size_t arrsz;
    char str[]="aaa:bbb:ccc";

    char **arr=split(str,&arrsz);


    for(x=0;x<arrsz;x++)
    {
        printf("%s\n",arr[x]);
    }

    exit(0);
}


/***********************************/
char ** split(char *str, size_t *arrsz)
{
    char **arr = NULL;
    size_t count = 0;

    char delim[2] = ":";
    char *tok;

    tok = strtok(str,delim);
    while(tok != NULL)
    {
        count++;
        char **temp = realloc(arr,(count*sizeof(char *)));
        if( !temp )
        {
            perror( "malloc failed" );
            // perform cleanup and
            free( arr );
            exit( EXIT_FAILURE );
        }

        arr = temp;

        arr[count-1] = strdup( tok );
        if( !arr[count-1] )
        {
            perror( "strdup failed" );
            // perform cleanup and
            free( arr );
            exit( EXIT_FAILURE );
        }

        tok = strtok(NULL,delim);
    }

    *arrsz = count;
    return( arr );
}

Comments

0

OP's code does not return the allocated memory assigned to arr

int split(char *str, char **arr) {
  ...
  // Memory allocated and assigned to local `arr`
  // Yet `arr` is not returned.
  // Calling code never sees the result of this assignment.
  arr = malloc(sizeof(char **));
  ...
  return(arrsz);
}

Instead, I took a whole new approach to mimic split /PATTERN/,EXPR.

I really wanted to avoid all the ** and *** programming.

IMO, a split() should not change the expression so directly using strtok() is out. A common implementation of strtok() effectively does a strspn() and strcspsn(), so coding those directly avoids the strtok().

The below returns a string list type. Various other function signatures could be used, this return type seemed natural for OP's goal. Another solution might return a NULL terminated array of char * pointers.

When memory allocations fails, it is detected and then code calls TBD_Code();. Unclear how OP wants to handle that. Code could print a message and exit or attempt some recovery.

#include <stdlib.h>
#include <string.h>

typedef struct {
  size_t n;
  char **strings;
} string_list;

string_list split(const char *pattern, const char *expr) {
  string_list list = { 0, NULL };
  size_t length;

  // Find length of initial matching characters
  while ((length = strspn(expr, pattern)), expr[length]) {
    // Skip leading characters from `expr` that match the pattern
    expr += length;
    // Find length of characters NOT from the pattern
    length = strcspn(expr, pattern);
    // Allocate for 1 more pointer
    void *tmp = realloc(list.strings, sizeof *(list.strings) * (list.n + 1));
    if (tmp == NULL) TBD_Code();
    list.strings = tmp;
    //Allocate for the token and save it
    list.strings[list.n] = malloc(length + 1u);
    if (list.strings[list.n] == 0) TBD_Code();
    memcpy(list.strings[list.n], expr, length);
    list.strings[list.n][length] = '\0';
    // Advance
    list.n++;
    expr += length;
  }
  return list;
}

void string_list_free(string_list list) {
  if (list.strings) {
    for (size_t i = 0; i < list.n; i++) {
      free(list.strings[i]);
    }
    free(list.strings);
  }
}

Test code

#include <stdio.h>

void print_string_list(string_list list) {
  for (size_t i = 0; i < list.n; i++) {
    printf("%zu: <%s>\n", i, list.strings[i]);
  }
  string_list_free(list);
}

int main(void) {
  print_string_list(split(":", "aaa:bbb:ccc"));
  print_string_list(split(":b", "aaa:bbb:ccc"));
  print_string_list(split("a:", "aaa:bbb:ccc"));
  print_string_list(split(":c", "aaa:bbb:ccc"));
}

Output

0: <aaa>
1: <bbb>
2: <ccc>
0: <aaa>
1: <ccc>
0: <bbb>
1: <ccc>
0: <aaa>
1: <bbb>

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.