1

I'm playing around with the "fat-pointer" idea of the string. Basically I have header structure holding capacity and length information. I allocate it with preset length of characters then return the pointer to the first character. When I want header info I subtract 'sizeof' header.

All functions are working properly the way I expect them to except for the resize function:

typedef uint8_t* utf8;

/*
 * Resize string
 */
bool string_resize( utf8 *str, size_t room ) {
    utf8* p = str;

    struct string_header *hdr = (string_header_t *) (*p - sizeof(string_header_t));

    size_t cap = hdr->capacity;
    size_t len = hdr->length;

    /* Backup the current capacity if the process fails */
    size_t bck = cap;

    if ( len + room <= cap ) {
        //printf("::hit\n");
        return true;
    }

    cap = len + room;

    if ( cap < MAX_PREALLOC ) {
        cap *= 2;
    } else {
        cap += MAX_PREALLOC;
    }

    hdr->capacity = cap;

    void * new = realloc( hdr, sizeof(string_header_t) + cap + 1 );

    if ( new == NULL ) {
        hdr->capacity = bck;
        return false;
    }

    *str = (utf8) new + sizeof(string_header_t);
    /* Remove garbage if there is any  after the string content */
    memset( *str+len, 0, cap-len + 1 );
    return true;
}

Valgrind returns the error that I read in memory not allocated by malloc (always happens when trying to access the new parts of the string).

As You see I use (without typedef) uint8_t** so I should be passing correct pointer to pointer to the function and then changing it.

Any help greatly appreciated.

[update 1] Additional functions for the context of string manipulation:

typedef struct string_header {
     size_t capacity;
     size_t length;
} string_header_t;

/*
 * Allocate the string with the prefered length.
 */
utf8 string_alloc( size_t len ) {
    struct string_header *hdr = calloc(1, sizeof(string_header_t) + sizeof(uint8_t) * len);
    assert( hdr );
    hdr->capacity = len;
    hdr->length   = 0;
    return ((utf8) hdr) + sizeof(string_header_t);
}

/*
 * Allocate the new string with the initial default capacity.
 */
utf8 string_new() {
    return string_alloc( INITIAL_CAPACITY );
}

/*
 * Delete the string.
 */
void string_dealloc( utf8 self ) {
    if ( self == NULL )
        return;
    string_header_t *hdr = (string_header_t *) (self - sizeof(string_header_t));
    free(hdr);
}

static inline void string_push( utf8 s, char c ) {
    string_header_t* hdr = (string_header_t *) (s - sizeof(string_header_t));
    //*(s + hdr->length++) = (uint8_t) c;
    size_t len = hdr->length++;

    s[len] = c;
}

bool string_append_char( utf8 str, char c ) {
    if ( string_resize(&str, 1) != ARDP_SUCCESS )
        return ARDP_FAILURE;

    string_push( str, c );
    return ARDP_SUCCESS;
}

bool string_append_utf8( utf8 s, int cp ) {
    if ( cp < 0 or cp > 0x10ffff ) {
        return false;
    }
    else if ( cp < 0x80 ) {
        return string_append_char(s, cp & 0x7F);
    }
    else if ( cp < 0x800 ) {
        if ( string_resize( &s, 2 ) isnt ARDP_SUCCESS )
            return false;
        string_push( s, 0xC0 | ((cp >> 6) & 0x1F) );
        string_push( s, 0x80 | (cp & 0x3F) );
    }
    else if ( cp < 0x10000 ) {
        if ( string_resize( &s, 3 ) isnt ARDP_SUCCESS )
            return false;
        string_push( s, 0xE0 | ((cp >> 12) & 0xF) );
        string_push( s, 0x80 | ((cp >> 6)  & 0x3F) );
        string_push( s, 0x80 |  (cp & 0x3F) );
    }
    else {
        if ( string_resize( &s, 4 ) isnt ARDP_SUCCESS )
            return false;
        string_push( s, 0xF0 | ((cp >> 18) & 0x7) );
        string_push( s, 0x80 | ((cp >> 12) & 0x3F) );
        string_push( s, 0x80 | ((cp >> 6)  & 0x3F) );
        string_push( s, 0x80 |  (cp & 0x3F) );
    }
    return true;
}

bool string_finish( utf8 str ) {
    if ( string_resize(&str, 1) )
        return false;

    string_header_t *hdr = (string_header_t *) (str - sizeof(string_header_t));
    *(str + hdr->length) = '\0';
     return true;
}

[update 2] Valgrind logs (all of them are almost same as this):

==96370== Invalid read of size 8
==96370==    at 0x100011201: string_append_char (string.c:68)
==96370==    by 0x100000AE7: test_string (example.c:84)  
==96370==    by 0x100000BEA: main (example.c:106)
==96370==  Address 0x100aac6d0 is 0 bytes inside a block of size 24 free'd
==96370==    at 0x1000098B8: realloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370==    by 0x100011243: string_append_char (string.c:92)
==96370==    by 0x100000ADA: test_string (example.c:83)
==96370==    by 0x100000BEA: main (example.c:106)
==96370==  Block was alloc'd at
==96370==    at 0x100009551: calloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370==    by 0x1000110F2: string_new (string.c:38)
==96370==    by 0x100000A5A: test_string (example.c:72)
==96370==    by 0x100000BEA: main (example.c:106)

==96370== Invalid write of size 8
==96370==    at 0x100011274: string_append_char (string.h:44)
==96370==    by 0x100000AE7: test_string (example.c:84)
==96370==    by 0x100000BEA: main (example.c:106)
==96370==  Address 0x100aac6d8 is 8 bytes inside a block of size 24 free'd
==96370==    at 0x1000098B8: realloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370==    by 0x100011243: string_append_char (string.c:92)
==96370==    by 0x100000ADA: test_string (example.c:83)
==96370==    by 0x100000BEA: main (example.c:106)
==96370==  Block was alloc'd at
==96370==    at 0x100009551: calloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370==    by 0x1000110F2: string_new (string.c:38)
==96370==    by 0x100000A5A: test_string (example.c:72)
==96370==    by 0x100000BEA: main (example.c:106)

[update 3] Some example code:

void test_string(void) {
    utf8 str = string_new();

    string_debug( str );
    string_append_char( str, 'h');
    string_append_char( str, 't');
    string_append_char( str, 't');
    string_append_char( str, 'p'); 
    string_append_char( str, ':');
    string_append_char( str, '/');
    string_append_char( str, '/');
    string_append_char( str, 'g');
    string_append_char( str, 'o');
    string_append_char( str, 'o');
    string_append_char( str, 'g');
    string_append_char( str, 'l');
    string_append_char( str, 'e');
    string_append_char( str, '.');
    string_append_char( str, 'c');
    string_append_char( str, 'o');
    string_append_char( str, 'm');
    string_append_char( str, '/');
    string_append_char( str, '?');
    string_append_char( str, 's');
    string_append_char( str, '=');
    string_append_char( str, 'f');
    string_append_char( str, 'i');
    string_append_char( str, 's');
    string_append_char( str, 'h');

    //string_finish(str);

    printf("String %s", str);

    string_dealloc(str);
}
6
  • 2
    #define is == #define isnt != - please don't! Commented Dec 25, 2015 at 22:48
  • Could you also post the error which valgrind shows? Commented Dec 26, 2015 at 1:46
  • Updated the question with Valgrind logs ... I played around with it a little and it seems that the pointer is not being changed ... (it still points to the old address) ... for the subsequent calls. Commented Dec 26, 2015 at 7:28
  • string_resize() don't return the address of the new allocation. There's no way for the caller to know anything about the new memory block. Therefore they continue to scribble on the freed memory. Commented Dec 26, 2015 at 7:42
  • 1
    @MichaelBurr: Close. string_resize does actually return the address of the new allocation, because it takes an uint8_t** as a parameter, allowing the uint8_t* to be modified. But the function which calls string_resize doesn't. Commented Dec 26, 2015 at 7:46

2 Answers 2

2

You're trying to use a character string pointer as a proxy for a string structure. But the string structure might get reallocated, and thus the address of the string might change. In order for the caller to (for example) string_append_char to be aware of the change, they would have to have some mechanism to receive the new value of the string pointer. But they don't; they pass in a uint8_t* and get back a bool. If the append caused reallocation, the new address will be lost once string_append_char returns.

You could do that by passing a handle (i.e. an uint8_t**) instead of a simple uint8_t*. But in many ways that defeats the point of the interface. At the very least, you'll end up with some calls using &str and others str, which will make your code fragile and hard to read.

Really, you might as well just use a string structure directly, and include an inline function to extract a C-string pointer, similar to the C++ interface. The extra level of indirection might seem a little inefficient, but it has proven to be a lot easier to program with.

Sign up to request clarification or add additional context in comments.

6 Comments

Yeah ... For normal use it's the right answer... But since this was for language experiments and I did lots of weird things in code trying to break'n'make it in the end I used passing the hadle to fuctions manipulating the string and pointer to string to create/destroy functions.
@jr.root.cs: fair enough. You might want to consider the simpler expressions (uint8_t*)(@hdr[1]) and ((string_header_t*)str)[-1]
... and maybe wrapping in inline static functions hdr2str and str2hdr
in my code added the string header cast as DEFINE macro thx! looks lot's better
@jr.notes.cs: a typo. I meant &hdr[1] but hdr + 1 would be the same.
|
0

Here's a "smart" string suite I wrote for another SO answer a while back. It may help a bit:

// javapgmr/xstr -- "smart" string "class" for C

typedef struct {
    size_t xstr_maxlen;                 // maximum space in string buffer
    char *xstr_lhs;                     // pointer to start of string
    char *xstr_rhs;                     // pointer to current string append
} xstr_t;

// xstrinit -- reset string buffer
void
xstrinit(xstr_t *xstr)
{

    memset(xstr,0,sizeof(xstr));
}

// xstragain -- reset string buffer
void
xstragain(xstr_t xstr)
{

    xstr->xstr_rhs = xstr->xstr_lhs;
}

// xstrgrow -- grow string buffer
void
xstrgrow(xstr_t *xstr,size_t needlen)
{
    size_t curlen;
    size_t newlen;
    char *lhs;

    lhs = xstr->xstr_lhs;

    // get amount we're currently using
    curlen = xstr->xstr_rhs - lhs;

    // get amount we'll need after adding the whatever
    newlen = curlen + needlen + 1;

    // allocate more if we need it
    if ((newlen + 1) >= xstr->xstr_maxlen) {
        // allocate what we'll need plus a bit more so we're not called on
        // each add operation
        xstr->xstr_maxlen = newlen + 100;

        // get more memory
        lhs = realloc(lhs,xstr->xstr_maxlen);
        xstr->xstr_lhs = lhs;

        // adjust the append pointer
        xstr->xstr_rhs = lhs + curlen;
    }
}

// xstraddchar -- add character to string
void
xstraddchar(xstr_t *xstr,int chr)
{

    // get more space in string buffer if we need it
    xstrgrow(xstr,1);

    // add the character
    *xstr->xstr_rhs++ = chr;

    // maintain the sentinel/EOS as we go along
    *xstr->xstr_rhs = 0;
}

// xstraddstr -- add string to string
void
xstraddstr(xstr_t *xstr,const char *str)
{
    size_t len;

    len = strlen(str);

    // get more space in string buffer if we need it
    xstrgrow(xstr,len);

    // add the string
    memcpy(xstr->xstr_rhs,str,len);
    xstr->xstr_rhs += len;

    // maintain the sentinel/EOS as we go along
    *xstr->xstr_rhs = 0;
}

// xstrcstr -- get the "c string" value
char *
xstrcstr(xstr_t *xstr,int chr)
{

    return xstr->xstr_lhs;
}

// xstrfree -- release string buffer data
void
xstrfree(xstr_t *xstr)
{
    char *lhs;

    lhs = xstr->xstr_lhs;
    if (lhs != NULL)
        free(lhs);

    xstrinit(xstr);
}

2 Comments

Thank! I have the similar smart string but I wanted to make it little smarter because if I use the concealment of the header (as is shown in my function at the top) then for the world the string appears as clasic char* string and can be simply put to the printf() and such because its pointing to first string character. I'm rather searching what to do with the realloc function, if it is correct or if I should modify the functions to recognize the pointer change.
I've reviewed your resize and it seems okay. But, I'd edit your question and post the struct definition and a few more functions as the root of the problem may be in the initial creation of the header [not shown]. Also, whenever I do "hidden" structs like this, I put a magic number signature in the struct to check for corruption/misalignment. The "smart" struct is like C++ std::string. It's usually a benefit that all caller's pass around pointers to it (e.g. many useful funcs possible). Getting the C string is minor as in my xstrcstr

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.