I'm playing around with the "fat-pointer" idea for strings. Basically, I have a header structure holding the capacity and length information. I allocate the header together with a preset number of characters, then return a pointer to the first character. When I want the header info, I subtract `sizeof` the header from that pointer.
All functions are working properly the way I expect them to except for the resize function:
/* Handle to a string's byte data. string_alloc() returns a pointer to the
 * first payload byte; the string_header_t lives immediately before it. */
typedef uint8_t* utf8;
/*
 * Resize string
 *
 * Make sure the string referenced by *str has space for at least `room`
 * more bytes beyond its current length, growing the allocation if needed.
 *
 * str  - address of the string handle; on growth *str is overwritten with
 *        the (possibly relocated) payload pointer.
 * room - number of additional bytes the caller intends to store.
 *
 * Returns true when the space is available, false when str/*str is NULL,
 * the requested size would overflow size_t, or realloc() fails. On failure
 * the string and its header are left exactly as they were.
 *
 * NOTE: after a successful call every other copy of the old payload pointer
 * may be dangling, because realloc() is free to move the block. Callers must
 * continue with the value stored in *str.
 */
bool string_resize( utf8 *str, size_t room ) {
    if ( str == NULL || *str == NULL )
        return false;

    string_header_t *hdr = (string_header_t *) (*str - sizeof(string_header_t));
    size_t len = hdr->length;

    /* len + room must not wrap, otherwise the capacity test below lies. */
    if ( room > SIZE_MAX - len )
        return false;

    size_t cap = len + room;
    if ( cap <= hdr->capacity ) {
        /* Enough spare capacity already. */
        return true;
    }

    /* Growth policy: double small strings, grow large ones linearly. */
    if ( cap < MAX_PREALLOC ) {
        if ( cap > SIZE_MAX / 2 )
            return false;
        cap *= 2;
    } else {
        if ( cap > SIZE_MAX - MAX_PREALLOC )
            return false;
        cap += MAX_PREALLOC;
    }

    /* Header + payload + one extra byte reserved for the terminator. */
    if ( cap > SIZE_MAX - sizeof(string_header_t) - 1 )
        return false;

    void *grown = realloc( hdr, sizeof(string_header_t) + cap + 1 );
    if ( grown == NULL ) {
        /* The old block is untouched and still records the old capacity. */
        return false;
    }

    /* Record the new capacity only once the allocation really exists. */
    hdr = grown;
    hdr->capacity = cap;
    *str = (utf8) grown + sizeof(string_header_t);

    /* Remove garbage if there is any after the string content */
    memset( *str + len, 0, cap - len + 1 );
    return true;
}
Valgrind reports that I am reading and writing memory that is no longer allocated (it always happens when I try to access the newly added part of the string).
As you can see, the parameter is (without the typedef) a `uint8_t**`, so I should be passing a correct pointer-to-pointer to the function and then changing the pointer it refers to.
Any help greatly appreciated.
[update 1] Additional functions for the context of string manipulation:
/* Bookkeeping record stored immediately before the character data of a string. */
typedef struct string_header {
size_t capacity; /* payload bytes the string may hold (set by string_alloc / string_resize) */
size_t length; /* payload bytes currently in use (advanced by string_push) */
} string_header_t;
/*
* Allocate the string with the prefered length.
*/
utf8 string_alloc( size_t len ) {
struct string_header *hdr = calloc(1, sizeof(string_header_t) + sizeof(uint8_t) * len);
assert( hdr );
hdr->capacity = len;
hdr->length = 0;
return ((utf8) hdr) + sizeof(string_header_t);
}
/*
 * Allocate the new string with the initial default capacity.
 *
 * Thin wrapper that forwards INITIAL_CAPACITY to string_alloc() and returns
 * whatever string_alloc() returns.
 */
utf8 string_new( void ) {
    /* `(void)` makes this a real prototype, so stray arguments are rejected. */
    return string_alloc( INITIAL_CAPACITY );
}
/*
 * Delete the string.
 *
 * self - payload pointer of the string to release; NULL is accepted and
 *        ignored.
 */
void string_dealloc( utf8 self ) {
    if ( self != NULL ) {
        /* The allocation starts at the header, which sits right before the
         * payload, so that is the address that has to be handed to free(). */
        free( self - sizeof(string_header_t) );
    }
}
/*
 * Store one byte at the current end of the string and advance its length.
 *
 * No capacity check is made here; the caller is expected to have reserved
 * the space beforehand.
 */
static inline void string_push( utf8 s, char c ) {
    string_header_t *header = (string_header_t *) (s - sizeof(string_header_t));

    s[header->length] = c;
    header->length += 1;
}
/*
 * Append a single byte to the string, reserving space for it first.
 *
 * Returns ARDP_SUCCESS when the byte was stored and ARDP_FAILURE when the
 * space could not be reserved.
 *
 * NOTE(review): `str` is received by value. If string_resize() moves the
 * buffer, only this local copy is updated; the pointer held by the caller
 * keeps referring to the old, freed block. Fixing that needs a signature
 * change (take `utf8 *` or return the new pointer).
 */
bool string_append_char( utf8 str, char c ) {
    if ( string_resize(&str, 1) == ARDP_SUCCESS ) {
        string_push( str, c );
        return ARDP_SUCCESS;
    }
    return ARDP_FAILURE;
}
/*
 * Append the UTF-8 encoding of code point `cp` to the string.
 *
 * s  - string to append to.
 * cp - Unicode code point.
 *
 * Returns false when `cp` is not encodable (negative, above 0x10FFFF, or a
 * UTF-16 surrogate in 0xD800..0xDFFF) or when space cannot be reserved.
 * For cp < 0x80 the result of string_append_char() is returned; for longer
 * sequences true is returned once all bytes are stored.
 *
 * NOTE(review): `s` is received by value. If string_resize() moves the
 * buffer, only this local copy is updated; the pointer held by the caller
 * keeps referring to the old block.
 */
bool string_append_utf8( utf8 s, int cp ) {
    if ( cp < 0 || cp > 0x10ffff )
        return false;

    /* Surrogates are not scalar values; encoding them yields invalid UTF-8. */
    if ( cp >= 0xD800 && cp <= 0xDFFF )
        return false;

    if ( cp < 0x80 )
        return string_append_char( s, cp & 0x7F );

    /* Build the multi-byte sequence first, then reserve and store it. */
    uint8_t seq[4];
    size_t  count;

    if ( cp < 0x800 ) {
        seq[0] = (uint8_t) (0xC0 | ((cp >> 6) & 0x1F));
        seq[1] = (uint8_t) (0x80 | (cp & 0x3F));
        count  = 2;
    } else if ( cp < 0x10000 ) {
        seq[0] = (uint8_t) (0xE0 | ((cp >> 12) & 0xF));
        seq[1] = (uint8_t) (0x80 | ((cp >> 6) & 0x3F));
        seq[2] = (uint8_t) (0x80 | (cp & 0x3F));
        count  = 3;
    } else {
        seq[0] = (uint8_t) (0xF0 | ((cp >> 18) & 0x7));
        seq[1] = (uint8_t) (0x80 | ((cp >> 12) & 0x3F));
        seq[2] = (uint8_t) (0x80 | ((cp >> 6) & 0x3F));
        seq[3] = (uint8_t) (0x80 | (cp & 0x3F));
        count  = 4;
    }

    if ( string_resize( &s, count ) != ARDP_SUCCESS )
        return false;

    for ( size_t i = 0; i < count; i++ )
        string_push( s, (char) seq[i] );

    return true;
}
/*
 * Terminate the string: reserve one byte past the current length and store
 * a NUL there. The length itself is not changed.
 *
 * Returns true when the terminator was written, false when the byte could
 * not be reserved.
 *
 * NOTE(review): `str` is received by value. If string_resize() moves the
 * buffer, only this local copy is updated; the pointer held by the caller
 * keeps referring to the old block.
 */
bool string_finish( utf8 str ) {
    /* string_resize() returns true on success, so give up only when it
     * fails (the previous test was inverted and bailed out on success). */
    if ( !string_resize(&str, 1) )
        return false;

    string_header_t *hdr = (string_header_t *) (str - sizeof(string_header_t));
    str[hdr->length] = '\0';
    return true;
}
[update 2] Valgrind logs (all of them are almost same as this):
==96370== Invalid read of size 8
==96370== at 0x100011201: string_append_char (string.c:68)
==96370== by 0x100000AE7: test_string (example.c:84)
==96370== by 0x100000BEA: main (example.c:106)
==96370== Address 0x100aac6d0 is 0 bytes inside a block of size 24 free'd
==96370== at 0x1000098B8: realloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370== by 0x100011243: string_append_char (string.c:92)
==96370== by 0x100000ADA: test_string (example.c:83)
==96370== by 0x100000BEA: main (example.c:106)
==96370== Block was alloc'd at
==96370== at 0x100009551: calloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370== by 0x1000110F2: string_new (string.c:38)
==96370== by 0x100000A5A: test_string (example.c:72)
==96370== by 0x100000BEA: main (example.c:106)
==96370== Invalid write of size 8
==96370== at 0x100011274: string_append_char (string.h:44)
==96370== by 0x100000AE7: test_string (example.c:84)
==96370== by 0x100000BEA: main (example.c:106)
==96370== Address 0x100aac6d8 is 8 bytes inside a block of size 24 free'd
==96370== at 0x1000098B8: realloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370== by 0x100011243: string_append_char (string.c:92)
==96370== by 0x100000ADA: test_string (example.c:83)
==96370== by 0x100000BEA: main (example.c:106)
==96370== Block was alloc'd at
==96370== at 0x100009551: calloc (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==96370== by 0x1000110F2: string_new (string.c:38)
==96370== by 0x100000A5A: test_string (example.c:72)
==96370== by 0x100000BEA: main (example.c:106)
[update 3] Some example code:
/*
 * Smoke test: build a URL one character at a time, print it, release it.
 *
 * The results of string_append_char() are ignored, and `str` is handed over
 * by value, so this variable is never refreshed if an append relocates the
 * buffer. string_finish() is not called before printing.
 */
void test_string(void) {
    static const char text[] = "http://google.com/?s=fish";

    utf8 str = string_new();
    string_debug( str );

    /* sizeof includes the literal's terminator, which is not appended. */
    for ( size_t i = 0; i < sizeof text - 1; i++ )
        string_append_char( str, text[i] );

    printf("String %s", str);
    string_dealloc(str);
}
`#define is ==`, `#define isnt !=` - please don't!
`string_resize()` doesn't return the address of the new allocation. There's no way for the caller to know anything about the new memory block. Therefore they continue to scribble on the freed memory.
`string_resize` does actually return the address of the new allocation, because it takes a `uint8_t**` as a parameter, allowing the `uint8_t*` to be modified. But the function which calls `string_resize` doesn't.