Question

I wrote a simple url parser using strtok(). here's the code

#include <stdio.h>
#include <stdlib.h>

typedef struct {
    char *protocol;
    char *host;
    int port;
    char *path;
} aUrl;


void parse_url(char *url, aUrl *ret) {

    printf("Parsing %s\n", url);
    char *tmp = (char *)_strdup(url);
    //char *protocol, *host, *port, *path;
    int len = 0;

    // protocol agora eh por exemplo http: ou https:
    ret->protocol = (char *) strtok(tmp, "/");
    len = strlen(ret->protocol) + 2;

    ret->host = (char *) strtok(NULL, "/");


    len += strlen(ret->host);

    //printf("char at %d => %c", len, url[len]);

    ret->path = (char *)_strdup(&url[len]);

    ret->path = (char *) strtok(ret->path, "#");

    ret->protocol = (char *) strtok(ret->protocol, ":");

    // host agora é por exemplo address.com:8080
    //tmp = (char *)_strdup(host);
    //strtok(tmp, ":");
    ret->host = (char *) strtok(ret->host, ":");
    tmp = (char *) strtok(NULL, ":");

    if(tmp == NULL) {
        if(strcmp(ret->protocol, "http") == 0) {
            ret->port = 80;
        } else if(strcmp(ret->protocol, "https") == 0) {
            ret->port = 443;
        }
    } else {
        ret->port = atoi(tmp);
    }


    //host = (char *) strtok(NULL, "/");




}

/*
 * 
 */
int main(int argc, char** argv) {
    printf("hello moto\n");

    aUrl myUrl;
    parse_url("http://teste.com/Teste/asdf#coisa", &myUrl);


    printf("protocol is %s\nhost is %s\nport is %d\npath is %s\n", myUrl.protocol, myUrl.host, myUrl.port, myUrl.path);

    return (EXIT_SUCCESS);
}

As you can see, I use strtok() a lot so I can "slice" the url. I don't need to support urls different than http or https so the way it's done solves all of my problems. My concern is (this is running on an embedded device) - Am I wasting memory ? When I write something like

ret->protocol = (char *) strtok(tmp, "/");

And then later call

ret->protocol = (char *) strtok(ret->protocol, ":");

Does me first pointer ret->protocol held remain in memory ? I thought that maybe I should set the first call to a tmp pointer, call strtok pointing ret->protocol to the right portion of the string (the second call) and then free(tmp).

What should be the best way to use strtok ?

Was it helpful?

Solution

To answer your question directly, strtok only returns a pointer to a location inside the string you give it as input-- it doesn't allocate new memory for you, so shouldn't need to call free on any of the pointers it gives you back in return.

For what it's worth, you could also look into "strchr" and "strstr", which are nondestructive ways of searching for single characters or sequences within strings.

Also note that your memory allocation is problematic here-- you're using strdup() to allocate a new string inside your parse function, and then you're assigning fragments of that memory block to fields of "ret". Your caller will thus be responsible for free'ing the strdup'd string, but since you're only passing that string back implicitly inside ret, the caller needs to know magically what pointer to pass to free. (Probably ret->protocol, but maybe not, depending on how the input looks.)

OTHER TIPS

strtok modifies the string in place, replacing the specified characters with NULL. Since strings in C are NULL-terminated, it now appears that your original pointer is pointing to a shorter string, even though the original string is still there and still occupies the same amount of memory (but with characters replaced with NULL). The end of the string, I think, contains a double-NULL.

The short answer is this: Keep a pointer to the beginning of your string buffer, and have another pointer that is your "current" pointer into the string as you parse it. When you use strtok or iterate over the string in other ways you update the "current" pointer but leave the beginning pointer alone. When you're finished, free() the beginning pointer. No memory leaked.

Do you know you can continue parsing the string using NULL as first parameter of strtok?

First call:

char* token = strtok(string, delimiters);

Then:

token = strtok(NULL, other_delimiters);

This allow you to simplify your code:

int parse_url(char *url, aUrl *ret)
{
//get protocol
char* token = strtok(url, "/");
if( token == NULL )
   return -1;
strcpy(ret->protocol, token);
strcat(ret->protocol, "//");

// skip next '/'
token = strtok(NULL, "/");
if( token == NULL )
   return -1;

//get host
token = strtok(NULL, "/");
if( token == NULL )
   return -1;
strcpy(ret->host, token);

// get path
token = strtok(NULL, "#");
if( token == NULL )
   return -1;
strcpy(ret->path, token);

// ...

return 0;
}

You can see I had a return value to know if parsing was successfully done.

Thanks for sharing your code! I ran it inside valgrind and fixed two memory leaks generated by strdup functions.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
    char *protocol;
    char *host;
    int port;
    char *path;
} URL;

void parse_url(char *url, URL *ret) {
    char *tmp = (char *) strdup(url);
    int len = 0;

    ret->protocol = (char *) strtok(tmp, "/");
    len = strlen(ret->protocol) + 2;
    ret->host = (char *) strtok(NULL, "/");
    len += strlen(ret->host);
    ret->path = (char *) strdup(&url[len]);
    ret->path = (char *) strtok(ret->path, "#");
    ret->protocol = (char *) strtok(ret->protocol, ":");
    ret->host = (char *) strtok(ret->host, ":");
    tmp = (char *) strtok(NULL, ":");

    if (tmp == NULL) {
        if (strcmp(ret->protocol, "http") == 0) {
            ret->port = 80;
        } else if (strcmp(ret->protocol, "https") == 0) {
            ret->port = 443;
        }
    } else {
        ret->port = atoi(tmp);
    }

}

void free_url(URL *url) {
    free(url->path);
    free(url->protocol);
}

int main(int argc, char** argv) {
    URL url;
    parse_url("http://example.com:3000/Teste/asdf#coisa", &url);
    printf("protocol: %s\nhost: %s\nport: %d\npath: %s\n", url.protocol, url.host, url.port, url.path);
    free_url(&url);

    return (EXIT_SUCCESS);
}
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top