Question

I have an std::string containing a command to be executed with execv, what is the best "C++" way to convert it to the "char *argv[]" that is required by the second parameter of execv()?

To clarify:

std::string cmd = "mycommand arg1 arg2";
char *cmd_argv[];

StrToArgv(cmd, cmd_argv); // how do I write this function?

execv(cmd_argv[0], cmd_argv);
Was it helpful?

Solution

std::vector<char *> args;
std::istringstream iss(cmd);

std::string token;
while(iss >> token) {
  char *arg = new char[token.size() + 1];
  copy(token.begin(), token.end(), arg);
  arg[token.size()] = '\0';
  args.push_back(arg);
}
args.push_back(0);

// now exec with &args[0], and then:

for(size_t i = 0; i < args.size(); i++)
  delete[] args[i];

Of course, this won't work with commans that use quoting like rm "a file.mp3". You can consider the POSIX function wordexp which cares about that and much more.

OTHER TIPS

Very non-unixy answers here. What's wrong with:

std::string cmd = "echo hello world";
execl("/bin/sh", "/bin/sh", "-c", cmd.c_str(), NULL);

Why bother writing a command line parser when there's a perfectly good one already on the system?

(Note: one good reason is because you don't trust the string you're about to execute. One hopes that this is already true, but the shell will do "more" with that string than a naive whitespace-splitter will and thus open more security holes if you aren't careful.)

A combination of the c_str() string method and strtok() to split it up by spaces should get you the array of strings you need to pass to exec() and its related functions.

Perhaps split_winmain from Boost.ProgramOptions. Boost is a good choice in most cases. http://www.boost.org/doc/libs/1_40_0/doc/html/program_options/howto.html#id1396212

If you are only interested in Windows (other kernels generally don't know about command lines in the Windows sense), you can use the API function CommandLineToArgvW which uses the same conventions as the MS C runtime.

In general it depends on the quoting style of the platform and/or the shell. The Microsoft C Runtime uses a quite different style than e.g. bash!

OK, I've been stumbling over this myself enough times. This is straight "C", so it can be plugged into either C or C++. It treats single and double quote strings differently. The caller is responsible for deallocating argv[0] (if not NULL) and argv.

#include 
#include 
#include 
#include 

typedef enum {
    STR2AV_OK       = 0,
    STR2AV_UNBALANCED_QUOTE
} str_to_argv_err_t;

#ifndef NUL
#define NUL '\0'
#endif

static char const nomem[] = "no memory for %d byte allocation\n";

static str_to_argv_err_t
copy_raw_string(char ** dest_p, char ** src_p);

static str_to_argv_err_t
copy_cooked_string(char ** dest_p, char ** src_p);

static inline void *
Xmalloc(size_t sz)
{
    void * res = malloc(sz);
    if (res == NULL) {
        fprintf(stderr, nomem, sz);
        exit(EXIT_FAILURE);
    }
    return res;
}

static inline void *
Xrealloc(void * ptr, size_t sz)
{
    void * res = realloc(ptr, sz);
    if (res == NULL) {
        fprintf(stderr, nomem, sz);
        exit(EXIT_FAILURE);
    }
    return res;
}

str_to_argv_err_t
string_to_argv(char const * str, int * argc_p, char *** argv_p)
{
    int     argc = 0;
    int     act  = 10;
    char ** res  = Xmalloc(sizeof(char *) * 10);
    char ** argv = res;
    char *  scan;
    char *  dest;
    str_to_argv_err_t err;

    while (isspace((unsigned char)*str))  str++;
    str = scan = strdup(str);

    for (;;) {
        while (isspace((unsigned char)*scan))  scan++;
        if (*scan == NUL)
            break;

        if (++argc >= act) {
            act += act / 2;
            res  = Xrealloc(res, act * sizeof(char *));
            argv = res + (argc - 1);
        }

        *(argv++) = dest = scan;

        for (;;) {
            char ch = *(scan++);
            switch (ch) {
            case NUL:
                goto done;

            case '\\':
                if ( (*(dest++) = *(scan++)) == NUL)
                    goto done;
                break;

            case '\'':
                err = copy_raw_string(&dest, &scan);
                if (err != STR2AV_OK)
                    goto error_leave;
                break;

            case '"':
                err = copy_cooked_string(&dest, &scan);
                if (err != STR2AV_OK)
                    goto error_leave;
                break;

            case ' ':
            case '\t':
            case '\n':
            case '\f':
            case '\r':
            case '\v':
            case '\b':
                goto token_done;

            default:
                *(dest++) = ch;
            }
        }

    token_done:
        *dest = NUL;
    }

done:

    *argv_p = res;
    *argc_p = argc;
    *argv   = NULL;
    if (argc == 0)
        free((void *)str);

    return STR2AV_OK;

error_leave:

    free(res);
    free((void *)str);
    return err;
}

static str_to_argv_err_t
copy_raw_string(char ** dest_p, char ** src_p)
{
    for (;;) {
        char ch = *((*src_p)++);

        switch (ch) {
        case NUL: return STR2AV_UNBALANCED_QUOTE;
        case '\'':
            *(*dest_p) = NUL;
            return STR2AV_OK;

        case '\\':
            ch = *((*src_p)++);
            switch (ch) {
            case NUL:
                return STR2AV_UNBALANCED_QUOTE;

            default:
                /*
                 * unknown/invalid escape.  Copy escape character.
                 */
                *((*dest_p)++) = '\\';
                break;

            case '\\':
            case '\'':
                break;
            }
            /* FALLTHROUGH */

        default:
            *((*dest_p)++) = ch;
            break;
        }
    }
}

static char
escape_convt(char ** src_p)
{
    char ch = *((*src_p)++);

    /*
     *  Escape character is always eaten.  The next character is sometimes
     *  treated specially.
     */
    switch (ch) {
    case 'a': ch = '\a'; break;
    case 'b': ch = '\b'; break;
    case 't': ch = '\t'; break;
    case 'n': ch = '\n'; break;
    case 'v': ch = '\v'; break;
    case 'f': ch = '\f'; break;
    case 'r': ch = '\r'; break;
    }

    return ch;
}


static str_to_argv_err_t
copy_cooked_string(char ** dest_p, char ** src_p)
{
    for (;;) {
        char ch = *((*src_p)++);
        switch (ch) {
        case NUL: return STR2AV_UNBALANCED_QUOTE;
        case '"':
            *(*dest_p) = NUL;
            return STR2AV_OK;

        case '\\':
            ch = escape_convt(src_p);
            if (ch == NUL)
                return STR2AV_UNBALANCED_QUOTE;
            /* FALLTHROUGH */

        default:
            *((*dest_p)++) = ch;
            break;
        }
    }
}

This is a variation on litb's answer, but without all the manual memory allocation. It still won't handle quoting.

#include <vector>
#include <string>
#include <sstream>

std::string cmd = "mycommand arg1 arg2";
std::istringstream ss(cmd);
std::string arg;
std::list<std::string> ls;
std::vector<char*> v;
while (ss >> arg)
{
   ls.push_back(arg); 
   v.push_back(const_cast<char*>(ls.back().c_str()));
}
v.push_back(0);  // need terminating null pointer

execv(v[0], &v[0]);

I feel kind of dirty about the const_cast<>, but programs really shouldn't be modifying the contents of the argv strings.

You can use the c_str() function of std::string to convert to char*. The strtok function will split the string using the ' ' delimiter.

Matt Peitrek's LIBTINYC has a module called argcargv.cpp that takes a string and parses it out to the argument array taking quoted arguments into account. Note that it's Windows-specific, but it's pretty simple so should be easy to move to whatever platform you want.

If you do that, also change it to take as parameters the loaction to put the count and the a pointer to the argv array instead of using externs (just my little bit of advice). Matt didn't need that because LIBTINYC was the runtime.

Alternatively, you can look in your compiler's runtime source (nearly all provide it) to see what they do to parse the commandline and either call that directly (if that turns out to be workable) or borrow the ideas from that bit of code.

May be it is too late to answer on this question but you could use standart POSIX functions glob or wordexp:

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <wordexp.h>

int
main(int argc, char **argv)
{
   wordexp_t p;
   char *exec_path = "/bin/ls";

   p.we_offs = 1;
   wordexp("-l -t /etc", &p, WRDE_DOOFFS);
   p.we_wordv[ 0 ] = exec_path;
   execv(exec_path, p.we_wordv);

   /* This code is unreachable */
   exit(EXIT_SUCCESS);
}

It would prepare 3 parameters: -l (long listing format), -t (sort by modification time) and directory /etc to list, and run /bin/ls. Call wordexp() gives you exactly the same result as call /bin/sh -c recomended previously but spawaned process would have parent process not /bin/sh.

As it turned out a function exist somewhat hidden in boost program options for this. The split_unix() function works with escaped and quoted command lines.

#include "boost/program_options/parsers.hpp"


auto parts = boost::program_options::split_unix(commandLine);
std::vector<char*> cstrings ;
for(auto& str : parts){
    cstrings.push_back(const_cast<char*> (str.c_str()));
}

int argc = (int)cstrings.size();
char** argv = cstrings.data();
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top