Question

I have a little C code that dumps a process memory then tries to REGEX match a string. All goes well if I want to dump the process but REGEX fails, or I wrongly search the memory buffer. Any idea?

#define _LARGEFILE64_SOURCE

#include <stdlib.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <unistd.h>
#include <fcntl.h>
#include <regex.h>

void dump_region(int fd, off64_t start, off64_t end)
{
        char buf[4096];
        int a, i;
        regex_t re;
        regmatch_t pm;

        a = regcomp(&re, "([0-9]{10,20})", REG_EXTENDED);

        if(a!=0)
            printf(" -> Error: Invalid Regex");

        lseek64(fd, start, SEEK_SET);
        while(start < end) {
         int rd;

         rd = read(fd, buf, 4096);
         //write(STDOUT_FILENO, buf, rd); // HERE dumping is OK
         a = regexec(&re, &buf[0], 1, &pm, REG_EXTENDED); // something I do wrong here
         if(a==0) {
             for(i = pm.rm_so; i < pm.rm_eo; i++)
                 printf("%c", buf[i]);
                 printf("\n");
         }
         start += 4096;
        }
}

int main(int argc, char *argv[])
{
        FILE *maps;
        int mem;
        pid_t pid;
        char path[BUFSIZ];

        if(argc < 2) {
         fprintf(stderr, "usage: %s pid\n", argv[0]);
         return EXIT_FAILURE;
        }

        pid = strtol(argv[1], NULL, 10);
        if(ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1) {
         perror("ptrace");
         return EXIT_FAILURE;
        }

        snprintf(path, sizeof(path), "/proc/%d/maps", pid);
        maps = fopen(path, "r");

        snprintf(path, sizeof(path), "/proc/%d/mem", pid);
        mem = open(path, O_RDONLY);

        if(maps && mem != -1) {
         char buf[BUFSIZ + 1];

         while(fgets(buf, BUFSIZ, maps)) {
                off64_t start, end;

                sscanf(buf, "%llx-%llx", &start, &end);
                dump_region(mem, start, end);
         }
        }

        ptrace(PTRACE_DETACH, pid, NULL, NULL);
        if(mem != -1)
         close(mem);
        if(maps)
         fclose(maps);

        return EXIT_SUCCESS;
}

EDIT:

Tried another version, still something going wrong or something I just miss...

#define _LARGEFILE64_SOURCE

#include <stdlib.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <unistd.h>
#include <fcntl.h>
#include <regex.h>

void dump_region(int fd, off64_t start, off64_t end)
{
        char buf[4096];
        int status,i;
        int cflags = REG_EXTENDED;
        regmatch_t pmatch[1];
        const size_t nmatch=1;
        regex_t reg;
        const char *pattern="([0-9]{10,20})";

        regcomp(&reg, pattern, cflags);

        lseek64(fd, start, SEEK_SET);
        while(start < end) {
                int rd;

                rd = read(fd, buf, sizeof buf - 1);
                if(rd > 0)
                {
                  buf[rd] = '\0';
                  status = regexec(&reg, buf, nmatch, pmatch, 0);
                  if(status == REG_NOMATCH)
                        printf("No Match\n");
                  else if(status == 0){
                        printf("Match:\n");
                        for (i=pmatch[0].rm_so; i<pmatch[0].rm_eo; ++i) {
                                putchar(buf[i]);
                        }
                        printf("\n");
                  }
                  regfree(&reg);
                  return;
                }
                start += 4096;
        }
}

int main(int argc, char *argv[])
{
        FILE *maps;
        int mem;
        pid_t pid;
        char path[BUFSIZ];

        if(argc < 2) {
                fprintf(stderr, "usage: %s pid\n", argv[0]);
                return EXIT_FAILURE;
        }

        pid = strtol(argv[1], NULL, 10);
        if(ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1) {
                perror("ptrace");
                return EXIT_FAILURE;
        }

        snprintf(path, sizeof(path), "/proc/%d/maps", pid);
        maps = fopen(path, "r");

        snprintf(path, sizeof(path), "/proc/%d/mem", pid);
        mem = open(path, O_RDONLY);

        if(maps && mem != -1) {
                char buf[BUFSIZ + 1];

                while(fgets(buf, BUFSIZ, maps)) {
                        off64_t start, end;

                        sscanf(buf, "%llx-%llx", &start, &end);
                        dump_region(mem, start, end);
                }
        }

        ptrace(PTRACE_DETACH, pid, NULL, NULL);
        if(mem != -1)
                close(mem);
        if(maps)
                fclose(maps);

        return EXIT_SUCCESS;
}

Any help? Idea?

UPDATE. Seems the second version partially works but from about 1193 matches I get with an egrep from the dumped memory file I get only TWO with my code. Any idea?

Was it helpful?

Solution

There is no guarantee that the buffer you pass to regexec() is a valid string, which is should be. The manual page for regexec() describes the function as:

regexec() is used to match a null-terminated string against the precompiled pattern buffer [...]

So, you must make sure the data being searched is a proper string, by doing something like:

rd = read(fd, buf, sizeof buf - 1);
if(rd > 0)
{
  buf[rd] = '\0';
  a = regexec(&re, buf, 1, &pm, REG_EXTENDED);
  /* ... rest of code ... */
}  

This also protects against read errors, when rd will be < 0.

As a general note, realize that "streaming" a regular expression matcher isn't this trivial in general; what if the RE match straddles your read buffer size? Then you risk missing a match, since you only allow the matcher to see disjoint "windows" into the actual data.

Furter, as pointed out in @xtmtrx's comment, the data you're reading isn't text, it's binary. This means it will often contain lots of "strange" values, for instance embedded 0-bytes that will look (to regexec()) as string terminators. If one of those appear before some of the data you're looking for in one block, you will miss the data since regexec() won't read past the end of string.

One solution is to filter out the non-textual data before trying to match, use a loop and isprint(), for instance.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top