Question

I have the following code (it reads a process virtual memory and matches some strings using libpcre), it compiles without errors but if I compile it with -Wall I get some warnings I will show after the code.

The compiled code runs but crashes with *** glibc detected *** ./readmempcreuniq: double free or corruption (fasttop): 0x097b9c80 ***, I suspect the problem is on the line pcre_get_substring(page, vector, pairs, 0, &buff); because the first argument of the function expects 'const char *' but gets 'unsigned char *', how can I make it right?

#ifdef TARGET_64
// for 64bit target (see /proc/cpuinfo addr size virtual)
#define MEM_MAX (1ULL << 48)
#else
#define MEM_MAX (1ULL << 32)
#endif

#define _LARGEFILE64_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/ptrace.h>
#include <pcre.h>
#include <locale.h>

int main(int argc, char **argv)
{
    if (argc < 2) {
        printf("Usage: %s <pid>\n", argv[0]);
        exit(1);
    }

    char buf[128];
    int pid = atoi(argv[1]);
    snprintf(buf, sizeof(buf), "/proc/%d/mem", pid);
    int fd = open(buf, O_RDONLY);
    if (fd == -1) {
        fprintf(stderr, "Error opening mem file: %m\n");
        exit(1);
    }

    pcre *f;
    pcre_extra *f_ext;
    char *pattern = "([0-9]{20,22})";
    const char *errstr;
    int errchar;
    int vector[50];
    int vecsize = 50;
    int pairs;
    const char *buff;
    const unsigned char *tables;
    int a;
    int count = 0;
    const char **matches = NULL;
    const char **more_matches;

    char *loc = setlocale(LC_ALL, 0);
    setlocale(LC_ALL, loc);
    tables = pcre_maketables();

    long ptret = ptrace(PTRACE_ATTACH, pid, 0, 0);
    if (ptret == -1) {
        fprintf(stderr, "Ptrace failed: %s\n", strerror(errno));
        close(fd);
        exit(1);
    }

    unsigned char page[4096];
    unsigned long long offset = 0;


    while (offset < MEM_MAX) {
        lseek64(fd, offset, SEEK_SET);

        ssize_t ret;
        ret = read(fd, page, sizeof(page));

        if (ret > 0) {
            page[ret] = '\0';
            if((f = pcre_compile(pattern, PCRE_CASELESS|PCRE_MULTILINE, &errstr, &errchar, tables)) == NULL)
            {
                printf("Error: %s\nCharacter N%i\nPattern:%s\n", errstr, errchar, pattern);
            }
            else
            {
                f_ext = pcre_study(f, 0, &errstr);
                a = 0;

                while((pairs = pcre_exec(f, f_ext, page, sizeof(page), a, PCRE_NOTEMPTY, vector, vecsize)) >=0)
                {
                    pcre_get_substring(page, vector, pairs, 0, &buff);
                    //printf("%s\n", buff);
                    more_matches = realloc(matches, (count+1)* sizeof(*more_matches));
                    if (more_matches!=NULL)
                    {
                        matches=more_matches;
                        matches[count++]=buff;
                    }
                    else
                    {
                        free(matches);
                        puts("Error (re)allocating memory");
                        exit(1);
                    }
                    a = vector[1] + 1;
                }
                int matches_len = count;
                const char *uniques[matches_len];
                int uniques_len = 0;
                int already_exists;
                int i, j;
                for (i = 0; i < matches_len; i++)
                {
                    already_exists = 0;
                    for ( j = 0; j < uniques_len; j++)
                    {
                        if (!strcmp(matches[i], uniques[j]))
                        {
                            already_exists = 1;
                            break;
                        }
                    }
                    if (!already_exists)
                    {
                        uniques[uniques_len] = matches[i];
                        uniques_len++;
                    }
                }
                for (i = 0; i < uniques_len; i++)
                {
                    printf("%s\n", uniques[i]);
                }
                free(matches);
                pcre_free(f);
            }

        }

        offset += sizeof(page);
    }

    ptrace(PTRACE_DETACH, pid, 0, 0);
    close(fd);
    return 0;
}

The error:

xtmtrx@server:~/regex/proc$ ./readmempcreuniq 5663
92991999918876543209
99299299292663552673
111992229922222288
119988922220000077
*** glibc detected *** ./readmempcreuniq: double free or corruption (fasttop): 0x097b9c80 ***
======= Backtrace: =========
/lib/libc.so.6(+0x6c0c1)[0xb7dfa0c1]
/lib/libc.so.6(+0x6d930)[0xb7dfb930]
/lib/libc.so.6(+0x71681)[0xb7dff681]
/lib/libc.so.6(realloc+0xe3)[0xb7dffb13]
./readmempcreuniq[0x8048c86]
/lib/libc.so.6(__libc_start_main+0xe7)[0xb7da4ce7]
./readmempcreuniq[0x80488b1]
======= Memory map: ========
08048000-0804a000 r-xp 00000000 fd:01 68388533                           /root/regex/proc/readmempcreuniq
0804a000-0804b000 r--p 00001000 fd:01 68388533                           /root/regex/proc/readmempcreuniq
0804b000-0804c000 rw-p 00002000 fd:01 68388533                           /root/regex/proc/readmempcreuniq
097a1000-097c2000 rw-p 097a1000 00:00 0                                  [heap]
b7c00000-b7c21000 rw-p b7c00000 00:00 0
b7c21000-b7d00000 ---p b7c21000 00:00 0
b7d66000-b7d80000 r-xp 00000000 fd:01 65901968                           /lib/libgcc_s.so.1
b7d80000-b7d81000 r--p 00019000 fd:01 65901968                           /lib/libgcc_s.so.1
b7d81000-b7d82000 rw-p 0001a000 fd:01 65901968                           /lib/libgcc_s.so.1
b7d8c000-b7d8e000 rw-p b7d8c000 00:00 0
b7d8e000-b7ee5000 r-xp 00000000 fd:01 65901949                           /lib/libc-2.12.1.so
b7ee5000-b7ee7000 r--p 00157000 fd:01 65901949                           /lib/libc-2.12.1.so
b7ee7000-b7ee8000 rw-p 00159000 fd:01 65901949                           /lib/libc-2.12.1.so
b7ee8000-b7eeb000 rw-p b7ee8000 00:00 0
b7eeb000-b7f1e000 r-xp 00000000 fd:01 65901993                           /lib/libpcre.so.3.12.1
b7f1e000-b7f1f000 r--p 00032000 fd:01 65901993                           /lib/libpcre.so.3.12.1
b7f1f000-b7f20000 rw-p 00033000 fd:01 65901993                           /lib/libpcre.so.3.12.1
b7f29000-b7f2c000 rw-p b7f29000 00:00 0
b7f2c000-b7f48000 r-xp 00000000 fd:01 65901940                           /lib/ld-2.12.1.so
b7f48000-b7f49000 r--p 0001b000 fd:01 65901940                           /lib/ld-2.12.1.so
b7f49000-b7f4a000 rw-p 0001c000 fd:01 65901940                           /lib/ld-2.12.1.so
bf8c1000-bf8d6000 rw-p 7ffffffe9000 00:00 0                              [stack]
Aborted

Warnings compiling the code with -Wall switch:

xtmtrx@server:~/regex/proc$ gcc -o readmempcreuniq readmempcreuniq.c -lpcre -Wall readmempcreuniq.c: In function 'main': readmempcreuniq.c:83: warning: pointer targets in passing argument 3 of 'pcre_exec' differ in signedness /usr/include/pcre.h:286: note: expected 'const char *' but argument is of type 'unsigned char *' readmempcreuniq.c:85: warning: pointer targets in passing argument 1 of 'pcre_get_substring' differ in signedness /usr/include/pcre.h:297: note: expected 'const char *' but argument is of type 'unsigned char *'

EDIT:

As per @stdcall tip, I compiled the program with efence then used GDB on the core dump:

xtmtrx@server:~/regex/proc$ ./readmempcreuniq 6036

  Electric Fence 2.1 Copyright (C) 1987-1998 Bruce Perens.
5,
Segmentation fault (core dumped)
xtmtrx@server:~/regex/proc$ gdb ./readmempcreuniq core
GNU gdb (GDB) 7.2-ubuntu
Copyright (C) 2010 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "i686-linux-gnu".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from /root/regex/proc/readmempcreuniq...done.
[New Thread 6093]
Reading symbols from /lib/libpcre.so.3...(no debugging symbols found)...done.
Loaded symbols for /lib/libpcre.so.3
Reading symbols from /usr/lib/libefence.so.0...(no debugging symbols found)...done.
Loaded symbols for /usr/lib/libefence.so.0
Reading symbols from /lib/libc.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib/libc.so.6
Reading symbols from /lib/libpthread.so.0...(no debugging symbols found)...done.
Loaded symbols for /lib/libpthread.so.0
Reading symbols from /lib/ld-linux.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib/ld-linux.so.2
Core was generated by `./readmempcreuniq 6036'.
Program terminated with signal 11, Segmentation fault.
#0  0x08048ef8 in main (argc=2, argv=0xbfe1d2d4) at readmempcreuniq.c:125
125                                                     uniques[uniques_len] = matches[i];

seems @alk is right that the problem resides in uniques[uniques_len] = matches[i];

NEW EDIT:

As per @alk tip, I've changed the line:

for ( j = 0; j < uniques_len; j++)

to:

for ( j = 0; j < matches_len; j++)

now the segfault is elsewhere:

Program terminated with signal 11, Segmentation fault.
#0  0x08048ea7 in main (argc=2, argv=0xbfaaa3e4) at readmempcreuniq.c:118
118                                                     if (!strcmp(matches[i], uniques[j]))
Was it helpful?

Solution

You never reset matches to NULL after free()ing it before the next round in the loop. Consequently it still retains the original value after the first round of reallocs.

Either set it to NULL prior to entering the inner processing loop (redundant on the first pass), or set it back to NULL immediately after the free(matches). Alternatively you could simply make it local to the encompassing scope of the next-outer loop with an initial NULL value as you have it, but the changes previously mentioned are the most minimal I can think of.

Examples

matches = NULL; // HERE
while((pairs = pcre_exec(f, f_ext, page, sizeof(page), a, PCRE_NOTEMPTY, vector, vecsize)) >=0)
{
        pcre_get_substring(page, vector, pairs, 0, &buff);
        //printf("%s\n", buff);
        more_matches = realloc(matches, (count+1)* sizeof(*more_matches));
        if (more_matches!=NULL)
        {
                matches=more_matches;
                matches[count++]=buff;
        }
        else
        {
                free(matches);
                puts("Error (re)allocating memory");
                exit(1);
        }
        a = vector[1] + 1;
}

Or....

for (i = 0; i < uniques_len; i++)
{
        printf("%s\n", uniques[i]);
}
free(matches);
matches = NULL; // or HERE
pcre_free(f);

More Stuff

Continuing down the path of things I've noticed:

This:

ssize_t ret;
ret = read(fd, page, sizeof(page));

if (ret > 0) {
        page[ret] = '\0';

appears to be trying to set a null char terminator. if so, you're invoking undefined behavior on a full-populated buffer. It should be this:

ssize_t ret = read(fd, page, sizeof(page)-1); // NOTE SPACE FOR TERM
if (ret > 0) {
        page[ret] = 0;

If the size of the buffer is specific (you chose 4K for a reason) it should be 4097 to ensure a max-exact 4K buffer.


And another...

You're reading the page, which I cannot claim is or is not requiring to be terminated as I showed in the code before. But assuming it is and you did I I suggested (or.. not), this also looks wrong:

while((pairs = pcre_exec(f, f_ext, page, sizeof(page), a, PCRE_NOTEMPTY, vector, vecsize)) >=0)

Here you're passing the size of the entire buffer; not the size of the actual data you read. I'm the first person to tell you I'm unfamiliar with the API, but I'm fairly sure this should be:

// notice the length of the buffer passed, ret
while((pairs = pcre_exec(f, f_ext, page, ret, a, PCRE_NOTEMPTY, vector, vecsize)) >=0)

In other words, on an undersized read you're telling it the data is longer than it really is. Again, I'm naive to their API, but this seems reasonable.


Of Unique Matches...

Hopefully easier to read.

int matches_len = count, uniques_len = 0;
int i = 0, j = 0;

const char *uniques[matches_len];
for (i=0; i < matches_len; ++i)
{
    for (j = 0; j < uniques_len; ++j)
    {
        if (!strcmp(matches[i], uniques[j]))
            break;
    }

    if (j == uniques_len)
        uniques[uniques_len++] = matches[i];
}

for (i = 0; i < uniques_len; ++i)
    printf("%s\n", uniques[i]);

Continuing on...

Reset count to zero after each page. Right after the free(matches); matches = NULL; would be a good place.

Worth noting. you have no exit case in your outer loop once the file reads start failing, so there will be much slamming on the file that is unable to seek beyond its end. until you reach your limiter count.


Final Thoughts

I think this is close to what you're trying to do:

#define _LARGEFILE64_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/ptrace.h>
#include <pcre.h>
#include <locale.h>
#include <ctype.h>

int main(int argc, char **argv)
{
    // CHANGE TO ACCEPT PROC-ID FROM CMDLINE    
    int pid = 5916;

    setlocale(LC_ALL,"");

    const char *error = NULL;
    int erroffset = 0;
    const char **uniques = NULL;
    size_t uniques_len = 0;

    const char regex[] = "[0-9A-Fa-f]{8}";
    pcre* re = pcre_compile (regex,          /* the pattern */
                    PCRE_MULTILINE|PCRE_DOTALL|PCRE_NEWLINE_ANYCRLF,
                    &error,         /* for error message */
                    &erroffset,     /* for error offset */
                    0);             /* use default character tables */
    if (!re)
    {
        printf("pcre_compile failed (offset: %d), %s\n", erroffset, error);
        return -1;
    }

    // start proc trace
    long ptret = ptrace(PTRACE_ATTACH, pid, 0, 0);
    if (ptret == -1)
    {
        fprintf(stderr, "Ptrace failed: %s\n", strerror(errno));
        exit(1);
    }

    char path[256];
    snprintf(path, sizeof(path), "/proc/%d/maps", pid);
    FILE *maps = fopen(path, "r");
    snprintf(path, sizeof(path), "/proc/%d/mem", pid);
    int mem = open(path, O_RDONLY);

    if(maps && (mem != -1))
    {
        char buf[BUFSIZ + 1];
        while(fgets(buf, BUFSIZ, maps))
        {
            long long unsigned int start, end;
            if (sscanf(buf, "%llx-%llx", &start, &end) != 2)
                break;

            printf("reading %llx - %llx\n", start, end);

            lseek64(mem, start, SEEK_SET);
            while (start < end)
            {
                char page[4096] =  {0};
                int rd = read(mem, page, sizeof(page));
                if (rd < 0)
                    break;

                start += sizeof(page);

                int ov[128] = {0};
                unsigned int ov_len = 0;
                int rc = 0;

                while ((rc = pcre_exec(re, 0, page, (int)(rd), ov_len, 0, ov, 128)) >= 0)
                {
                    int i = 0;
                    for(; i < rc; ++i)
                    {
                        const char *sp = NULL;
                        pcre_get_substring(page, ov, rc, i, &sp);

                        // search unique list
                        size_t j=0;
                        for (;j<uniques_len;++j)
                        {
                            if (!strcmp(sp, uniques[j]))
                            break;
                        }

                        if (uniques_len == j)
                        {
                            const char **tmp = realloc(uniques, (uniques_len+1)*sizeof(*uniques));
                            if (tmp == NULL)
                            {
                                perror("Failed to resize uniques.");
                                pcre_free_substring(sp);
                            }
                            else
                            {
                                uniques = tmp;
                                uniques[uniques_len++] = sp;
                            }
                        }
                        else
                        {   // delete string. not needed
                            pcre_free_substring(sp);
                        }
                    }
                    ov_len = ov[2*(rc-1)]+1;
                }
            }
        }

        fclose(maps);
        close(mem);
    }

    size_t n = 0;
    for (; n<uniques_len; ++n)
    {
        printf("%s\n", uniques[n]);
        pcre_free_substring(uniques[n]);
    }
    printf("total uniques: %lu\n", uniques_len);
    free(uniques);

    ptrace(PTRACE_DETACH, pid, 0, 0);
    return 0;
}

Caveat. I know zero about this API, but what I've seen here and briefly reviewed online. YMMV UAYOR. but it seems to be you had it all along. Just accumulate uniques independent of pages (which I think will still be a problem, page boundaries, but thats for another day).

OTHER TIPS

Besides the issues pointed out by WhozCraig's answer:

The code defines

  const char *uniques[matches_len];

but loops its index j until <uniques_len

    for ( j = 0; j < uniques_len; j++)
    {
      if (!strcmp(matches[i], uniques[j]))
      {
        already_exists = 1;
        break;
      }
    }

So uniques is likely to be accessed out of bounds, provoking undefined behaviuor leading to the crash.

Update:

Further investgations showed this is no issue here, although being a dangerous construction.


The issue is with matches not pointing to proper allocated memory int this line:

 if (!strcmp(matches[i], uniques[j]))

To reveal this issue add proper memory initialisation after realloc()ing memory by changing this code:

          int count = 0;
          const char ** matches = NULL;
          [...]

                    more_matches = realloc(matches, (count+1)* sizeof(*more_matches));
                    if (more_matches!=NULL)
                    {
                        matches=more_matches;
                        matches[count++]=buff;
                    }

to become:

          size_t count = 0, count_prev = 0;
          const char ** matches = NULL;
          [...]

                    more_matches = realloc(matches, (count + 1) * sizeof(*more_matches));
                    if (more_matches != NULL)
                    {
                        memset(more_matches + count_prev, 0, (count + 1 - count_prev) * sizeof(*more_matches));
                        count_prev = count;
                        matches = more_matches;
                        matches[count++] = buff;
                    }

As a general advice: When debugging always compile with symbols on (option -g to gcc), then run the code under gdb and Valgrind. Those two tools put the finger on most of the issues in your code.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top