Question

I'm using a script to run a program with LD_PRELOAD and a library I wrote to intercept some calls. It works well, but at some point the process calls clone() and I lose the ability to intercept what comes next (the program is run again without my library). Is there any way to overcome this? The call is:

clone(child_stack, 
  CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | 
  CLONE_SYSVSEM | CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID, 
  parent_tidptr, tls, child_tidptr)

Looking over the parameters of clone I saw that there is the ability to trace the child process as well, but nothing pertaining to preloading.

I should also mention that I'm trying to intercept all calls on a specific file descriptor and the process clones file descriptors so I'm not even sure if it would be possible to do what I want without some flag to clone (problem is I don't understand all of them).

UPDATE: I'm using this to try to log all activity done by qemu-dm (which is run by Xen):

#define _LARGEFILE64_SOURCE
#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <dlfcn.h>
#include <errno.h>
#include <stdio.h>
#include <stdarg.h>

/*
 * dprintf(fmt, ...): when __debug__ is non-zero, format a message into a
 * 256-byte stack buffer and write it to stderr (fd 2) and to __outfile__.
 * The writes go through _write_f_ (the real write()), not through the
 * write() hook defined in this file, so logging does not log itself.
 *
 * - snprintf() bounds the output: an over-long message (for example a long
 *   path) is truncated to the buffer size instead of overflowing the stack.
 * - Nothing is written when formatting fails or produces no output, so a
 *   negative length is never converted to a huge size_t.
 * - errno is saved and restored, so a failing log write cannot replace the
 *   errno of the hooked call that the caller is about to inspect.
 * - do { } while (0) makes the macro behave as a single statement.
 */
#define dprintf(...) \
    do { \
        if (__debug__) { \
            int dbg_errno_ = errno; \
            char dbg_buf_[256]; \
            int dbg_len_ = snprintf(dbg_buf_, sizeof dbg_buf_, __VA_ARGS__); \
            if (dbg_len_ >= (int)sizeof dbg_buf_) \
                dbg_len_ = (int)sizeof dbg_buf_ - 1; \
            if (dbg_len_ > 0) { \
                _write_f_(2, dbg_buf_, (size_t)dbg_len_); \
                _write_f_(__outfile__, dbg_buf_, (size_t)dbg_len_); \
            } \
            errno = dbg_errno_; \
        } \
    } while (0)

/*
 * Function-pointer types, one per intercepted libc function. Each mirrors
 * the signature of the hook of the same name defined later in this file and
 * is used to cast the address returned by dlsym().
 */
typedef int (*_open_f_t_)(const char *path, int flags, ...);
typedef int (*_open64_f_t_)(const char *path, int flags, ...);
typedef FILE *(*_fopen_f_t_)(const char *path, const char *mode);
typedef int (*_close_f_t_)(int fd);
typedef ssize_t (*_read_f_t_)(int fd, void *buf, size_t count);
typedef ssize_t (*_write_f_t_)(int fd, const void *buf, size_t count);
typedef off_t (*_lseek_f_t_)(int fd, off_t offset, int whence);

/*
 * Pointers to the next (real) definitions of the intercepted functions.
 * They start out NULL and are filled in by __init__() with
 * dlsym(RTLD_NEXT, ...); every hook forwards its call through them.
 */
static _open_f_t_ _open_f_ = NULL;
static _open64_f_t_ _open64_f_ = NULL;
static _fopen_f_t_ _fopen_f_ = NULL;
static _close_f_t_ _close_f_ = NULL;
static _read_f_t_ _read_f_ = NULL;
static _write_f_t_ _write_f_ = NULL;
static _lseek_f_t_ _lseek_f_ = NULL;
/*
 * Log file descriptor, assigned by __init__(). It is a plain int, so it is
 * initialised to -1 (an invalid descriptor) rather than NULL: NULL is a
 * pointer constant, and the 0 it converts to is a valid descriptor (stdin).
 */
static int __outfile__ = -1;
/* Non-zero while logging is enabled; set by __init__(), cleared by __fini__(). */
static int __debug__ = 0;

/*
 * Library initialiser (registered with the linker option -init,__init__).
 * Resolves the next definition of every intercepted function with
 * dlsym(RTLD_NEXT, ...), opens the log file and turns logging on.
 */
void __init__ (void)
{
    _open_f_ = (_open_f_t_)dlsym(RTLD_NEXT, "open");
    _open64_f_ = (_open64_f_t_)dlsym(RTLD_NEXT, "open64");
    _fopen_f_ = (_fopen_f_t_)dlsym(RTLD_NEXT, "fopen");
    _close_f_ = (_close_f_t_)dlsym(RTLD_NEXT, "close");
    _read_f_ = (_read_f_t_)dlsym(RTLD_NEXT, "read");
    _write_f_ = (_write_f_t_)dlsym(RTLD_NEXT, "write");
    _lseek_f_ = (_lseek_f_t_)dlsym(RTLD_NEXT, "lseek");

    /* NOTE(review): this removes a different path than the log opened
     * below; presumably a leftover from an earlier log name - confirm. */
    unlink("/tmp/qemu-dm-preload.log");

    /* O_CREAT requires an explicit mode argument; without it the new file
     * gets whatever value happens to be in the argument slot. Skip the call
     * entirely if the real open() could not be resolved. */
    if (_open_f_ != NULL)
        __outfile__ = _open_f_("/tmp/out-0", O_WRONLY | O_CREAT | O_APPEND, 0644);
    else
        __outfile__ = -1;

    /* The logger writes through _write_f_, so only enable it when that
     * pointer was resolved. */
    __debug__ = (_write_f_ != NULL);
}

/*
 * Library finaliser (registered with the linker option -fini,__fini__).
 * Turns logging off, then flushes the log descriptor and closes it through
 * the real close().
 */
void __fini__ ()
{
    const int log_fd = __outfile__;

    __debug__ = 0;
    fsync(log_fd);
    _close_f_(log_fd);
}

/*
 * Interposed open(): forwards to the real open() and logs the call and its
 * result.
 *
 * The optional third argument (mode) is read and forwarded whenever the
 * flags require it: for O_CREAT and, where the platform defines it, for
 * O_TMPFILE as well. A NULL path is logged as "(null)" instead of being
 * handed to the %s conversion.
 *
 * Returns whatever the real open() returned.
 */
int open(const char *path, int flags, ...)
{
    const char *shown = path ? path : "(null)";
    int needs_mode = (flags & O_CREAT) != 0;
    int result;

#ifdef O_TMPFILE
    /* O_TMPFILE is a multi-bit flag, so compare against the full mask. */
    if ((flags & O_TMPFILE) == O_TMPFILE)
        needs_mode = 1;
#endif

    if (needs_mode)
    {
        va_list arg;
        int mode;
        va_start (arg, flags);
        mode = va_arg (arg, int);
        va_end (arg);
        result = _open_f_(path, flags, mode);
        dprintf("open(%s, %d, %d) => %d\n", shown, flags, mode, result);
    } else {
        result = _open_f_(path, flags);
        dprintf("open(%s, %d) => %d\n", shown, flags, result);
    }
    return result;
}

/*
 * Interposed open64(): forwards to the real open64() and logs the call and
 * its result. The log line is labelled "open64" so it can be told apart
 * from calls that went through open().
 *
 * The optional third argument (mode) is read and forwarded whenever the
 * flags require it: for O_CREAT and, where the platform defines it, for
 * O_TMPFILE as well. A NULL path is logged as "(null)" instead of being
 * handed to the %s conversion.
 *
 * Returns whatever the real open64() returned.
 */
int open64(const char *path, int flags, ...)
{
    const char *shown = path ? path : "(null)";
    int needs_mode = (flags & O_CREAT) != 0;
    int result;

#ifdef O_TMPFILE
    /* O_TMPFILE is a multi-bit flag, so compare against the full mask. */
    if ((flags & O_TMPFILE) == O_TMPFILE)
        needs_mode = 1;
#endif

    if (needs_mode)
    {
        va_list arg;
        int mode;
        va_start (arg, flags);
        mode = va_arg (arg, int);
        va_end (arg);
        result = _open64_f_(path, flags, mode);
        dprintf("open64(%s, %d, %d) => %d\n", shown, flags, mode, result);
    } else {
        result = _open64_f_(path, flags);
        dprintf("open64(%s, %d) => %d\n", shown, flags, result);
    }

    return result;
}

/*
 * Interposed fopen(): forwards to the real fopen() and logs the path, the
 * mode string and the resulting stream pointer.
 *
 * NULL strings are logged as "(null)" rather than passed to %s, and the
 * stream pointer is cast to void * because that is the type %p expects.
 *
 * Returns whatever the real fopen() returned.
 */
FILE * fopen(const char *path, const char *mode)
{
    FILE *result = _fopen_f_(path, mode);
    dprintf("fopen(%s, %s) => %p\n",
            path ? path : "(null)", mode ? mode : "(null)", (void *)result);
    return result;
}

/*
 * Interposed close(): forwards to the real close(), logs the descriptor and
 * the outcome, and returns that outcome to the caller.
 */
int close(int fd)
{
    int rc;

    rc = _close_f_(fd);
    dprintf("close(%d) => %d\n", fd, rc);
    return rc;
}

/*
 * Interposed read(): forwards to the real read() and logs the descriptor,
 * buffer address, requested size and result.
 *
 * size_t and ssize_t are printed with %zu / %zd so the format matches the
 * argument types on every ABI, not only where they have the width of long.
 *
 * Returns whatever the real read() returned.
 */
ssize_t read(int fd, void *buf, size_t count)
{
    ssize_t result = _read_f_(fd, buf, count);
    dprintf("read(%d, %p, %zu) => %zd\n", fd, buf, count, result);
    return result;
}

/*
 * Interposed write(): forwards to the real write() and logs the descriptor,
 * buffer address, requested size and result.
 *
 * size_t and ssize_t are printed with %zu / %zd so the format matches the
 * argument types on every ABI, not only where they have the width of long.
 *
 * Returns whatever the real write() returned.
 */
ssize_t write(int fd, const void *buf, size_t count)
{
    ssize_t result = _write_f_(fd, buf, count);
    dprintf("write(%d, %p, %zu) => %zd\n", fd, buf, count, result);
    return result;
}

/*
 * Interposed lseek(): forwards to the real lseek() and logs the descriptor,
 * offset, whence value and resulting position.
 *
 * off_t may be wider than long (for example a 64-bit off_t on a 32-bit
 * ABI), so both offsets are converted to long long and printed with %lld
 * instead of being passed to %ld.
 *
 * Returns whatever the real lseek() returned.
 */
off_t lseek(int fd, off_t offset, int whence)
{
    off_t result = _lseek_f_(fd, offset, whence);
    dprintf("lseek(%d, %lld, %d) => %lld\n",
            fd, (long long)offset, whence, (long long)result);
    return result;
}

compiled with gcc -ggdb -shared -fPIC -Wl,-init,__init__ -Wl,-fini,__fini__ -o fileaccesshooks.so -ldl fileaccesshooks.c

wrapper script contents:

#!/bin/bash
export LD_PRELOAD=/home/xception/work/fileaccesshooks.so
exec /usr/lib/xen/bin/qemu-dm-orig "$@"

As observed in the comments below, the environment is actually the same for the task and the process (LD_PRELOAD is the same in both /proc/8408/task/8526/environ and /proc/8408/environ). However, after the call to clone no more data is logged. This is the output of grep -e "testfile" -e "(11" /tmp/out-0:

open(/root/testfile.raw, 2) => 11
read(11, 0x7fffb7259d00, 512) => 512
read(11, 0x7fba6e341200, 512) => 512
read(11, 0x7fba6e341200, 512) => 512
read(11, 0x7fba6e341200, 512) => 512
read(11, 0x7fba6e341200, 512) => 512
read(11, 0x7fba6e341200, 512) => 512
read(11, 0x7fba6e341200, 512) => 512

This is all I get; by comparison, the output of strace -f run on the same executable contains significantly more reads, as well as seeks.

Was it helpful?

Solution 2

After a long investigation here are my findings:

  • #include <unistd.h> was the biggest mistake as it redirects file access calls to their 64-bit equivalents, thus really limiting what I can actually catch (I could only catch the lower reads as the higher ones used read64 or pread64 instead)
  • need to implement all functions with both 32-bit and 64-bit versions
  • although strace reports a lot of lseek and read calls xen's qemu-dm actually uses pread and pread64 instead (which the same strace reports correctly when using qemu instead for some reason)
  • defining _GNU_SOURCE (which is required for RTLD_NEXT) defines off_t to be the same as off64_t so make sure you use the proper types for offsets as the application you are trying to intercept

After removing the unistd.h include and implementing open, open64, fopen, fopen64, read, read64, write, write64, pread, pread64, preadv, preadv64, pwrite, pwrite64, pwritev, pwritev64 and close, I now finally get significantly more output than before and the implementation actually works (there are still some missing file access functions that need to be defined for a complete solution, but the reason I opened this question is solved).

OTHER TIPS

From the clone parameters of CLONE_VM and similar, it looks like this call to clone is simply creating a new thread rather than a new process. I wouldn't expect the resulting thread to reload any libraries and therefore I would not expect your preloaded library to need to act again in the new thread - your existing function implementations should 'just work'; all the jump instructions into your library should remain equally valid in the new thread as the old.

I am therefore suspicious that this is not your problem and that the clone is a red herring.

My only theories are:

  • There's an exec somewhere as well
  • The __init__ code in your library is getting called for each new thread, though this seems very unlikely indeed.

One last point regarding qemu specifically - modern qemu uses coroutines for lots of IO things. It uses various backends depending on what's available on the host system - if you're unlucky, it creates a thread for each one which can result in very, very large numbers of threads. Read here - http://lists.gnu.org/archive/html/qemu-devel/2011-07/msg02894.html - there's some way to get the qemu configure stuff to report what coroutine backend it's using. However, I suspect the Xen qemu-dm might be too old to have this coroutine stuff? I don't know.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top