Question

Based on the Wikipedia entry as well as the Intel manual, rdpmc should be available to user-mode processes as long as bit 8 of CR4 is set. However, I am still running into general protection error when trying to run rdpmc from userspace even with that bit set.

I am running on an 8-core Intel X3470 on kernel 2.6.32-279.el6.x86_64.

Here is the user-mode program I am trying to execute:

#define _GNU_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>

#include <sched.h>
#include <assert.h>

uint64_t
read_pmc(int ecx)
{
    unsigned int a, d;
    __asm __volatile("rdpmc" : "=a"(a), "=d"(d) : "c"(ecx));
    return ((uint64_t)a) | (((uint64_t)d) << 32);
}

int main(int ac, char **av)
{
    uint64_t start, end;
    cpu_set_t cpuset;
    unsigned int c;
    int i;

    if (ac != 3) {
        fprintf(stderr, "usage: %s cpu-id pmc-num\n", av[0]);
        exit(EXIT_FAILURE);
    }

    i = atoi(av[1]);
    c = atoi(av[2]);

    CPU_ZERO(&cpuset);
        CPU_SET(i, &cpuset);
        assert(sched_setaffinity(0, sizeof(cpuset), &cpuset) == 0);

    printf("%lu\n", read_pmc(c));
    return 0;
}

Here is the kernel module which sets the bit and reads out CR4 so I can manually verify that the bit has been set.

/*  
 *  Enable PMC in user mode.
 */
#include <linux/module.h>
#include <linux/kernel.h>



int init_module(void)
{
    typedef long unsigned int uint64_t;
    uint64_t output;
    // Set CR4, Bit 8  to enable PMC
__asm__("push   %rax\n\t"
                "mov    %cr4,%rax;\n\t"
                "or     $(1 << 7),%rax;\n\t"
                "mov    %rax,%cr4;\n\t"
                "wbinvd\n\t"
                "pop    %rax"
    );

    // Read back CR4 to check the bit.
    __asm__("\t mov %%cr4,%0" : "=r"(output));
    printk(KERN_INFO "%lu", output);

    return 0;
}
void cleanup_module(void)
{
__asm__("push   %rax\n\t"
        "push   %rbx\n\t"
                "mov    %cr4,%rax;\n\t"
                "mov  $(1 << 7), %rbx\n\t"
                "not  %rbx\n\t"
                "and   %rbx, %rax;\n\t"
                "mov    %rax,%cr4;\n\t"
                "wbinvd\n\t"
                "pop    %rbx\n\t"
                "pop    %rax\n\t"
    ); 
}
Was it helpful?

Solution

Apparently, when Intel says Bit 8, they are referring to the 9th bit from the right, since their indexing begins at 0. Replacing $(1 << 7) with $(1 << 8) globally resolves the issue, and allows rdpmc to be called from user mode.

Here is the updated kernel module, also using on_each_cpu to make sure that it is set on every core.

/*  
 *  Read PMC in kernel mode.
 */
#include <linux/module.h>   /* Needed by all modules */
#include <linux/kernel.h>   /* Needed for KERN_INFO */

static void printc4(void) {
    typedef long unsigned int uint64_t;
    uint64_t output;
    // Read back CR4 to check the bit.
    __asm__("\t mov %%cr4,%0" : "=r"(output));
    printk(KERN_INFO "%lu", output);
}

static void setc4b8(void * info) {
    // Set CR4, Bit 8 (9th bit from the right)  to enable
__asm__("push   %rax\n\t"
                "mov    %cr4,%rax;\n\t"
                "or     $(1 << 8),%rax;\n\t"
                "mov    %rax,%cr4;\n\t"
                "wbinvd\n\t"
                "pop    %rax"
    );

    // Check which CPU we are on:
    printk(KERN_INFO "Ran on Processor %d", smp_processor_id());
    printc4();
}

static void clearc4b8(void * info) {
    printc4();
__asm__("push   %rax\n\t"
        "push   %rbx\n\t"
                "mov    %cr4,%rax;\n\t"
                "mov  $(1 << 8), %rbx\n\t"
                "not  %rbx\n\t"
                "and   %rbx, %rax;\n\t"
                "mov    %rax,%cr4;\n\t"
                "wbinvd\n\t"
                "pop    %rbx\n\t"
                "pop    %rax\n\t"
    );
    printk(KERN_INFO "Ran on Processor %d", smp_processor_id());
}



int init_module(void)
{
    on_each_cpu(setc4b8, NULL, 0);
    return 0;
}
void cleanup_module(void)
{
    on_each_cpu(clearc4b8, NULL, 0);
}

OTHER TIPS

Echoing "2" to /sys/bus/event_source/devices/cpu/rdpmc allows user processes to access performance counters via the rdpmc instruction. Note that behaviour has changed. Prior to 4.0 "1" meant "enabled" while meant "0" disable. Now "1" means allow only for processes that have active perf events. More details: http://man7.org/linux/man-pages/man2/perf_event_open.2.html

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top