Question

I'm trying to implement a simplified version of Lamport's Bakery Algorithm in C before I attempt to use it to solve a more complex problem.* The simplification I am making is that the lock is only shared by only two threads instead of N.

I set up two threads (via OpenMP to keep things simple) and they loop, attempting to increment a shared counter within their critical section. If everything goes according to plan, then the final counter value should be equal to the number of iterations. However, here's some example output:

count: 9371470 (expected: 10000000)

Doh! Something is broken, but what? My implementation is pretty textbook (for reference), so perhaps I'm misusing memory barriers? Did I forget to mark something as volatile?

My code:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <omp.h>

typedef struct
{
    volatile bool entering[2];
    volatile uint32_t number[2];
} SimpleBakeryLock_t;

inline void mb() { __sync_synchronize(); }

inline void lock(SimpleBakeryLock_t* l, int id)
{
    int i = id, j = !id;
    uint32_t ni, nj;

    l->entering[i] = true;
    mb();

    ni = 1 + l->number[j];
    l->number[i] = ni;
    mb();

    l->entering[i] = false;
    mb();

    while (l->entering[j]) {
        mb();
    }

    nj = l->number[j];
    mb();
    while ((nj != 0) && (nj < ni || (nj == ni && j < i)))
    {
        nj = l->number[j];   // re-read
        mb();
    }
}

inline void unlock(SimpleBakeryLock_t* l, int id)
{
    l->number[id] = 0;
    mb();
}

SimpleBakeryLock_t x;

int main(void)
{
    const uint32_t iterations = 10000000;
    uint32_t count = 0;

    bool once = false;
    int i;

    memset((void*)&x, 0, sizeof(x));
    mb();

    // set OMP_NUM_THREADS=2 in your environment!
    #pragma omp parallel for schedule(static, 1) private(once, i)
    for(uint32_t dummy = 0; dummy < iterations; ++dummy)
    {
        if (!once)
        {
            i = omp_get_thread_num();
            once = true;
        }

        lock(&x, i);
        {

            count = count + 1;
            mb();
        }
        unlock(&x, i);
    }

    printf("count: %u (expected: %u)\n", count, iterations);

    return 0;
}

To compile and run (on Linux), do:

$ gcc -O3 -fopenmp bakery.c
$ export OMP_NUM_THREADS=2
$ ./a.out
  • I intend to chain simple Bakery locks into a binary tree (tournament style) to achieve mutual exclusion among N threads.
Was it helpful?

Solution

I tracked down two problems and the code now works. Issues:

  1. __sync_synchronize() was not generating the mfence instruction on my platform (Apple's GCC 4.2.1). Replacing __sync_synchronize() with an explicit mfence resolves this issue.
  2. I was doing something wrong with the OpenMP private variables (still not sure what...). Sometimes the two threads entered the lock with the same identity (ex. both may say they were thread 0). Recomputing 'i' with 'omp_get_thread_num' on every iteration seems to do the trick.

Here is the corrected code for completeness:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <omp.h>

#define cpu_relax() asm volatile ("pause":::"memory")
#define mb() asm volatile ("mfence":::"memory")

/* Simple Lamport bakery lock for two threads. */
typedef struct
{
    volatile uint32_t entering[2];
    volatile uint32_t number[2];
} SimpleBakeryLock_t;

void lock(SimpleBakeryLock_t* l, int id)
{
    int i = id, j = !id;
    uint32_t ni, nj;

    l->entering[i] = 1;
    mb();

    ni = 1 + l->number[j];
    l->number[i] = ni;
    mb();

    l->entering[i] = 0;
    mb();

    while (l->entering[j]) {
        cpu_relax();
    }

    do {
        nj = l->number[j];
    } while ((nj != 0) && (nj < ni || (nj == ni && j < i)));
}

void unlock(SimpleBakeryLock_t* l, int id)
{
    mb();  /* prevent critical section writes from leaking out over unlock */
    l->number[id] = 0;
    mb();
}

SimpleBakeryLock_t x;

int main(void)
{
    const int32_t iterations = 10000000;
    int32_t dummy;
    uint32_t count = 0;

    memset((void*)&x, 0, sizeof(x));
    mb();

    // set OMP_NUM_THREADS=2 in your environment!
    #pragma omp parallel for schedule(static, 1)
    for(dummy = 0; dummy < iterations; ++dummy)
    {
        int i = omp_get_thread_num();
        lock(&x, i);
        count = count + 1;
        unlock(&x, i);
    }

    printf("count: %u (expected: %u)\n", count, iterations);

    return 0;
}
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top