Question

I am working on an embedded systems project using the Xilinx Zedboard. The board has the ability to asymmetrically split its dual-core ARM A9 processor to run two separate programs simultaneously. I've configured the board to run Linux on one core and a bare-metal application on the other, acting as a hardware controller. For inter-processor communication I am using on-chip memory that is shared between the two processors. I'm struggling with my lock implementation, and I'm curious whether anyone has experience with such things or may be able to point me in the right direction.

I found a mutex implementation on the ARM reference website http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dht0008a/ch01s03s02.html and I have adapted it to C inline assembly. After testing, the lock function seems to hang and I have no idea why. My experience with assembly is somewhat limited; I've seen it in school and understand the high-level concepts, but I'm completely lost in the low-level implementation.

Does this look right or is this even the right approach? All I need is a simple mechanism that will allow me to perform atomic operations on my inter process communication structures (several message queues).

mutex.h

/*
 * mutex.h - spin-lock style mutex kept in memory shared between two cores.
 *
 * The include guard deliberately avoids leading/double underscores: such
 * identifiers are reserved for the C implementation.
 */
#ifndef OCM_MUTEX_H
#define OCM_MUTEX_H

#include <stdint.h>

/* The two values a mutex word may hold. */
#define LOCKED      1
#define UNLOCKED    0

/* A mutex is a single 32-bit word; it must start out as UNLOCKED. */
typedef uint32_t mutex_t;

/*
 * Acquire the mutex at *mutex. Does not return until the calling core has
 * changed the word from UNLOCKED to LOCKED.
 */
extern void ocm_lock_mutex(volatile mutex_t* mutex);

/*
 * Release the mutex at *mutex by storing UNLOCKED. Must only be called by
 * the current holder; no ownership check is performed.
 */
extern void ocm_unlock_mutex(volatile mutex_t* mutex);

#endif /* OCM_MUTEX_H */

mutex.c

#include "mutex.h"

/*
 * Acquire the mutex word at *mutex, waiting until it can be taken.
 *
 * The word is read with LDREX; if it already holds LOCKED the core sleeps in
 * WFE and retries, otherwise STREX tries to write LOCKED and the sequence is
 * retried when the exclusive store reports failure (status 1).
 *
 * mutex: address of the shared mutex word.
 *
 * Operand notes (these are what the asm needs in order to be correct):
 *  - `mutex` is an INPUT. Declaring it as an output ("=r") tells the compiler
 *    the asm produces the pointer, so the register is never loaded with the
 *    caller's address and LDREX/STREX operate on a garbage location.
 *  - `status` is early-clobbered ("=&r") because LDREX writes it before the
 *    address and `locked` inputs have been consumed; it must therefore not
 *    share a register with them (STREX also forbids that overlap).
 *  - "cc" is clobbered by CMP; "memory" stops the compiler from moving
 *    accesses to the protected data across the lock.
 */
void ocm_lock_mutex(volatile mutex_t* mutex) {
    uint32_t status;
    const uint32_t locked = LOCKED;

    __asm__ __volatile__("@ocm_lock_mutex\n"
"1: LDREX   %[status], [%[addr]]\n"            /* load current mutex value   */
"   CMP     %[status], %[locked]\n"            /* already held?              */
"   BEQ     2f\n"                              /* yes: wait, then retry      */
    /* The BEQ above was not taken, so the NE condition always holds here; a
     * plain STREX is equivalent to STREXNE and needs no IT block in Thumb-2. */
"   STREX   %[status], %[locked], [%[addr]]\n" /* try to claim the mutex     */
"   CMP     %[status], #1\n"                   /* 1 => exclusive store failed */
"   BEQ     1b\n"                              /* failed: start over         */
"   DMB\n"                                     /* barrier before using the protected data */
"   B       3f\n"
"2: WFE\n"                                     /* sleep until an event (see SEV in unlock) */
"   B       1b\n"
"3:\n"
    : [status] "=&r" (status)
    : [addr] "r" (mutex), [locked] "r" (locked)
    : "cc", "memory");
}

/*
 * Release the mutex word at *mutex by storing UNLOCKED to it.
 *
 * Sequence: DMB so that accesses to the protected data are ordered before the
 * releasing store, STR of UNLOCKED, DSB so the store has completed, then SEV
 * to signal an event to a core waiting in WFE.
 *
 * mutex: address of the shared mutex word. No check is made that the caller
 *        actually holds the lock.
 *
 * `mutex` must be an INPUT operand: as an output ("=r") the register is left
 * uninitialised and the STR writes to an undefined address. The "memory"
 * clobber keeps the compiler from sinking protected accesses below the
 * release.
 */
void ocm_unlock_mutex(volatile mutex_t* mutex) {
    const uint32_t unlocked = UNLOCKED;

    __asm__ __volatile__("@ocm_unlock_mutex\n"
"   DMB\n"
"   STR %[unlocked], [%[addr]]\n"
"   DSB\n"
"   SEV\n"
    : /* no outputs */
    : [addr] "r" (mutex), [unlocked] "r" (unlocked)
    : "memory");
}
Was it helpful?

Solution

Why don't you use GNU assembly? It should look something like this

@ Values stored in the mutex word.
.equ locked,1
.equ unlocked,0

@ lock_mutex
@ Declare for use from C as extern void lock_mutex(void * mutex);
@
@ Spins until the word addressed by r0 has been changed from unlocked to
@ locked by this core. r0 is used as the address of the mutex word.
@ Overwrites r1, r2 and the condition flags.
    .global lock_mutex
lock_mutex:
    LDR     r1, =locked
1:  LDREX   r2, [r0]
    CMP     r2, r1        @ Test if mutex is locked or unlocked
    BEQ     2f            @ If locked - wait for it to be released, from 2
    STREXNE r2, r1, [r0]  @ Not locked, attempt to lock it (NE always holds here: BEQ fell through)
    CMPNE   r2, #1        @ Check if Store-Exclusive failed (r2 is the STREX status)
    BEQ     1b            @ Failed - retry from 1
    # Lock acquired
    DMB                   @ Required before accessing protected resource
    BX      lr

2:  @ Take appropriate action while waiting for mutex to become unlocked
    @ WAIT_FOR_UPDATE is only a placeholder: as written this path waits for
    @ nothing and simply branches back, i.e. a pure busy-wait.
    B       1b            @ Retry from 1


@ unlock_mutex
@ Declare for use from C as extern void unlock_mutex(void * mutex);
@
@ Stores the unlocked value to the word addressed by r0. Overwrites r1.
@ No check is made that the caller holds the mutex.
    .global unlock_mutex
unlock_mutex:
    LDR     r1, =unlocked
    DMB                   @ Required before releasing protected resource
    STR     r1, [r0]      @ Unlock mutex
    @ SIGNAL_UPDATE is only a placeholder: no event is signalled here.
    @ NOTE(review): a waiter that sleeps in WFE would presumably need
    @ DSB + SEV at this point - confirm against the lock side in use.
    BX      lr

Then its dump looks like this

$ arm-linux-gnueabihf-objdump -d mutex.o

mutex.o:     file format elf32-littlearm


Disassembly of section .text:

00000000 <lock_mutex>:
   0:   e3a01001    mov r1, #1
   4:   e1902f9f    ldrex   r2, [r0]
   8:   e1520001    cmp r2, r1
   c:   0a000004    beq 24 <lock_mutex+0x24>
  10:   11802f91    strexne r2, r1, [r0]
  14:   13520001    cmpne   r2, #1
  18:   0afffff9    beq 4 <lock_mutex+0x4>
  1c:   f57ff05f    dmb sy
  20:   e12fff1e    bx  lr
  24:   eafffff6    b   4 <lock_mutex+0x4>

00000028 <unlock_mutex>:
  28:   e3a01000    mov r1, #0
  2c:   f57ff05f    dmb sy
  30:   e5801000    str r1, [r0]
  34:   e12fff1e    bx  lr

However, what I'm wondering is whether you managed to configure both cores to be included in core coherency. To my knowledge, you can specify which cores participate in ldrex/strex operations.

OTHER TIPS

As for why your code hangs, this could be because of the WFE instruction. If no event happens, it will do nothing. Forever. Check beforehand whether events are enabled and actually produced.

(Also, check the usage restrictions on STREX and LDREX in the ARM Architecture Reference Manual; they should be in section A2.9.4 "Usage restrictions".)

There is an example of how to implement a spin lock at: https://www.doulos.com/knowhow/arm/Hints_and_Tips/Implementing_Semaphores/

Applying their example to your code would lead to something like this:

/*
 * Busy-waiting lock: loop until the word at `mutex` has been changed from
 * unlocked to `locked` by this core.
 *
 * Flag flow: if the loaded value equals `locked`, the flags stay EQ, the two
 * NE-conditional instructions are skipped and BEQ retries. Otherwise STREXNE
 * attempts the store and CMPNE sets EQ when its status is 1 (store failed),
 * which also retries.
 *
 * The retry target is the local label "1:"; the "@ocm_lock_mutex" text is an
 * assembler comment, not a label, so it cannot be branched to. `mutex` is an
 * input operand (the asm reads the address, it does not produce it), `result`
 * is early-clobbered so it never shares a register with an input, and the
 * "cc"/"memory" clobbers describe the flag and memory side effects.
 */
__asm__ __volatile__("@ocm_lock_mutex\n"
"1: LDREX   %[r2], [%[r0]]\n"
"   CMP     %[r2], %[locked]\n"
"   STREXNE %[r2], %[locked], [%[r0]]\n"
"   CMPNE   %[r2], #1\n"
"   BEQ     1b\n"
"   DMB\n"                      /* barrier before touching the protected data */
    : [r2] "=&r" (result)
    : [r0] "r" (mutex), [locked] "r" (locked)
    : "cc", "memory");

This will implement the mutex with busy waiting.

If you want your code to tell you if a mutex was acquired without the busy waiting, just modify the end:

__asm__ __volatile__("@ocm_lock_mutex\n"
[...]
"   CMPNE   %[r2], #1\n"
"   BEQ     ocm_lock_mutex_end\n"
"   MOV    %[r2], #2\n"
"@ocm_lock_mutex_end\n"
"   NOP\n"
    : [r2] "=r" (result), [r0] "=r" (mutex)
    : [locked] "r" (locked));

and just check in C:

if (result==0) {/*You didn't get the mutex, it was locked*/}
else if (result==1) {/*You didn't get the mutex, access not exclusive*/}
else if (result==2) {/*You got the mutex!*/}

(As indicated by the ARM Architecture Reference Manual, version 2005, A2.9.4 "Load and store operation")

It is entirely reasonable to construct the outside "busy waiting" loop in C. Or, if you want to have an interrupt-based scheme, suspend operation from there.

Rule(s) of thumb:

  • Keep your inline assembly code as small as possible and loop-free.

  • Make your inline assembly only do one thing at a time.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top