Pregunta

I would like to be able to extract audio from a video file and load it into a buffer played by OpenAL, but I don't know where to begin.

AVFoundation seems to be the easiest way to do it (easier than FFmpeg, isn't it?), but I can't find a way to get a buffer playable with OpenAL. I'm using ObjectAL on Mac OS X, and it works very well.

I'm looking for advice, code examples, or tutorials about it.

¿Fue útil?

Solución

For people who may be interested, here is how I did it. It accepts every input format that AVFoundation supports.

  • Load my file (video or audio) in an AVAsset ;
  • Get the audio track (AVAssetTrack) ;
  • Convert the track to PCM in an NSData ;
  • Add the WAV header (optional -> ALBuffer needs the data WITHOUT the header) ;
  • Feed an ALBuffer (from ObjectAL) with it.

Here is the code to build the PCM (you'll notice that I'm building 2 buffers, as I needed the audio file reversed).

// ---- Create the forward and backward WAV buffers
//      Feeds the bufferDictionary with NSData objects (one for each buffer)
//      Returns YES if the task is completed
//
// ---- Create the forward and backward PCM buffers
//      Feeds bufferDictionary with NSData objects (one per direction) plus the
//      matching OpenAL buffer format and the sample rate.
//      Returns YES on success, NO on failure or cancellation.
//
- (BOOL) wavDataBuffersWithAsset:(AVURLAsset *)asset assetTrack:(AVAssetTrack *)audioTrack
{
    // ---- Get the source format description so we can configure the reader
    //
    NSArray *formatDesc = [audioTrack formatDescriptions];
    if ([formatDesc count] == 0) {
        return NO; // no format description -> nothing we can decode
    }

    CMAudioFormatDescriptionRef item = (CMAudioFormatDescriptionRef)[formatDesc objectAtIndex:0];
    const AudioStreamBasicDescription *fileDescription = CMAudioFormatDescriptionGetStreamBasicDescription(item);
    if (fileDescription == NULL) {
        return NO;
    }

    // ---- Sometimes (on movie files, stereo) "bits per channel" is reported
    //      as 0; default it to 16, which is also what we decode to below.
    //
    uint32_t sampleRate = fileDescription->mSampleRate;
    uint16_t bitDepth   = fileDescription->mBitsPerChannel == 0 ? 16 : fileDescription->mBitsPerChannel;
    uint16_t channels   = fileDescription->mChannelsPerFrame;

    // ---- Pick the OpenAL buffer format. We always decode to 16-bit PCM, so
    //      only the channel count matters. Reject layouts OpenAL can't play
    //      (the original left audioFormat uninitialized for > 2 channels).
    //
    ALenum audioFormat;
    if (channels == 1) {
        audioFormat = AL_FORMAT_MONO16;
    } else if (channels == 2) {
        audioFormat = AL_FORMAT_STEREO16;
    } else {
        return NO;
    }

    if ([self isCancelled]) {
        return NO;
    }

    // ---- Initialize a reader that decodes the track to interleaved,
    //      little-endian, 16-bit integer PCM.
    //      Autoreleased so every exit path below releases it (the original
    //      alloc'd it and never released -> leak on all paths).
    //
    NSError *readerError = nil;
    AVAssetReader *reader = [[[AVAssetReader alloc] initWithAsset:asset error:&readerError] autorelease];
    if (reader == nil) {
        return NO;
    }

    NSDictionary *settings = [NSDictionary dictionaryWithObjectsAndKeys:
                              [NSNumber numberWithInt:kAudioFormatLinearPCM], AVFormatIDKey,
                              [NSNumber numberWithFloat:(float)sampleRate], AVSampleRateKey,
                              [NSNumber numberWithInt:bitDepth], AVLinearPCMBitDepthKey,
                              [NSNumber numberWithBool:NO], AVLinearPCMIsNonInterleaved,
                              [NSNumber numberWithBool:NO], AVLinearPCMIsFloatKey,
                              [NSNumber numberWithBool:NO], AVLinearPCMIsBigEndianKey, nil];

    AVAssetReaderTrackOutput *readerOutput = [AVAssetReaderTrackOutput assetReaderTrackOutputWithTrack:audioTrack outputSettings:settings];

    [reader addOutput:readerOutput];
    if (![reader startReading]) {
        return NO; // the original ignored this failure and spun in the loop below
    }

    NSMutableData *audioData = [NSMutableData data];
    NSMutableData *reverseData = [NSMutableData data];

    // ---- Collect the decoded chunks so the reversed buffer can be built in
    //      O(n) afterwards (the original's insertObject:atIndex:0 was O(n^2)).
    //
    NSMutableArray *chunks = [NSMutableArray array];

    // ---- Pull sample buffers until the reader runs dry.
    //      copyNextSampleBuffer returns NULL both on completion AND on
    //      failure, so we exit the loop on NULL and check the final status
    //      afterwards. (The original looped on `status != Completed` and
    //      `continue`d on NULL, which spins forever when the reader fails.)
    //
    CMSampleBufferRef buffer = NULL;
    while ((buffer = [readerOutput copyNextSampleBuffer]) != NULL) {

        if ([self isCancelled]) {
            CFRelease(buffer);
            [reader cancelReading];
            return NO;
        }

        CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(buffer);
        if (blockBuffer != NULL) {
            size_t size = CMBlockBufferGetDataLength(blockBuffer);
            uint8_t *outBytes = malloc(size);
            if (outBytes != NULL) { // the original dereferenced an unchecked malloc
                CMBlockBufferCopyDataBytes(blockBuffer, 0, size, outBytes);
                [audioData appendBytes:outBytes length:size];
                [chunks addObject:[NSData dataWithBytes:outBytes length:size]];
                free(outBytes);
            }
        }

        CMSampleBufferInvalidate(buffer);
        CFRelease(buffer);
    }

    if ([reader status] != AVAssetReaderStatusCompleted) {
        return NO; // reader failed or was cancelled mid-stream
    }

    if ([self isCancelled]) {
        return NO;
    }

    // ---- Build the backward buffer by appending the chunks in reverse order.
    //      NOTE(review): this reverses chunk ORDER only — the samples inside
    //      each chunk stay forward, exactly as in the original implementation.
    //
    for (NSData *chunkData in [chunks reverseObjectEnumerator]) {
        [reverseData appendData:chunkData];
    }

    // ---- NO WAV header with OpenAL: ALBuffer wants the raw PCM only.
    //
    [bufferDictionary setObject:audioData forKey:@"forward"];
    [bufferDictionary setObject:reverseData forKey:@"backward"];
    [bufferDictionary setObject:[NSNumber numberWithInteger:audioFormat] forKey:@"audioFormat"];
    [bufferDictionary setObject:[NSNumber numberWithInt:sampleRate] forKey:@"sampleRate"];

    return YES;
}

And the WAV header, if you need it:

// ---- Creates the WAV data header and returns it
//
// ---- Creates the 44-byte canonical PCM WAV (RIFF) header and returns it
//
// @param length     size of the raw PCM payload, in bytes
// @param channels   number of interleaved channels
// @param bitDepth   bits per sample (8 or 16)
// @param sampleRate frames per second
// @param byteRate   sampleRate * channels * bitDepth / 8
//
- (NSMutableData *) wavHeaderWithDataLength:(NSUInteger)length channels:(int)channels bitDepth:(int)bitDepth sampleRate:(long)sampleRate byteRate:(long)byteRate
{
    long totalAudioLen = length;

    // ---- RIFF chunk size = bytes following this field:
    //      4 ("WAVE") + (8 + 16) fmt chunk + (8 + data) = data + 36.
    //      (The original wrote data + 44, overstating it by 8 bytes.)
    //
    long totalDataLen = totalAudioLen + 36;

    // ---- Bytes per sample frame across all channels
    //      (the original hard-coded 2, which is wrong for anything but
    //      16-bit mono).
    //
    int blockAlign = channels * bitDepth / 8;

    // ---- The WAV header is 44 bytes long; build it on the stack.
    //
    Byte header[44];
    header[0] = 'R';
    header[1] = 'I';
    header[2] = 'F';
    header[3] = 'F';
    header[4] = (Byte) (totalDataLen & 0xff);
    header[5] = (Byte) ((totalDataLen >> 8) & 0xff);
    header[6] = (Byte) ((totalDataLen >> 16) & 0xff);
    header[7] = (Byte) ((totalDataLen >> 24) & 0xff);
    header[8] = 'W';
    header[9] = 'A';
    header[10] = 'V';
    header[11] = 'E';
    header[12] = 'f';
    header[13] = 'm';
    header[14] = 't';
    header[15] = ' ';
    header[16] = 16;  // size of 'fmt ' chunk: always 16 for PCM
                      // (the original wrote bitDepth here, which is only
                      // accidentally correct when bitDepth == 16)
    header[17] = 0;
    header[18] = 0;
    header[19] = 0;
    header[20] = 1;   // audio format 1 = linear PCM
    header[21] = 0;
    header[22] = (Byte) channels;
    header[23] = 0;
    header[24] = (Byte) (sampleRate & 0xff);
    header[25] = (Byte) ((sampleRate >> 8) & 0xff);
    header[26] = (Byte) ((sampleRate >> 16) & 0xff);
    header[27] = (Byte) ((sampleRate >> 24) & 0xff);
    header[28] = (Byte) (byteRate & 0xff);
    header[29] = (Byte) ((byteRate >> 8) & 0xff);
    header[30] = (Byte) ((byteRate >> 16) & 0xff);
    header[31] = (Byte) ((byteRate >> 24) & 0xff);
    header[32] = (Byte) (blockAlign & 0xff);
    header[33] = (Byte) ((blockAlign >> 8) & 0xff);
    header[34] = (Byte) bitDepth;  // bits per sample
    header[35] = 0;
    header[36] = 'd';
    header[37] = 'a';
    header[38] = 't';
    header[39] = 'a';
    header[40] = (Byte) (totalAudioLen & 0xff);
    header[41] = (Byte) ((totalAudioLen >> 8) & 0xff);
    header[42] = (Byte) ((totalAudioLen >> 16) & 0xff);
    header[43] = (Byte) ((totalAudioLen >> 24) & 0xff);

    return [NSMutableData dataWithBytes:header length:44];
}

Hope it can be useful for someone.

Licenciado bajo: CC-BY-SA con atribución
No afiliado a StackOverflow
scroll top