Pregunta

I would like to be able to extract audio from a video file and load it into a buffer played by OpenAL, but I don't know where to begin.

AVFoundation seems to be the easiest way to do it (easier than FFmpeg, isn't it?), but I can't find a way to get a buffer playable with OpenAL. I'm using ObjectAL on Mac OS X, and it works very well.

I'm looking for advice, code examples, or tutorials about it.

¿Fue útil?

Solución

For people who may be interested, here is how I did it. It accepts every input format that AVFoundation supports.

  • Load my file (video or audio) in an AVAsset ;
  • Get the audio track (AVAssetTrack) ;
  • Convert the track to PCM in an NSData ;
  • Add the WAV header (optional -> ALBuffer needs the data WITHOUT the header) ;
  • Feed an ALBuffer (from ObjectAL) with it.

Here is the code to build the PCM (you'll notice that I'm building 2 buffers, as I needed the audio file reversed).

// ---- Create the forward and backward WAV buffers
//      Feeds the bufferDictionary with NSData objects (one for each buffer)
//      Returns YES if the task is completed
//
// ---- Create the forward and backward PCM buffers
//      Feeds bufferDictionary with NSData objects (one per direction) plus the
//      matching OpenAL buffer format and the sample rate.
//      Returns YES on success, NO on failure or cancellation.
//
- (BOOL) wavDataBuffersWithAsset:(AVURLAsset *)asset assetTrack:(AVAssetTrack *)audioTrack
{
    // ---- Get the source format description so we can configure the reader
    //
    NSArray *formatDesc = [audioTrack formatDescriptions];
    if ([formatDesc count] == 0) {
        return NO; // no format description -> nothing we can decode
    }

    CMAudioFormatDescriptionRef item = (CMAudioFormatDescriptionRef)[formatDesc objectAtIndex:0];
    const AudioStreamBasicDescription *fileDescription = CMAudioFormatDescriptionGetStreamBasicDescription(item);
    if (fileDescription == NULL) {
        return NO;
    }

    // ---- Sometimes (on movie files, stereo) "bits per channel" is reported
    //      as 0; default it to 16, which is also what we decode to below.
    //
    uint32_t sampleRate = fileDescription->mSampleRate;
    uint16_t bitDepth   = fileDescription->mBitsPerChannel == 0 ? 16 : fileDescription->mBitsPerChannel;
    uint16_t channels   = fileDescription->mChannelsPerFrame;

    // ---- Pick the OpenAL buffer format. We always decode to 16-bit PCM, so
    //      only the channel count matters. Reject layouts OpenAL can't play
    //      (the original left audioFormat uninitialized for > 2 channels).
    //
    ALenum audioFormat;
    if (channels == 1) {
        audioFormat = AL_FORMAT_MONO16;
    } else if (channels == 2) {
        audioFormat = AL_FORMAT_STEREO16;
    } else {
        return NO;
    }

    if ([self isCancelled]) {
        return NO;
    }

    // ---- Initialize a reader that decodes the track to interleaved,
    //      little-endian, 16-bit integer PCM.
    //      Autoreleased so every exit path below releases it (the original
    //      alloc'd it and never released -> leak on all paths).
    //
    NSError *readerError = nil;
    AVAssetReader *reader = [[[AVAssetReader alloc] initWithAsset:asset error:&readerError] autorelease];
    if (reader == nil) {
        return NO;
    }

    NSDictionary *settings = [NSDictionary dictionaryWithObjectsAndKeys:
                              [NSNumber numberWithInt:kAudioFormatLinearPCM], AVFormatIDKey,
                              [NSNumber numberWithFloat:(float)sampleRate], AVSampleRateKey,
                              [NSNumber numberWithInt:bitDepth], AVLinearPCMBitDepthKey,
                              [NSNumber numberWithBool:NO], AVLinearPCMIsNonInterleaved,
                              [NSNumber numberWithBool:NO], AVLinearPCMIsFloatKey,
                              [NSNumber numberWithBool:NO], AVLinearPCMIsBigEndianKey, nil];

    AVAssetReaderTrackOutput *readerOutput = [AVAssetReaderTrackOutput assetReaderTrackOutputWithTrack:audioTrack outputSettings:settings];

    [reader addOutput:readerOutput];
    if (![reader startReading]) {
        return NO; // the original ignored this failure and spun in the loop below
    }

    NSMutableData *audioData = [NSMutableData data];
    NSMutableData *reverseData = [NSMutableData data];

    // ---- Collect the decoded chunks so the reversed buffer can be built in
    //      O(n) afterwards (the original's insertObject:atIndex:0 was O(n^2)).
    //
    NSMutableArray *chunks = [NSMutableArray array];

    // ---- Pull sample buffers until the reader runs dry.
    //      copyNextSampleBuffer returns NULL both on completion AND on
    //      failure, so we exit the loop on NULL and check the final status
    //      afterwards. (The original looped on `status != Completed` and
    //      `continue`d on NULL, which spins forever when the reader fails.)
    //
    CMSampleBufferRef buffer = NULL;
    while ((buffer = [readerOutput copyNextSampleBuffer]) != NULL) {

        if ([self isCancelled]) {
            CFRelease(buffer);
            [reader cancelReading];
            return NO;
        }

        CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(buffer);
        if (blockBuffer != NULL) {
            size_t size = CMBlockBufferGetDataLength(blockBuffer);
            uint8_t *outBytes = malloc(size);
            if (outBytes != NULL) { // the original dereferenced an unchecked malloc
                CMBlockBufferCopyDataBytes(blockBuffer, 0, size, outBytes);
                [audioData appendBytes:outBytes length:size];
                [chunks addObject:[NSData dataWithBytes:outBytes length:size]];
                free(outBytes);
            }
        }

        CMSampleBufferInvalidate(buffer);
        CFRelease(buffer);
    }

    if ([reader status] != AVAssetReaderStatusCompleted) {
        return NO; // reader failed or was cancelled mid-stream
    }

    if ([self isCancelled]) {
        return NO;
    }

    // ---- Build the backward buffer by appending the chunks in reverse order.
    //      NOTE(review): this reverses chunk ORDER only — the samples inside
    //      each chunk stay forward, exactly as in the original implementation.
    //
    for (NSData *chunkData in [chunks reverseObjectEnumerator]) {
        [reverseData appendData:chunkData];
    }

    // ---- NO WAV header with OpenAL: ALBuffer wants the raw PCM only.
    //
    [bufferDictionary setObject:audioData forKey:@"forward"];
    [bufferDictionary setObject:reverseData forKey:@"backward"];
    [bufferDictionary setObject:[NSNumber numberWithInteger:audioFormat] forKey:@"audioFormat"];
    [bufferDictionary setObject:[NSNumber numberWithInt:sampleRate] forKey:@"sampleRate"];

    return YES;
}

And the WAV header, if you need it:

// ---- Creates the WAV data header and returns it
//
// ---- Creates the 44-byte canonical PCM WAV (RIFF) header and returns it
//
// @param length     size of the raw PCM payload, in bytes
// @param channels   number of interleaved channels
// @param bitDepth   bits per sample (8 or 16)
// @param sampleRate frames per second
// @param byteRate   sampleRate * channels * bitDepth / 8
//
- (NSMutableData *) wavHeaderWithDataLength:(NSUInteger)length channels:(int)channels bitDepth:(int)bitDepth sampleRate:(long)sampleRate byteRate:(long)byteRate
{
    long totalAudioLen = length;

    // ---- RIFF chunk size = bytes following this field:
    //      4 ("WAVE") + (8 + 16) fmt chunk + (8 + data) = data + 36.
    //      (The original wrote data + 44, overstating it by 8 bytes.)
    //
    long totalDataLen = totalAudioLen + 36;

    // ---- Bytes per sample frame across all channels
    //      (the original hard-coded 2, which is wrong for anything but
    //      16-bit mono).
    //
    int blockAlign = channels * bitDepth / 8;

    // ---- The WAV header is 44 bytes long; build it on the stack.
    //
    Byte header[44];
    header[0] = 'R';
    header[1] = 'I';
    header[2] = 'F';
    header[3] = 'F';
    header[4] = (Byte) (totalDataLen & 0xff);
    header[5] = (Byte) ((totalDataLen >> 8) & 0xff);
    header[6] = (Byte) ((totalDataLen >> 16) & 0xff);
    header[7] = (Byte) ((totalDataLen >> 24) & 0xff);
    header[8] = 'W';
    header[9] = 'A';
    header[10] = 'V';
    header[11] = 'E';
    header[12] = 'f';
    header[13] = 'm';
    header[14] = 't';
    header[15] = ' ';
    header[16] = 16;  // size of 'fmt ' chunk: always 16 for PCM
                      // (the original wrote bitDepth here, which is only
                      // accidentally correct when bitDepth == 16)
    header[17] = 0;
    header[18] = 0;
    header[19] = 0;
    header[20] = 1;   // audio format 1 = linear PCM
    header[21] = 0;
    header[22] = (Byte) channels;
    header[23] = 0;
    header[24] = (Byte) (sampleRate & 0xff);
    header[25] = (Byte) ((sampleRate >> 8) & 0xff);
    header[26] = (Byte) ((sampleRate >> 16) & 0xff);
    header[27] = (Byte) ((sampleRate >> 24) & 0xff);
    header[28] = (Byte) (byteRate & 0xff);
    header[29] = (Byte) ((byteRate >> 8) & 0xff);
    header[30] = (Byte) ((byteRate >> 16) & 0xff);
    header[31] = (Byte) ((byteRate >> 24) & 0xff);
    header[32] = (Byte) (blockAlign & 0xff);
    header[33] = (Byte) ((blockAlign >> 8) & 0xff);
    header[34] = (Byte) bitDepth;  // bits per sample
    header[35] = 0;
    header[36] = 'd';
    header[37] = 'a';
    header[38] = 't';
    header[39] = 'a';
    header[40] = (Byte) (totalAudioLen & 0xff);
    header[41] = (Byte) ((totalAudioLen >> 8) & 0xff);
    header[42] = (Byte) ((totalAudioLen >> 16) & 0xff);
    header[43] = (Byte) ((totalAudioLen >> 24) & 0xff);

    return [NSMutableData dataWithBytes:header length:44];
}

Hope it can be useful for someone.

Licenciado bajo: CC-BY-SA con atribución
No afiliado a StackOverflow
scroll top