IMFTransform::ProcessOutput returns MF_E_TRANSFORM_STREAM_CHANGE for HE-AAC -> PCM

https://stackoverflow.com/questions/16565292

29-05-2022
|

Question

I'm trying to use the Media Foundation Transforms to convert an HE-AAC audio stream (received by live555, an RTP library) into PCM (to be played via the waveOutXxx Win32 API). However, at present I'm processing pre-recorded packets.

When I call IMFTransform::ProcessOutput, it returns MF_E_TRANSFORM_STREAM_CHANGE. The documentation for this in _MFT_PROCESS_OUTPUT_STATUS indicates that I should determine the correct streamType, and call IMFTransform::SetOutputType again.

However, I am unable to determine what the correct parameters for SetOutputType are.

For reference, the RTSP description of the stream is

m=audio 0 RTP/AVP 97
a=rtpmap:97 mpeg4-generic/16000/1
a=fmtp:97 streamtype=5; profile-level-id=15; mode=AAC-hbr; config=1408; 
  sizeLength=13; indexLength=3; indexDeltaLength=3; profile=1; bitrate=32000;

My code is (sorry for the length; error handling removed for brevity)

/* Decoder state shared between InitMFT() and ProcessData(). */
static IMFMediaType *s_inputMediaType;   /* input type obtained from the transform via GetInputAvailableType */
static IMFMediaType *s_outputMediaType;  /* output type obtained from the transform via GetOutputAvailableType */
static IMFTransform *s_transform;        /* the decoder MFT instance created in InitMFT() */
static DWORD         s_outputSampleSize; /* MFT_OUTPUT_STREAM_INFO::cbSize; used to size the output buffer in ProcessData() */

/*
 * Creates the decoder transform and negotiates its input and output types.
 *
 * Enumerates audio decoder MFTs that accept HE-AAC input, instantiates the
 * first one returned, sets the first offered input type and the second
 * offered output type, and records the output stream's buffer size in
 * s_outputSampleSize.
 *
 * Every HRESULT is stored in res but never checked (the author removed error
 * handling for brevity).
 */
static void InitMFT()
{
    HRESULT res;
    res = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED); // probably should use threaded in production
    /* Disabled: an earlier attempt that built the media types by hand.
     * NOTE(review): it refers to inputMediaType/outputMediaType, which are not
     * declared anywhere in the visible code (the globals carry an s_ prefix),
     * so it would need renaming before it could be re-enabled. */
#if 0
    res = MFCreateMediaType(&inputMediaType);
    res = MFCreateMediaType(&outputMediaType);

    res = inputMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
    res = inputMediaType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_AAC);
    res = inputMediaType->SetUINT32(MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, 15);
    res = inputMediaType->SetUINT32(MF_MT_AAC_PAYLOAD_TYPE, 1); // Audio Data Transport Stream

    res = outputMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
    res = outputMediaType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM);
    res = outputMediaType->SetUINT32(MF_MT_SAMPLE_SIZE, 2);
    res = outputMediaType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, 2);
    res = outputMediaType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, 8 * 2);
    res = outputMediaType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, 8000);
    res = outputMediaType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, 8000 * 2 *2);
#endif
    /* Filled in by MFTEnum; the array is freed with CoTaskMemFree at the end. */
    CLSID *clsids = NULL;
    UINT32 clsidCount=0;

    MFT_REGISTER_TYPE_INFO inputType  = {MFMediaType_Audio, MEDIASUBTYPE_MPEG_HEAAC}; // WAVE_FORMAT_MPEG_HEAAC can't be searched for
    MFT_REGISTER_TYPE_INFO outputType = {MFMediaType_Audio, WAVE_FORMAT_PCM};         // MEDIASUBTYPE_PCM doesn't exist (but can be search for if created)

    /* Search by input type only; outputType above is declared but not passed. */
    res = MFTEnum(MFT_CATEGORY_AUDIO_DECODER, 0,
                  &inputType,
                  NULL, /* &outputType, */ // search fails if outputType is not NULL
                  NULL, &clsids, &clsidCount);

    /* ListTranscoders is defined outside the visible code; presumably it
     * prints the enumerated decoders and their types -- confirm. */
    ListTranscoders(clsids, clsidCount);

    /* NOTE(review): clsids[0] is read without checking res or clsidCount, so
     * an empty or failed enumeration dereferences NULL here. */
    res = CoCreateInstance(clsids[0], NULL, CLSCTX_ALL, IID_PPV_ARGS(&s_transform));

    /* GetStreamCount and GetStreamIDs always return E_NOTIMPL */
    DWORD inputCount;
    DWORD outputCount;
    res = s_transform->GetStreamCount(&inputCount, &outputCount);

    DWORD inputIDs[16];
    DWORD outputIDs[16];

    res = s_transform->GetStreamIDs(inputCount, inputIDs,
                                  outputCount, outputIDs);

    /* Stream 0 is used for both input and output from here on. Take the
     * first input type the transform offers and set it. */
    res = s_transform->GetInputAvailableType(0, 0, &s_inputMediaType);
    res = s_transform->SetInputType(0, s_inputMediaType, 0);

    res = s_transform->GetOutputAvailableType(0, 1, &s_outputMediaType); // 1 here is because PCM outputer is the second output offered in ListTranscoders
    res = s_transform->SetOutputType(0, s_outputMediaType, 0);

    /* Remember the output stream's cbSize; ProcessData allocates its output
     * buffer with this size. */
    MFT_OUTPUT_STREAM_INFO outputStreamInfo;
    res = s_transform->GetOutputStreamInfo(0, &outputStreamInfo); 
    s_outputSampleSize = outputStreamInfo.cbSize;

    /* The negotiated subtypes are read into locals that are not used again in
     * this function -- presumably for inspection in a debugger. */
    GUID inputCodec;
    GUID outputCodec;

    res = s_inputMediaType ->GetGUID(MF_MT_SUBTYPE, &inputCodec);
    res = s_outputMediaType->GetGUID(MF_MT_SUBTYPE, &outputCodec);

    CoTaskMemFree(clsids);
}

/*
http://msdn.microsoft.com/en-us/library/bb530106%28v=vs.85%29.aspx
http://msdn.microsoft.com/en-us/library/bb530123%28v=vs.85%29.aspx
*/

void ProcessData(const void* inputData,  DWORD inputSize,
                       void* outputData, DWORD &outputSize)
{
    HRESULT         res;
    IMFSample      *pSample;
    IMFMediaBuffer *pBuffer;
    BYTE *pData = NULL;

    /**** Create an input sample buffer, from the supplied data ****/
    res = MFCreateSample(&pSample);
    res = MFCreateMemoryBuffer(inputSize, &pBuffer);
    res = pBuffer->Lock(&pData, NULL, NULL);
    memcpy_s(pData, inputSize, inputData, inputSize);
    res = pBuffer->SetCurrentLength(inputSize);
    res = pBuffer->Unlock();
    res = pSample->AddBuffer(pBuffer);

    /**** Create output buffer ****/
    IMFSample      *pOutputSample;
    IMFMediaBuffer *pOutputBuffer;

    res = MFCreateSample(&pOutputSample);
    res = MFCreateMemoryBuffer(s_outputSampleSize, &pOutputBuffer);
    res = pOutputSample->AddBuffer(pOutputBuffer);

    MFT_OUTPUT_DATA_BUFFER outputDataBuffer; // can be an array
    outputDataBuffer.dwStreamID=0;
    outputDataBuffer.pSample=pOutputSample;
    outputDataBuffer.dwStatus=0;
    outputDataBuffer.pEvents = NULL;

    DWORD outputStatus=0;

    /*** Process the data, and get it back ****/
    res = s_transform->ProcessInput(0, pSample, 0);
    res = s_transform->ProcessOutput( MFT_PROCESS_OUTPUT_DISCARD_WHEN_NO_BUFFER, 
                                   1, &outputDataBuffer, &outputStatus);


    if (res==MF_E_TRANSFORM_STREAM_CHANGE)
    {
        // http://msdn.microsoft.com/en-us/library/windows/desktop/dd797815%28v=vs.85%29.aspx
        // indicates that the output always changes
        // but not how to handle it

        /* GetStreamCount and GetStreamIDs always return E_NOTIMPL */

        DWORD inputCount;
        DWORD outputCount;
        res = s_transform->GetStreamCount(&inputCount, &outputCount);

        DWORD inputIDs[16];
        DWORD outputIDs[16];

        res = s_transform->GetStreamIDs(inputCount, inputIDs,
                                      outputCount, outputIDs);

        res = s_transform->GetInputAvailableType(0, 0, &s_inputMediaType);
        res = s_transform->SetInputType(0, s_inputMediaType, 0);

        res = s_transform->GetOutputAvailableType(0, 1, &s_outputMediaType); // 1 here is because PCM outputer is the second output offered
        res = s_transform->SetOutputType(0, s_outputMediaType, 0);
    }

    /**** Extract converted audio from the sample ****/
    DWORD dwNumOutputBuffers, i;
    res = outputDataBuffer.pSample->GetBufferCount(&dwNumOutputBuffers);

    for(i=0; i<dwNumOutputBuffers; i++)
    {
        IMFMediaBuffer *outputBuffer;
        res = outputDataBuffer.pSample->GetBufferByIndex(i, &outputBuffer);
        BYTE *outData;
        DWORD outDataLen = 0;

        res = outputBuffer->Lock(&outData, NULL, &outDataLen);

        memcpy(outputBuffer, outData, outDataLen);

        res = outputBuffer->Unlock();
    }

    /* TODO: Release any neccessery references */
}

Solution

I'm doing a very similar thing. You must call SetOutputType, otherwise your next ProcessInput will give you MF_E_TRANSFORM_TYPE_NOT_SET.

You must also flush the decoder, otherwise your next ProcessInput will return MF_E_NOTACCEPTING. Working from your code, I got it to work by removing the SetInputType call and adding the code below after this line:

res = s_transform->GetOutputAvailableType(0, 1, &s_outputMediaType); // 1 here is because PCM outputer is the second output offered
    // Read the subtype of the output type the decoder offers after the change.
    GUID outputCodec;
    res = s_outputMediaType->GetGUID(MF_MT_SUBTYPE, &outputCodec);
    if (outputCodec == MFAudioFormat_PCM){
        printf("\nDecoder Output is expecting pcm format");                 
        res = s_transform->SetOutputType(0, s_outputMediaType, 0);//setting the type again

    }
    // NOTE(review): the float case is only reported; SetOutputType is not
    // called in this branch.
    if (outputCodec == MFAudioFormat_Float){
        printf("\nDecoder Output is expecting float pcm format");
    }
    // Flush the decoder so that the next ProcessInput call is accepted.
    s_transform->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH,NULL);        

    // NOTE(review): the question's ProcessData is declared void, so returning
    // res requires changing its return type (e.g. to HRESULT).
    return res;//no output coming get another input to process.

For testing, I was using samples that came from a source reader reading a known-good file. Be sure to play many packets, as some packets containing 'quiet' can be short and all 0.

Also, this URL http://msdn.microsoft.com/en-us/library/windows/desktop/ff485864%28v=vs.85%29.aspx nicely specifies how to configure the output. It is very useful.

Reading http://msdn.microsoft.com/en-us/library/windows/desktop/dd742784%28v=vs.85%29.aspx closely, you can notice that some of the attributes (e.g. MF_MT_AUDIO_BITS_PER_SAMPLE) on the input media type tell the decoder how the output is to be formatted. If you are able to specify the desired input and output more fully, you can avoid the stream change. The result for your first sample could then be MF_E_TRANSFORM_NEED_MORE_INPUT instead.

Licensed under: CC-BY-SA with attribution

Not affiliated with StackOverflow