Как можно кодировать серию изображений в H264, используя API X264 C?

https://stackoverflow.com/questions/2940671

05-10-2019
|

Вопрос

Как используется API X264 C для кодирования изображений RBG в кадры H264? Я уже создал последовательность изображений RBG, как я могу сейчас преобразовать эту последовательность в последовательность кадров H264? В частности, как мне кодировать эту последовательность изображений RGB в последовательность рамы H264, состоящей из одного начального ключевого квадрата H264, с которым следует зависимые кадры H264?

Решение

Прежде всего: проверьте файл x264.h, он содержит более или менее ссылку для каждой функции и структуры. Файл X264.C Вы можете найти в загрузке, содержит примеров реализации. Большинство людей говорят, что основывают себя на этом, но я нахожу его довольно сложным для начинающих, это хорошо, насколько приведен пример назад.

Сначала вы настроили некоторые параметры типа x264_param_t, хороший сайт, описывающий параметры, http://mewiki.project357.com/wiki/x264_settings Отказ Также посмотрите на x264_param_default_preset Функция, которая позволяет нацелиться на некоторые функциональные возможности без необходимости понимать все (иногда довольно сложные) параметры. Также использовать x264_param_apply_profile после этого (вы, вероятно, захотите профиль «Базовый»)

Это какой-то пример настройки из моего кода:

x264_param_t param;
x264_param_default_preset(&param, "veryfast", "zerolatency");
param.i_threads = 1;
param.i_width = width;
param.i_height = height;
param.i_fps_num = fps;
param.i_fps_den = 1;
// Intra refres:
param.i_keyint_max = fps;
param.b_intra_refresh = 1;
//Rate control:
param.rc.i_rc_method = X264_RC_CRF;
param.rc.f_rf_constant = 25;
param.rc.f_rf_constant_max = 35;
//For streaming:
param.b_repeat_headers = 1;
param.b_annexb = 1;
x264_param_apply_profile(&param, "baseline");

После этого вы можете инициализировать кодировщик следующим образом

x264_t* encoder = x264_encoder_open(&param);
x264_picture_t pic_in, pic_out;
x264_picture_alloc(&pic_in, X264_CSP_I420, w, h)

X264 ожидает данных YUV420P (я думаю, что некоторые другие также, но это общее). Вы можете использовать libswscale (от ffmpeg) для преобразования изображений вправый формат. Инициализация это похоже на это (я предполагаю, что данные RGB с 24bpp).

struct SwsContext* convertCtx = sws_getContext(in_w, in_h, PIX_FMT_RGB24, out_w, out_h, PIX_FMT_YUV420P, SWS_FAST_BILINEAR, NULL, NULL, NULL);

Кодировка так же просто, как это, для каждого кадра:

//data is a pointer to you RGB structure
int srcstride = w*3; //RGB stride is just 3*width
sws_scale(convertCtx, &data, &srcstride, 0, h, pic_in.img.plane, pic_in.img.stride);
x264_nal_t* nals;
int i_nals;
int frame_size = x264_encoder_encode(encoder, &nals, &i_nals, &pic_in, &pic_out);
if (frame_size >= 0)
{
    // OK
}

Я надеюсь, что вы поймете вас;), я провел много времени на сам, чтобы начать. X264 - безумно сильный, но иногда сложный кусок программного обеспечения.

Редактировать: При использовании других параметров будет задержка кадров, это не так с моими параметрами (в основном из-за параметра Nolathency). Если это так, Frame_size иногда будет равен нулю, и вам придется позвонить x264_encoder_encode до тех пор, пока функция x264_encoder_delayed_frames Не возвращается 0. Но для этой функции вы должны взять более глубокую заглянуть в X264 ° C и X264.h.

Другие советы

Я загрузил пример, который генерирует сырые кадры YUV, а затем кодирует их с помощью X264. Полный код можно найти здесь: https://gist.github.com/roxlu/6453908.

FFMPEG 2.8.6 C Runnable Пример

Использование FFPMEG в качестве обертки для X264 - хорошая идея, поскольку она обнародовала единую API для нескольких энкодеров. Поэтому, если вам когда-нибудь нужно изменить форматы, вы можете изменить только один параметр вместо изучения нового API.

Пример синтезируется и кодирует некоторые красочные рамки, генерируемые generate_rgb.

Контроль типа кадра (Я, р, б) иметь как можно меньше основных кадров (в идеале только первое) обсуждается здесь: https://stackoverflow.com/a/36412909/895245. Как уже упоминалось, я не рекомендую это для большинства приложений.

Ключевые строки, которые здесь контролируют тип кадра:

/* Minimal distance of I-frames. This is the maximum value allowed,
or else we get a warning at runtime. */
c->keyint_min = 600;

а также:

if (frame->pts == 1) {
    frame->key_frame = 1;
    frame->pict_type = AV_PICTURE_TYPE_I;
} else {
    frame->key_frame = 0;
    frame->pict_type = AV_PICTURE_TYPE_P;
}

Затем мы можем проверить тип кадра с:

ffprobe -select_streams v \
    -show_frames \
    -show_entries frame=pict_type \
    -of csv \
    tmp.h264

Как уже упоминалось на: https://superuser.com/Questions/885452/extracting-the-index-of-key-frames-from-a-video-using-ffmpeg

Предварительный просмотр сгенерированного вывода.

Main.c.

#include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>

static AVCodecContext *c = NULL;
static AVFrame *frame;
static AVPacket pkt;
static FILE *file;
struct SwsContext *sws_context = NULL;

static void ffmpeg_encoder_set_frame_yuv_from_rgb(uint8_t *rgb) {
    const int in_linesize[1] = { 3 * c->width };
    sws_context = sws_getCachedContext(sws_context,
            c->width, c->height, AV_PIX_FMT_RGB24,
            c->width, c->height, AV_PIX_FMT_YUV420P,
            0, 0, 0, 0);
    sws_scale(sws_context, (const uint8_t * const *)&rgb, in_linesize, 0,
            c->height, frame->data, frame->linesize);
}

uint8_t* generate_rgb(int width, int height, int pts, uint8_t *rgb) {
    int x, y, cur;
    rgb = realloc(rgb, 3 * sizeof(uint8_t) * height * width);
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            cur = 3 * (y * width + x);
            rgb[cur + 0] = 0;
            rgb[cur + 1] = 0;
            rgb[cur + 2] = 0;
            if ((frame->pts / 25) % 2 == 0) {
                if (y < height / 2) {
                    if (x < width / 2) {
                        /* Black. */
                    } else {
                        rgb[cur + 0] = 255;
                    }
                } else {
                    if (x < width / 2) {
                        rgb[cur + 1] = 255;
                    } else {
                        rgb[cur + 2] = 255;
                    }
                }
            } else {
                if (y < height / 2) {
                    rgb[cur + 0] = 255;
                    if (x < width / 2) {
                        rgb[cur + 1] = 255;
                    } else {
                        rgb[cur + 2] = 255;
                    }
                } else {
                    if (x < width / 2) {
                        rgb[cur + 1] = 255;
                        rgb[cur + 2] = 255;
                    } else {
                        rgb[cur + 0] = 255;
                        rgb[cur + 1] = 255;
                        rgb[cur + 2] = 255;
                    }
                }
            }
        }
    }
    return rgb;
}

/* Allocate resources and write header data to the output file. */
void ffmpeg_encoder_start(const char *filename, int codec_id, int fps, int width, int height) {
    AVCodec *codec;
    int ret;

    codec = avcodec_find_encoder(codec_id);
    if (!codec) {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }
    c = avcodec_alloc_context3(codec);
    if (!c) {
        fprintf(stderr, "Could not allocate video codec context\n");
        exit(1);
    }
    c->bit_rate = 400000;
    c->width = width;
    c->height = height;
    c->time_base.num = 1;
    c->time_base.den = fps;
    c->keyint_min = 600;
    c->pix_fmt = AV_PIX_FMT_YUV420P;
    if (codec_id == AV_CODEC_ID_H264)
        av_opt_set(c->priv_data, "preset", "slow", 0);
    if (avcodec_open2(c, codec, NULL) < 0) {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }
    file = fopen(filename, "wb");
    if (!file) {
        fprintf(stderr, "Could not open %s\n", filename);
        exit(1);
    }
    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }
    frame->format = c->pix_fmt;
    frame->width  = c->width;
    frame->height = c->height;
    ret = av_image_alloc(frame->data, frame->linesize, c->width, c->height, c->pix_fmt, 32);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate raw picture buffer\n");
        exit(1);
    }
}

/*
Write trailing data to the output file
and free resources allocated by ffmpeg_encoder_start.
*/
void ffmpeg_encoder_finish(void) {
    uint8_t endcode[] = { 0, 0, 1, 0xb7 };
    int got_output, ret;
    do {
        fflush(stdout);
        ret = avcodec_encode_video2(c, &pkt, NULL, &got_output);
        if (ret < 0) {
            fprintf(stderr, "Error encoding frame\n");
            exit(1);
        }
        if (got_output) {
            fwrite(pkt.data, 1, pkt.size, file);
            av_packet_unref(&pkt);
        }
    } while (got_output);
    fwrite(endcode, 1, sizeof(endcode), file);
    fclose(file);
    avcodec_close(c);
    av_free(c);
    av_freep(&frame->data[0]);
    av_frame_free(&frame);
}

/*
Encode one frame from an RGB24 input and save it to the output file.
Must be called after ffmpeg_encoder_start, and ffmpeg_encoder_finish
must be called after the last call to this function.
*/
void ffmpeg_encoder_encode_frame(uint8_t *rgb) {
    int ret, got_output;
    ffmpeg_encoder_set_frame_yuv_from_rgb(rgb);
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;
    if (frame->pts == 1) {
        frame->key_frame = 1;
        frame->pict_type = AV_PICTURE_TYPE_I;
    } else {
        frame->key_frame = 0;
        frame->pict_type = AV_PICTURE_TYPE_P;
    }
    ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
    if (ret < 0) {
        fprintf(stderr, "Error encoding frame\n");
        exit(1);
    }
    if (got_output) {
        fwrite(pkt.data, 1, pkt.size, file);
        av_packet_unref(&pkt);
    }
}

/* Represents the main loop of an application which generates one frame per loop. */
static void encode_example(const char *filename, int codec_id) {
    int pts;
    int width = 320;
    int height = 240;
    uint8_t *rgb = NULL;
    ffmpeg_encoder_start(filename, codec_id, 25, width, height);
    for (pts = 0; pts < 100; pts++) {
        frame->pts = pts;
        rgb = generate_rgb(width, height, pts, rgb);
        ffmpeg_encoder_encode_frame(rgb);
    }
    ffmpeg_encoder_finish();
}

int main(void) {
    avcodec_register_all();
    encode_example("tmp.h264", AV_CODEC_ID_H264);
    encode_example("tmp.mpg", AV_CODEC_ID_MPEG1VIDEO);
    return 0;
}

Компилировать и бегать с:

gcc -o main.out -std=c99 -Wextra main.c -lavcodec -lswscale -lavutil
./main.out
ffplay tmp.mpg
ffplay tmp.h264

Испытано на Ubuntu 16.04. Github вверх по течению.

Лицензировано под: CC-BY-SA с атрибуция

Не связан с StackOverflow