Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Video decoding with FFmpeg on iOS and Android

Video decoding with FFmpeg on iOS and Android

A look at how FFmpeg wraps and exposes the hardware encoding and decoding APIs available on modern phones and tablets running iOS or Android

from Demuxed 2018

Aman Karmani

October 17, 2018
Tweet

More Decks by Aman Karmani

Other Decks in Technology

Transcript

  1. Demuxed 2018
    @tmm1
    FFmpeg
    Video decoding with
    libavformat
    libavcodec
    libavutil
    libavfilter
    iOS
    on and
    VideoToolbox
    Apple TV
    iPhone
    iPad
    MediaCodec
    Android TV
    Fire TV
    Android

    View full-size slide

  2. Aman Gupta
    @tmm1
    @tmm1
    [email protected]
    Channels app founder
    2015
    GitHub Employee #18
    2011
    ruby-core committer
    2013
    FFmpeg committer
    2017

    View full-size slide

  3. ✋"#
    FFmpeg
    $ ffmpeg -i …

    View full-size slide

  4. ✋"#
    FFmpeg
    libavformat
    libavcodec
    libavutil
    libavfilter

    View full-size slide

  5. ✋"#
    FFmpeg
    libavformat
    libavcodec
    libavutil
    libavfilter
    git://
    git://
    git://
    git://

    View full-size slide

  6. FFmpeg
    libavformat libavcodec

    View full-size slide

  7. FFmpeg
    libavformat
    muxers + demuxers
    hls, dash, mpegts, …
    data stream → AVPacket*
    libavcodec

    View full-size slide

  8. FFmpeg
    libavformat
    protocols
    http, tcp, rtmp, udp, …
    muxers + demuxers
    hls, dash, mpegts, …
    data stream → AVPacket*
    libavcodec

    View full-size slide

  9. FFmpeg
    libavformat
    protocols
    http, tcp, rtmp, udp, …
    muxers + demuxers
    hls, dash, mpegts, …
    data stream → AVPacket*
    libavcodec
    encoders
    decoders + hwaccels
    AVFrame* → AVPacket*
    AVPacket* → AVFrame*
    mpeg2, h264, hevc, …

    View full-size slide

  10. FFmpeg on iOS with VideoToolbox

    View full-size slide

  11. SOFTWARE DECODING
    AVCodec *codec =
    avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);

    View full-size slide

  12. SOFTWARE DECODING
    AVCodec *codec =
    avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);
    {
    find decoder and create instance

    View full-size slide

  13. SOFTWARE DECODING
    AVCodec *codec =
    avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);
    {
    find decoder and create instance
    {
    prepare decoder for data

    View full-size slide

  14. SOFTWARE DECODING
    AVCodec *codec =
    avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);
    AVCodec *codec =
    avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    AVBufferRef *device_ref = NULL;
    av_hwdevice_ctx_create(
    &device_ref,
    AV_HWDEVICE_TYPE_VIDEOTOOLBOX,
    NULL, NULL, 0);
    avctx->hw_device_ctx = device_ref;
    enum AVPixelFormat
    get_vt_format(struct AVCodecContext *avctx,
    const enum AVPixelFormat *fmt) {
    return AV_PIX_FMT_VIDEOTOOLBOX;
    }
    avctx->get_format = get_vt_format;
    avcodec_open2(avctx, codec, NULL);
    HARDWARE DECODING: VIDEOTOOLBOX

    View full-size slide

  15. SOFTWARE DECODING
    AVCodec *codec =
    avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);
    AVCodec *codec =
    avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    AVBufferRef *device_ref = NULL;
    av_hwdevice_ctx_create(
    &device_ref,
    AV_HWDEVICE_TYPE_VIDEOTOOLBOX,
    NULL, NULL, 0);
    avctx->hw_device_ctx = device_ref;
    enum AVPixelFormat
    get_vt_format(struct AVCodecContext *avctx,
    const enum AVPixelFormat *fmt) {
    return AV_PIX_FMT_VIDEOTOOLBOX;
    }
    avctx->get_format = get_vt_format;
    avcodec_open2(avctx, codec, NULL);
    HARDWARE DECODING: VIDEOTOOLBOX
    {
    create and attach HWDEVICE context

    View full-size slide

  16. SOFTWARE DECODING
    AVCodec *codec =
    avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);
    AVCodec *codec =
    avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    AVBufferRef *device_ref = NULL;
    av_hwdevice_ctx_create(
    &device_ref,
    AV_HWDEVICE_TYPE_VIDEOTOOLBOX,
    NULL, NULL, 0);
    avctx->hw_device_ctx = device_ref;
    enum AVPixelFormat
    get_vt_format(struct AVCodecContext *avctx,
    const enum AVPixelFormat *fmt) {
    return AV_PIX_FMT_VIDEOTOOLBOX;
    }
    avctx->get_format = get_vt_format;
    avcodec_open2(avctx, codec, NULL);
    HARDWARE DECODING: VIDEOTOOLBOX
    {
    request videotoolbox pixel format

    View full-size slide

  17. SOFTWARE FRAMES
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_YUV420P);

    View full-size slide

  18. SOFTWARE FRAMES
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_YUV420P);
    {
    receive decoded frame

    View full-size slide

  19. SOFTWARE FRAMES
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_YUV420P);
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_VIDEOTOOLBOX);
    HARDWARE FRAMES: VIDEOTOOLBOX

    View full-size slide

  20. SOFTWARE FRAMES
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_YUV420P);
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_VIDEOTOOLBOX);
    HARDWARE FRAMES: VIDEOTOOLBOX
    {
    platform specific pixel format
    {
    generic pixel format

    View full-size slide

  21. SOFTWARE FRAMES
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_YUV420P);
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_VIDEOTOOLBOX);
    CVPixelBufferRef img =
    (CVPixelBufferRef)frame->planes[3];
    HARDWARE FRAMES: VIDEOTOOLBOX
    {
    unwrap to access iOS pixel buffer

    View full-size slide

  22. SOFTWARE FRAMES
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_YUV420P);
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_VIDEOTOOLBOX);
    CVPixelBufferRef img =
    (CVPixelBufferRef)frame->planes[3];
    HARDWARE FRAMES: VIDEOTOOLBOX
    {
    unwrap to access iOS pixel buffer
    // use pixel buffer to:
    // - render to UIImage on screen
    // - read video pixel data
    // - modify pixel data
    // - upload video frame to OpenGL tex
    //
    // or:
    // - convert back to generic software frame
    // - filter/render like with software decode

    View full-size slide

  23. SOFTWARE FRAMES
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_YUV420P);
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_VIDEOTOOLBOX);
    CVPixelBufferRef img =
    (CVPixelBufferRef)frame->planes[3];
    int planes_nb =
    CVPixelBufferGetPlaneCount(img);
    CVPixelBufferLockBaseAddress(img, 0);
    for (int i = 0; i < planes_nb; i++) {
    size_t height =
    CVPixelBufferGetHeightOfPlane(img,i);
    size_t rowsize =
    CVPixelBufferGetBytesPerRowOfPlane(img,i);
    uint8_t *rowdata =
    CVPixelBufferGetBaseAddressOfPlane(img,i);
    // modify rowdata
    }
    CVPixelBufferUnlockBaseAddress(img, 0);
    HARDWARE FRAMES: VIDEOTOOLBOX
    {
    read/write access to underlying memory

    View full-size slide

  24. SOFTWARE FRAMES
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_YUV420P);
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_VIDEOTOOLBOX);
    CVPixelBufferRef img =
    (CVPixelBufferRef)frame->planes[3];
    int planes_nb =
    CVPixelBufferGetPlaneCount(img);
    for (int i = 0; i < planes_nb; i++) {
    CVOpenGLESTextureCacheCreateTextureFromImage(
    ...
    );
    GLuint tex = CVOpenGLESTextureGetName(plane);
    // pass to GL shader for rendering
    }
    HARDWARE FRAMES: VIDEOTOOLBOX
    {
    transfer each plane to an OpenGL texture

    View full-size slide

  25. SOFTWARE FRAMES
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_YUV420P);
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_VIDEOTOOLBOX);
    AVFrame *swframe = av_frame_alloc();
    av_hwframe_transfer_data(swframe, frame, 0);
    assert(swframe->imgfmt ==
    AV_PIX_FMT_YUV420P);
    HARDWARE FRAMES: VIDEOTOOLBOX
    {
    convert back to a regular software frame

    View full-size slide

  26. FFmpeg on Android with MediaCodec

    View full-size slide

  27. SOFTWARE DECODING
    AVCodec *codec =
    avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);

    View full-size slide

  28. SOFTWARE DECODING
    AVCodec *codec =
    avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);
    JavaVM *vm = ...; // via JNI_OnLoad() etc
    av_jni_set_java_vm(vm, NULL);
    AVCodec *codec =
    avcodec_find_decoder_by_name("h264_mediacodec");
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);
    HARDWARE DECODING: MEDIACODEC

    View full-size slide

  29. SOFTWARE DECODING
    AVCodec *codec =
    avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);
    JavaVM *vm = ...; // via JNI_OnLoad() etc
    av_jni_set_java_vm(vm, NULL);
    AVCodec *codec =
    avcodec_find_decoder_by_name("h264_mediacodec");
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);
    HARDWARE DECODING: MEDIACODEC
    {
    allow FFmpeg to access Android Java APIs

    View full-size slide

  30. SOFTWARE DECODING
    AVCodec *codec =
    avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);
    JavaVM *vm = ...; // via JNI_OnLoad() etc
    av_jni_set_java_vm(vm, NULL);
    AVCodec *codec =
    avcodec_find_decoder_by_name("h264_mediacodec");
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);
    HARDWARE DECODING: MEDIACODEC
    {
    implemented as a separate decoder

    View full-size slide

  31. SOFTWARE FRAMES
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_YUV420P);
    HARDWARE FRAMES: MEDIACODEC
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_NV12);

    View full-size slide

  32. SOFTWARE FRAMES
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_YUV420P);
    HARDWARE FRAMES: MEDIACODEC
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_NV12);
    {
    generic pixel format
    {
    generic pixel format
    (decoded frame is copied back)

    View full-size slide

  33. SOFTWARE DECODING
    AVCodec *codec =
    avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);
    JavaVM *vm = ...; // via JNI_OnLoad() etc
    av_jni_set_java_vm(vm, NULL);
    AVCodec *codec =
    avcodec_find_decoder_by_name("h264_mediacodec");
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    jobject surface = ...; // android.view.Surface
    AVBufferRef *device_ref =
    av_hwdevice_ctx_alloc(
    AV_HWDEVICE_TYPE_MEDIACODEC);
    AVHWDeviceContext *ctx =
    (void *)device_ref->data;
    AVMediaCodecDeviceContext *hwctx = ctx->hwctx;
    hwctx->surface = (void *)(intptr_t)surface;
    av_hwdevice_ctx_init(device_ref);
    avctx->hw_device_ctx = device_ref;
    avcodec_open2(avctx, codec, NULL);
    HARDWARE DECODING: MEDIACODEC SURFACE

    View full-size slide

  34. SOFTWARE DECODING
    AVCodec *codec =
    avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    avcodec_open2(avctx, codec, NULL);
    JavaVM *vm = ...; // via JNI_OnLoad() etc
    av_jni_set_java_vm(vm, NULL);
    AVCodec *codec =
    avcodec_find_decoder_by_name("h264_mediacodec");
    AVCodecContext *avctx =
    avcodec_alloc_context3(codec);
    jobject surface = ...; // android.view.Surface
    AVBufferRef *device_ref =
    av_hwdevice_ctx_alloc(
    AV_HWDEVICE_TYPE_MEDIACODEC);
    AVHWDeviceContext *ctx =
    (void *)device_ref->data;
    AVMediaCodecDeviceContext *hwctx = ctx->hwctx;
    hwctx->surface = (void *)(intptr_t)surface;
    av_hwdevice_ctx_init(device_ref);
    avctx->hw_device_ctx = device_ref;
    avcodec_open2(avctx, codec, NULL);
    HARDWARE DECODING: MEDIACODEC SURFACE
    {
    create HWDEVICE context to pass in Surface

    View full-size slide

  35. SOFTWARE FRAMES
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_YUV420P);
    HARDWARE FRAMES: MEDIACODEC SURFACE
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_MEDIACODEC);

    View full-size slide

  36. SOFTWARE FRAMES
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_YUV420P);
    HARDWARE FRAMES: MEDIACODEC SURFACE
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_MEDIACODEC);
    {
    platform specific pixel format
    {
    generic pixel format

    View full-size slide

  37. SOFTWARE FRAMES
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_YUV420P);
    HARDWARE FRAMES: MEDIACODEC SURFACE
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_MEDIACODEC);
    AVMediaCodecBuffer *buffer =
    (AVMediaCodecBuffer *)frame->planes[3];
    {
    unwrap to access MediaCodec Output Buffer

    View full-size slide

  38. SOFTWARE FRAMES
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_YUV420P);
    HARDWARE FRAMES: MEDIACODEC SURFACE
    AVFrame *frame = av_frame_alloc();
    int ret = avcodec_receive_frame(avctx,
    frame);
    assert(frame->imgfmt ==
    AV_PIX_FMT_MEDIACODEC);
    AVMediaCodecBuffer *buffer =
    (AVMediaCodecBuffer *)frame->planes[3];
    // drop frame
    av_mediacodec_release_buffer(buffer, 0);
    // render to surface
    av_mediacodec_release_buffer(buffer, 1);
    // render at clock time
    av_mediacodec_render_buffer_at_time(buffer,
    nanotime);
    {
    render Output Buffer to screen

    View full-size slide

  39. STATE OF THE FFmpeg on iOS and Android
    ☑ videotoolbox encoder
    h264, hevc
    ☑ videotoolbox hwaccel decoder
    h264, hevc
    ⬜ videotoolbox decoder (async)
    ☑ audiotoolbox encoder
    aac, alac, ilbc, pcm
    ☑ audiotoolbox decoder
    aac, ac3, eac3, mp3, …
    ⬜ mediacodec video encoder
    ☑ mediacodec video decoder
    mpeg2, h264, hevc
    ⬜ mediacodec audio encoder
    ⬜ mediacodec audio decoder
    ⬜ replace JNI with NDK (API 21+)
    ⬜ use async NDK decode (API 27+)

    View full-size slide

  40. HELP ME IMPROVE FFMPEG ON MOBILE PLATFORMS!
    Aman Gupta
    @tmm1
    @tmm1
    [email protected]
    Demuxed 2018
    Thank you

    View full-size slide