diff --git a/c_src/xav/decoder.c b/c_src/xav/decoder.c
index 5682932..a71838d 100644
--- a/c_src/xav/decoder.c
+++ b/c_src/xav/decoder.c
@@ -20,6 +20,12 @@ int decoder_init(struct Decoder *decoder, const char *codec) {
   } else if (strcmp(codec, "vp8") == 0) {
     decoder->media_type = AVMEDIA_TYPE_VIDEO;
     decoder->codec = avcodec_find_decoder(AV_CODEC_ID_VP8);
+  } else if (strcmp(codec, "h264") == 0) {
+    decoder->media_type = AVMEDIA_TYPE_VIDEO;
+    decoder->codec = avcodec_find_decoder(AV_CODEC_ID_H264);
+  } else if (strcmp(codec, "h265") == 0) {
+    decoder->media_type = AVMEDIA_TYPE_VIDEO;
+    decoder->codec = avcodec_find_decoder(AV_CODEC_ID_H265);
   } else {
     return -1;
   }
@@ -60,6 +66,32 @@ int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame) {
   return avcodec_receive_frame(decoder->c, frame);
 }
 
+// Signals end of stream to the decoder and collects the frames it still buffers.
+//
+// `frames` must hold MAX_FLUSH_BUFFER allocated frames and `*frames_count` must be initialised
+// by the caller; it is incremented once per received frame, up to MAX_FLUSH_BUFFER.
+// Returns 0 on success, otherwise the error code from libavcodec.
+int decoder_flush(struct Decoder *decoder, AVFrame **frames, int *frames_count) {
+  int ret = avcodec_send_packet(decoder->c, NULL);
+  if (ret != 0) {
+    return ret;
+  }
+
+  // Stop once the caller's buffer is full so we never write past frames[MAX_FLUSH_BUFFER - 1].
+  while (*frames_count < MAX_FLUSH_BUFFER) {
+    ret = avcodec_receive_frame(decoder->c, frames[*frames_count]);
+    if (ret == AVERROR_EOF) {
+      break;
+    } else if (ret < 0) {
+      return ret;
+    }
+
+    *frames_count += 1;
+  }
+
+  return 0;
+}
+
 void decoder_free_frame(struct Decoder *decoder) {
   if (decoder->frame != NULL) {
     av_frame_unref(decoder->frame);
diff --git a/c_src/xav/decoder.h b/c_src/xav/decoder.h
index 364e718..a4275fe 100644
--- a/c_src/xav/decoder.h
+++ b/c_src/xav/decoder.h
@@ -4,6 +4,8 @@
 #include "audio_converter.h"
 #include "utils.h"
 
+#define MAX_FLUSH_BUFFER 16
+
 struct Decoder {
   enum AVMediaType media_type;
   AVFrame *frame;
@@ -18,6 +20,8 @@ int decoder_init(struct Decoder *decoder, const char *codec);
 
 int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame);
 
+int decoder_flush(struct Decoder *decoder, AVFrame **frames, int *frames_count);
+
 void decoder_free_frame(struct Decoder *decoder);
 
 void decoder_free(struct Decoder **decoder);
\ No newline at end of file
diff --git a/c_src/xav/xav_decoder.c b/c_src/xav/xav_decoder.c
index ab0bfe3..a7f7374 100644
--- a/c_src/xav/xav_decoder.c
+++ b/c_src/xav/xav_decoder.c
@@ -6,6 +6,13 @@ ErlNifResourceType *xav_decoder_resource_type;
 
 static int init_audio_converter(struct XavDecoder *xav_decoder);
 
+// Releases every frame in `frames`, including the AVFrame structs themselves.
+static void free_frames(AVFrame **frames, int size) {
+  for (int i = 0; i < size; i++) {
+    av_frame_free(&frames[i]);
+  }
+}
+
 ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
   if (argc != 4) {
     return xav_nif_raise(env, "invalid_arg_count");
@@ -66,6 +73,66 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
   return decoder_term;
 }
 
+// Converts a decoded frame into an Erlang term: RGB data for video decoders, samples in the
+// converter's output format for audio decoders.
+// On failure the term returned by xav_nif_raise is passed back to the caller.
+static ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame *frame) {
+  ERL_NIF_TERM frame_term;
+  int ret;
+
+  if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_VIDEO) {
+    XAV_LOG_DEBUG("Converting video to RGB");
+
+    uint8_t *out_data[4];
+    int out_linesize[4];
+
+    ret = video_converter_convert(frame, out_data, out_linesize);
+    if (ret <= 0) {
+      return xav_nif_raise(env, "failed_to_decode");
+    }
+
+    frame_term = xav_nif_video_frame_to_term(env, frame, out_data, out_linesize, "rgb");
+
+    av_freep(&out_data[0]);
+  } else if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_AUDIO) {
+    XAV_LOG_DEBUG("Converting audio to desired out format");
+
+    uint8_t **out_data;
+    int out_samples;
+    int out_size;
+
+    if (xav_decoder->ac == NULL) {
+      ret = init_audio_converter(xav_decoder);
+      if (ret < 0) {
+        return xav_nif_raise(env, "failed_to_init_converter");
+      }
+    }
+
+    ret = audio_converter_convert(xav_decoder->ac, frame, &out_data, &out_samples, &out_size);
+    if (ret < 0) {
+      return xav_nif_raise(env, "failed_to_decode");
+    }
+
+    const char *out_format = av_get_sample_fmt_name(xav_decoder->ac->out_sample_fmt);
+
+    if (strcmp(out_format, "flt") == 0) {
+      out_format = "f32";
+    } else if (strcmp(out_format, "dbl") == 0) {
+      out_format = "f64";
+    }
+
+    frame_term = xav_nif_audio_frame_to_term(env, out_data, out_samples, out_size, out_format,
+                                             frame->pts);
+
+    av_freep(&out_data[0]);
+  } else {
+    // Never hand back an uninitialised term for an unexpected media type.
+    return xav_nif_raise(env, "unsupported_media_type");
+  }
+
+  return frame_term;
+}
+
 ERL_NIF_TERM decode(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
   ERL_NIF_TERM frame_term;
 
@@ -111,59 +178,61 @@ ERL_NIF_TERM decode(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
     return xav_nif_raise(env, "failed_to_decode");
   }
 
-  // convert
-  if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_VIDEO) {
-    XAV_LOG_DEBUG("Converting video to RGB");
-
-    uint8_t *out_data[4];
-    int out_linesize[4];
-
-    ret = video_converter_convert(xav_decoder->decoder->frame, out_data, out_linesize);
-    if (ret <= 0) {
-      return xav_nif_raise(env, "failed_to_decode");
-    }
-
-    frame_term = xav_nif_video_frame_to_term(env, xav_decoder->decoder->frame, out_data,
-                                             out_linesize, "rgb");
-
-    av_freep(&out_data[0]);
-  } else if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_AUDIO) {
-    XAV_LOG_DEBUG("Converting audio to desired out format");
-
-    uint8_t **out_data;
-    int out_samples;
-    int out_size;
-
-    if (xav_decoder->ac == NULL) {
-      ret = init_audio_converter(xav_decoder);
-      if (ret < 0) {
-        return ret;
-      }
-    }
-
-    ret = audio_converter_convert(xav_decoder->ac, xav_decoder->decoder->frame, &out_data,
-                                  &out_samples, &out_size);
-    if (ret < 0) {
-      return xav_nif_raise(env, "failed_to_decode");
-    }
-
-    const char *out_format = av_get_sample_fmt_name(xav_decoder->ac->out_sample_fmt);
-
-    if (strcmp(out_format, "flt") == 0) {
-      out_format = "f32";
-    } else if (strcmp(out_format, "dbl") == 0) {
-      out_format = "f64";
-    }
-
-    frame_term = xav_nif_audio_frame_to_term(env, out_data, out_samples, out_size, out_format,
-                                             xav_decoder->decoder->frame->pts);
-
-    av_freep(&out_data[0]);
-  }
-
-  decoder_free_frame(xav_decoder->decoder);
-
-  return xav_nif_ok(env, frame_term);
+  frame_term = convert(env, xav_decoder, xav_decoder->decoder->frame);
+
+  decoder_free_frame(xav_decoder->decoder);
+
+  // convert may pass back the term from xav_nif_raise; return it untouched instead of
+  // wrapping it in an ok tuple.
+  if (enif_is_exception(env, frame_term)) {
+    return frame_term;
+  }
+
+  return xav_nif_ok(env, frame_term);
+}
+
+// NIF entry point: drains the decoder and returns the buffered frames as a list.
+ERL_NIF_TERM flush(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
+  if (argc != 1) {
+    return xav_nif_raise(env, "invalid_arg_count");
+  }
+
+  struct XavDecoder *xav_decoder;
+  if (!enif_get_resource(env, argv[0], xav_decoder_resource_type, (void **)&xav_decoder)) {
+    return xav_nif_raise(env, "couldnt_get_decoder_resource");
+  }
+
+  // Zero-initialised so free_frames is safe even when an allocation fails part-way.
+  AVFrame *frames[MAX_FLUSH_BUFFER] = {NULL};
+  int frames_count = 0;
+
+  for (int i = 0; i < MAX_FLUSH_BUFFER; i++) {
+    frames[i] = av_frame_alloc();
+    if (frames[i] == NULL) {
+      free_frames(frames, MAX_FLUSH_BUFFER);
+      return xav_nif_raise(env, "failed_to_allocate_frame");
+    }
+  }
+
+  int ret = decoder_flush(xav_decoder->decoder, frames, &frames_count);
+  if (ret < 0) {
+    free_frames(frames, MAX_FLUSH_BUFFER);
+    return xav_nif_error(env, "failed_to_flush");
+  }
+
+  // Fixed capacity: a VLA sized by frames_count would have zero length when nothing is buffered.
+  ERL_NIF_TERM frame_terms[MAX_FLUSH_BUFFER];
+  for (int i = 0; i < frames_count; i++) {
+    frame_terms[i] = convert(env, xav_decoder, frames[i]);
+    if (enif_is_exception(env, frame_terms[i])) {
+      free_frames(frames, MAX_FLUSH_BUFFER);
+      return frame_terms[i];
+    }
+  }
+
+  free_frames(frames, MAX_FLUSH_BUFFER);
+
+  return xav_nif_ok(env, enif_make_list_from_array(env, frame_terms, frames_count));
 }
 
 static int init_audio_converter(struct XavDecoder *xav_decoder) {
@@ -235,7 +304,8 @@ void free_xav_decoder(ErlNifEnv *env, void *obj) {
 }
 
 static ErlNifFunc xav_funcs[] = {{"new", 4, new},
-                                 {"decode", 4, decode, ERL_NIF_DIRTY_JOB_CPU_BOUND}};
+                                 {"decode", 4, decode, ERL_NIF_DIRTY_JOB_CPU_BOUND},
+                                 {"flush", 1, flush, ERL_NIF_DIRTY_JOB_CPU_BOUND}};
 
 static int load(ErlNifEnv *env, void **priv, ERL_NIF_TERM load_info) {
   xav_decoder_resource_type =
diff --git a/lib/decoder.ex b/lib/decoder.ex
index bb9230b..7870857 100644
--- a/lib/decoder.ex
+++ b/lib/decoder.ex
@@ -6,7 +6,7 @@ defmodule Xav.Decoder do
   @typedoc """
   Supported codecs.
   """
-  @type codec() :: :opus | :vp8
+  @type codec() :: :opus | :vp8 | :h264 | :h265
 
   @type t() :: reference()
 
@@ -92,4 +92,35 @@ defmodule Xav.Decoder do
       error
     end
   end
+
+  @doc """
+  Flush the decoder.
+
+  Flushing signals the end of the stream and forces the decoder to return
+  any frames it still has buffered.
+  """
+  @spec flush(t()) :: {:ok, [Xav.Frame.t()]} | {:error, atom()}
+  def flush(decoder) do
+    with {:ok, frames} <- Xav.Decoder.NIF.flush(decoder) do
+      # NOTE(review): only the 5-element video tuple is matched here; confirm
+      # whether flushing an audio decoder needs its own clause.
+      frames =
+        Enum.map(frames, fn {data, format, width, height, pts} ->
+          Xav.Frame.new(data, format, width, height, pts)
+        end)
+
+      {:ok, frames}
+    end
+  end
+
+  @doc """
+  Same as `flush/1` but raises an exception on error.
+  """
+  @spec flush!(t()) :: [Xav.Frame.t()]
+  def flush!(decoder) do
+    case flush(decoder) do
+      {:ok, frames} -> frames
+      {:error, reason} -> raise "Failed to flush decoder: #{inspect(reason)}"
+    end
+  end
 end
diff --git a/lib/decoder_nif.ex b/lib/decoder_nif.ex
index df6751c..f2468d2 100644
--- a/lib/decoder_nif.ex
+++ b/lib/decoder_nif.ex
@@ -11,4 +11,6 @@ defmodule Xav.Decoder.NIF do
   def new(_codec, _out_format, _out_sample_rate, _out_channels), do: :erlang.nif_error(:undef)
 
   def decode(_decoder, _data, _pts, _dts), do: :erlang.nif_error(:undef)
+
+  def flush(_decoder), do: :erlang.nif_error(:undef)
 end
diff --git a/test/decoder_test.exs b/test/decoder_test.exs
index 2682ffd..3e3737e 100644
--- a/test/decoder_test.exs
+++ b/test/decoder_test.exs
@@ -286,6 +286,9 @@ defmodule Xav.DecoderTest do
                  142, 204, 5, 106, 217, 175, 162, 62, 128, 161, 69, 136, 234, 30, 43, 165, 152, 104,
                  143>>
 
+  @h264_frame File.read!("test/fixtures/decoder/sample_h264.h264")
+  @h265_frame File.read!("test/fixtures/decoder/sample_h265.h265")
+
   test "new/0" do
     assert decoder = Xav.Decoder.new(:vp8)
     assert is_reference(decoder)
@@ -329,5 +332,23 @@ defmodule Xav.DecoderTest do
 
       assert {:error, :no_keyframe} = Xav.Decoder.decode(decoder, @vp8_frame)
     end
+
+    test "h264 video" do
+      decoder = Xav.Decoder.new(:h264)
+
+      assert :ok = Xav.Decoder.decode(decoder, @h264_frame)
+
+      assert {:ok, [%Xav.Frame{width: 1280, height: 720, pts: 0, format: :rgb}]} =
+               Xav.Decoder.flush(decoder)
+    end
+
+    test "h265 video" do
+      decoder = Xav.Decoder.new(:h265)
+
+      assert :ok = Xav.Decoder.decode(decoder, @h265_frame)
+
+      assert {:ok, [%Xav.Frame{width: 1920, height: 1080, pts: 0, format: :rgb}]} =
+               Xav.Decoder.flush(decoder)
+    end
   end
 end
diff --git a/test/fixtures/decoder/sample_h264.h264 b/test/fixtures/decoder/sample_h264.h264
new file mode 100644
index 0000000..7011d10
Binary files /dev/null and b/test/fixtures/decoder/sample_h264.h264 differ
diff --git a/test/fixtures/decoder/sample_h265.h265 b/test/fixtures/decoder/sample_h265.h265
new file mode 100644
index 0000000..6b4d45b
Binary files /dev/null and b/test/fixtures/decoder/sample_h265.h265 differ