From c932e1777cb961c12ef23b925f459a8dd3bd5569 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Sun, 30 Jun 2024 12:11:15 +0200 Subject: [PATCH] Updates for encoder --- pkg/ffmpeg/encoder.go | 222 +++++++++++++++++++++++++++++ pkg/ffmpeg/opts.go | 101 ++++++++++--- pkg/ffmpeg/packet_encoder.go | 105 -------------- pkg/ffmpeg/par.go | 209 +++++++++++++++++++++++++++ pkg/ffmpeg/par_test.go | 28 ++++ pkg/ffmpeg/resampler.go | 30 ++-- pkg/ffmpeg/resampler_test.go | 2 +- pkg/ffmpeg/rescaler.go | 24 ++-- pkg/ffmpeg/writer.go | 146 +++++++++++++++++-- pkg/ffmpeg/writer_test.go | 32 ++++- pkg/generator/sine.go | 29 ++-- pkg/generator/sine_test.go | 7 +- pkg/generator/yuv420p.go | 4 + sys/ffmpeg61/avcodec.go | 2 + sys/ffmpeg61/avcodec_parameters.go | 51 ++++++- 15 files changed, 814 insertions(+), 178 deletions(-) create mode 100644 pkg/ffmpeg/encoder.go delete mode 100644 pkg/ffmpeg/packet_encoder.go create mode 100644 pkg/ffmpeg/par.go create mode 100644 pkg/ffmpeg/par_test.go diff --git a/pkg/ffmpeg/encoder.go b/pkg/ffmpeg/encoder.go new file mode 100644 index 0000000..142ab39 --- /dev/null +++ b/pkg/ffmpeg/encoder.go @@ -0,0 +1,222 @@ +package ffmpeg + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "syscall" + + // Packages + ff "github.com/mutablelogic/go-media/sys/ffmpeg61" + + // Namespace imports + . "github.com/djthorpe/go-errors" +) + +//////////////////////////////////////////////////////////////////////////////// +// TYPES + +type Encoder struct { + ctx *ff.AVCodecContext + stream *ff.AVStream + packet *ff.AVPacket + //next_pts int64 +} + +// EncoderFrameFn is a function which is called to receive a frame to encode. It should +// return nil to continue encoding or io.EOF to stop encoding. +type EncoderFrameFn func(int) (*ff.AVFrame, error) + +// EncoderPacketFn is a function which is called for each packet encoded. It should +// return nil to continue encoding or io.EOF to stop encoding immediately. +type EncoderPacketFn func(*ff.AVPacket) error + +//////////////////////////////////////////////////////////////////////////////// +// LIFECYCLE + +// Create an encoder with the given parameters +func NewEncoder(ctx *ff.AVFormatContext, stream int, par *Par) (*Encoder, error) { + encoder := new(Encoder) + + // Get codec + codec_id := ff.AV_CODEC_ID_NONE + switch par.CodecType() { + case ff.AVMEDIA_TYPE_AUDIO: + codec_id = ctx.Output().AudioCodec() + case ff.AVMEDIA_TYPE_VIDEO: + codec_id = ctx.Output().VideoCodec() + case ff.AVMEDIA_TYPE_SUBTITLE: + codec_id = ctx.Output().SubtitleCodec() + } + if codec_id == ff.AV_CODEC_ID_NONE { + return nil, ErrBadParameter.Withf("no codec specified for stream %v", stream) + } + + // Allocate codec + codec := ff.AVCodec_find_encoder(codec_id) + if codec == nil { + return nil, ErrBadParameter.Withf("codec %q cannot encode", codec_id) + } + if codecctx := ff.AVCodec_alloc_context(codec); codecctx == nil { + return nil, ErrInternalAppError.With("could not allocate audio codec context") + } else { + encoder.ctx = codecctx + } + + // Check codec against parameters and set defaults as needed, then + // copy back to codec + if err := par.ValidateFromCodec(encoder.ctx); err != nil { + ff.AVCodec_free_context(encoder.ctx) + return nil, err + } else if err := par.CopyToCodec(encoder.ctx); err != nil { + ff.AVCodec_free_context(encoder.ctx) + return nil, err + } + + // Create the stream + if streamctx := ff.AVFormat_new_stream(ctx, nil); streamctx == nil { + ff.AVCodec_free_context(encoder.ctx) + return nil, ErrInternalAppError.With("could not allocate stream") + } else { + streamctx.SetId(stream) + encoder.stream = streamctx + } + + // Copy parameters to stream + if err := ff.AVCodec_parameters_from_context(encoder.stream.CodecPar(), encoder.ctx); err != nil { + ff.AVCodec_free_context(encoder.ctx) + return nil, err + } + + // Some formats want stream headers to be separate. + if ctx.Output().Flags().Is(ff.AVFMT_GLOBALHEADER) { + encoder.ctx.SetFlags(encoder.ctx.Flags() | ff.AV_CODEC_FLAG_GLOBAL_HEADER) + } + + // Open it + if err := ff.AVCodec_open(encoder.ctx, codec, nil); err != nil { + ff.AVCodec_free_context(encoder.ctx) + return nil, ErrInternalAppError.Withf("codec_open: %v", err) + } + + // Create a packet + packet := ff.AVCodec_packet_alloc() + if packet == nil { + ff.AVCodec_free_context(encoder.ctx) + return nil, errors.New("failed to allocate packet") + } else { + encoder.packet = packet + } + + // Return it + return encoder, nil +} + +func (encoder *Encoder) Close() error { + // Free respurces + if encoder.ctx != nil { + ff.AVCodec_free_context(encoder.ctx) + } + if encoder.packet != nil { + ff.AVCodec_packet_free(encoder.packet) + } + + // Release resources + encoder.packet = nil + encoder.stream = nil + encoder.ctx = nil + + // Return success + return nil +} + +//////////////////////////////////////////////////////////////////////////////// +// STRINGIFY + +func (e *Encoder) MarshalJSON() ([]byte, error) { + type jsonEncoder struct { + Codec *ff.AVCodecContext `json:"codec"` + Stream *ff.AVStream `json:"stream"` + } + return json.Marshal(&jsonEncoder{ + Codec: e.ctx, + Stream: e.stream, + }) +} + +func (e *Encoder) String() string { + data, _ := json.MarshalIndent(e, "", " ") + return string(data) +} + +////////////////////////////////////////////////////////////////////////////// +// PUBLIC METHODS + +// Encode a frame and pass packets to the EncoderPacketFn. If the frame is nil, then +// the encoder will flush any remaining packets. If io.EOF is returned then +// it indicates that the encoder has ended prematurely. +func (e *Encoder) Encode(frame *ff.AVFrame, fn EncoderPacketFn) error { + if fn == nil { + return ErrBadParameter.With("nil fn") + } + // Encode a frame (or flush the encoder) + return e.encode(frame, fn) +} + +// Return the codec parameters +func (e *Encoder) Par() *Par { + par := new(Par) + if err := ff.AVCodec_parameters_from_context(&par.AVCodecParameters, e.ctx); err != nil { + return nil + } else { + return par + } +} + +////////////////////////////////////////////////////////////////////////////// +// PRIVATE METHODS + +func (e *Encoder) encode(frame *ff.AVFrame, fn EncoderPacketFn) error { + // Send the frame to the encoder + fmt.Println("Sending frame", frame) + if err := ff.AVCodec_send_frame(e.ctx, frame); err != nil { + if errors.Is(err, syscall.EAGAIN) || errors.Is(err, io.EOF) { + return nil + } + return err + } + + // Write out the packets + var result error + for { + // Receive the packet + fmt.Println("Receiving packet") + if err := ff.AVCodec_receive_packet(e.ctx, e.packet); errors.Is(err, syscall.EAGAIN) || errors.Is(err, io.EOF) { + // Finished receiving packet or EOF + break + } else if err != nil { + return err + } + + // Pass back to the caller + if err := fn(e.packet); errors.Is(err, io.EOF) { + // End early, return EOF + result = io.EOF + break + } else if err != nil { + return err + } + + // Re-allocate frames for next iteration + ff.AVCodec_packet_unref(e.packet) + } + + // Flush + if result == nil { + result = fn(nil) + } + + // Return success or EOF + return result +} diff --git a/pkg/ffmpeg/opts.go b/pkg/ffmpeg/opts.go index 990e2f5..90b8c63 100644 --- a/pkg/ffmpeg/opts.go +++ b/pkg/ffmpeg/opts.go @@ -15,18 +15,23 @@ type Opt func(*opts) error type opts struct { // Resample/resize options force bool + par *Par // Format options oformat *ffmpeg.AVOutputFormat - // Audio options - sample_fmt ffmpeg.AVSampleFormat - ch ffmpeg.AVChannelLayout - samplerate int + // Stream options + streams map[int]*Par +} + +//////////////////////////////////////////////////////////////////////////////// +// LIFECYCLE - // Video options - pix_fmt ffmpeg.AVPixelFormat - width, height int +func newOpts() *opts { + return &opts{ + par: new(Par), + streams: make(map[int]*Par), + } } //////////////////////////////////////////////////////////////////////////////// @@ -45,6 +50,50 @@ func OptOutputFormat(name string) Opt { } } +// New audio stream with parameters +func OptAudioStream(stream int, par *Par) Opt { + return func(o *opts) error { + if par == nil || par.CodecType() != ffmpeg.AVMEDIA_TYPE_AUDIO { + return ErrBadParameter.With("invalid audio parameters") + } + if stream == 0 { + stream = len(o.streams) + 1 + } + if _, exists := o.streams[stream]; exists { + return ErrDuplicateEntry.Withf("stream %v", stream) + } + if stream < 0 { + return ErrBadParameter.Withf("invalid stream %v", stream) + } + o.streams[stream] = par + + // Return success + return nil + } +} + +// New video stream with parameters +func OptVideoStream(stream int, par *Par) Opt { + return func(o *opts) error { + if par == nil || par.CodecType() != ffmpeg.AVMEDIA_TYPE_VIDEO { + return ErrBadParameter.With("invalid video parameters") + } + if stream == 0 { + stream = len(o.streams) + 1 + } + if _, exists := o.streams[stream]; exists { + return ErrDuplicateEntry.Withf("stream %v", stream) + } + if stream < 0 { + return ErrBadParameter.Withf("invalid stream %v", stream) + } + o.streams[stream] = par + + // Return success + return nil + } +} + // Force resampling and resizing on decode, even if the input and output // parameters are the same func OptForce() Opt { @@ -61,7 +110,8 @@ func OptPixFormat(format string) Opt { if fmt == ffmpeg.AV_PIX_FMT_NONE { return ErrBadParameter.Withf("invalid pixel format %q", format) } - o.pix_fmt = fmt + o.par.SetCodecType(ffmpeg.AVMEDIA_TYPE_VIDEO) + o.par.SetPixelFormat(fmt) return nil } } @@ -72,8 +122,9 @@ func OptWidthHeight(w, h int) Opt { if w <= 0 || h <= 0 { return ErrBadParameter.Withf("invalid width %v or height %v", w, h) } - o.width = w - o.height = h + o.par.SetCodecType(ffmpeg.AVMEDIA_TYPE_VIDEO) + o.par.SetWidth(w) + o.par.SetHeight(h) return nil } } @@ -85,8 +136,9 @@ func OptFrameSize(size string) Opt { if err != nil { return ErrBadParameter.Withf("invalid frame size %q", size) } - o.width = w - o.height = h + o.par.SetCodecType(ffmpeg.AVMEDIA_TYPE_VIDEO) + o.par.SetWidth(w) + o.par.SetHeight(h) return nil } } @@ -94,18 +146,25 @@ func OptFrameSize(size string) Opt { // Channel layout func OptChannelLayout(layout string) Opt { return func(o *opts) error { - return ffmpeg.AVUtil_channel_layout_from_string(&o.ch, layout) + var ch ffmpeg.AVChannelLayout + if err := ffmpeg.AVUtil_channel_layout_from_string(&ch, layout); err != nil { + return ErrBadParameter.Withf("invalid channel layout %q", layout) + } + o.par.SetCodecType(ffmpeg.AVMEDIA_TYPE_AUDIO) + return o.par.SetChannelLayout(ch) } } // Nuumber of channels -func OptChannels(ch int) Opt { +func OptChannels(num int) Opt { return func(o *opts) error { - if ch <= 0 || ch > 64 { - return ErrBadParameter.Withf("invalid number of channels %v", ch) + var ch ffmpeg.AVChannelLayout + if num <= 0 || num > 64 { + return ErrBadParameter.Withf("invalid number of channels %v", num) } - ffmpeg.AVUtil_channel_layout_default(&o.ch, ch) - return nil + ffmpeg.AVUtil_channel_layout_default(&ch, num) + o.par.SetCodecType(ffmpeg.AVMEDIA_TYPE_AUDIO) + return o.par.SetChannelLayout(ch) } } @@ -115,7 +174,8 @@ func OptSampleRate(rate int) Opt { if rate <= 0 { return ErrBadParameter.Withf("invalid sample rate %v", rate) } - o.samplerate = rate + o.par.SetCodecType(ffmpeg.AVMEDIA_TYPE_AUDIO) + o.par.SetSamplerate(rate) return nil } } @@ -127,7 +187,8 @@ func OptSampleFormat(format string) Opt { if fmt == ffmpeg.AV_SAMPLE_FMT_NONE { return ErrBadParameter.Withf("invalid sample format %q", format) } - o.sample_fmt = fmt + o.par.SetCodecType(ffmpeg.AVMEDIA_TYPE_AUDIO) + o.par.SetSampleFormat(fmt) return nil } } diff --git a/pkg/ffmpeg/packet_encoder.go b/pkg/ffmpeg/packet_encoder.go deleted file mode 100644 index 0ce5ad5..0000000 --- a/pkg/ffmpeg/packet_encoder.go +++ /dev/null @@ -1,105 +0,0 @@ -package ffmpeg - -import ( - "errors" - "io" - "syscall" - - // Packages - ff "github.com/mutablelogic/go-media/sys/ffmpeg61" -) - -////////////////////////////////////////////////////////////////////////////// -// TYPES - -type encoder struct { - codec *ff.AVCodecContext - packet *ff.AVPacket - stream int -} - -// EncodeFn is a function which is called for each packet encoded. It should -// return nil to continue encoding or io.EOF to stop decoding. -type EncodeFn func(*ff.AVPacket) error - -////////////////////////////////////////////////////////////////////////////// -// LIFECYCLE - -func NewPacketEncoder(codec *ff.AVCodecContext, stream int) (*encoder, error) { - encoder := new(encoder) - encoder.codec = codec - - // Create a packet - packet := ff.AVCodec_packet_alloc() - if packet == nil { - return nil, errors.New("failed to allocate packet") - } else { - encoder.packet = packet - encoder.stream = stream - } - - // Return success - return encoder, nil -} - -func (e *encoder) Close() error { - if e.packet != nil { - ff.AVCodec_packet_free(e.packet) - e.packet = nil - } - return nil -} - -////////////////////////////////////////////////////////////////////////////// -// LIFECYCLE - -func (e *encoder) Encode(frame *ff.AVFrame, fn EncodeFn) error { - // Encode a frame - if err := e.encode(frame, fn); err != nil { - return err - } - // Flush - return e.encode(nil, fn) -} - -////////////////////////////////////////////////////////////////////////////// -// PRIVATE METHODS - -func (e *encoder) encode(frame *ff.AVFrame, fn EncodeFn) error { - // Send the frame to the encoder - if err := ff.AVCodec_send_frame(e.codec, frame); err != nil { - return err - } - - // Write out the packets - var result error - for { - // Receive the packet - if err := ff.AVCodec_receive_packet(e.codec, e.packet); errors.Is(err, syscall.EAGAIN) || errors.Is(err, io.EOF) { - // Finished receiving packet or EOF - break - } else if err != nil { - return err - } - - // Pass back to the caller - if err := fn(e.packet); errors.Is(err, io.EOF) { - // End early, return EOF - result = io.EOF - break - } else if err != nil { - return err - } - - // Re-allocate frames for next iteration - ff.AVCodec_packet_unref(e.packet) - } - - // Flush - if result == nil { - result = fn(nil) - } - - // Return success or EOF - return result -} diff --git a/pkg/ffmpeg/par.go b/pkg/ffmpeg/par.go new file mode 100644 index 0000000..3149feb --- /dev/null +++ b/pkg/ffmpeg/par.go @@ -0,0 +1,209 @@ +package ffmpeg + +import ( + "encoding/json" + "fmt" + "slices" + + ff "github.com/mutablelogic/go-media/sys/ffmpeg61" + + // Namespace imports + . "github.com/djthorpe/go-errors" +) + +/////////////////////////////////////////////////////////////////////////////// +// TYPES + +type Par struct { + ff.AVCodecParameters +} + +/////////////////////////////////////////////////////////////////////////////// +// LIFECYCLE + +func NewAudioPar(samplefmt string, channellayout string, samplerate int) (*Par, error) { + par := new(Par) + par.SetCodecType(ff.AVMEDIA_TYPE_AUDIO) + + // Sample Format + if samplefmt_ := ff.AVUtil_get_sample_fmt(samplefmt); samplefmt_ == ff.AV_SAMPLE_FMT_NONE { + return nil, ErrBadParameter.Withf("unknown sample format %q", samplefmt) + } else { + par.SetSampleFormat(samplefmt_) + } + + // Channel layout + var ch ff.AVChannelLayout + if err := ff.AVUtil_channel_layout_from_string(&ch, channellayout); err != nil { + return nil, ErrBadParameter.Withf("channel layout %q", channellayout) + } else if err := par.SetChannelLayout(ch); err != nil { + return nil, err + } + + // Sample rate + if samplerate <= 0 { + return nil, ErrBadParameter.Withf("negative or zero samplerate %v", samplerate) + } else { + par.SetSamplerate(samplerate) + } + + // Return success + return par, nil +} + +func NewVideoPar(pixfmt string, size string) (*Par, error) { + par := new(Par) + par.SetCodecType(ff.AVMEDIA_TYPE_VIDEO) + + // Pixel Format + if pixfmt_ := ff.AVUtil_get_pix_fmt(pixfmt); pixfmt_ == ff.AV_PIX_FMT_NONE { + return nil, ErrBadParameter.Withf("unknown pixel format %q", pixfmt) + } else { + par.SetPixelFormat(pixfmt_) + } + + // Frame size + if w, h, err := ff.AVUtil_parse_video_size(size); err != nil { + return nil, ErrBadParameter.Withf("size %q", size) + } else { + par.SetWidth(w) + par.SetHeight(h) + } + + // Set default sample aspect ratio + par.SetSampleAspectRatio(ff.AVUtil_rational(1, 1)) + + // Return success + return par, nil +} + +func AudioPar(samplefmt string, channellayout string, samplerate int) *Par { + if par, err := NewAudioPar(samplefmt, channellayout, samplerate); err != nil { + panic(err) + } else { + return par + } +} + +func VideoPar(pixfmt string, size string) *Par { + if par, err := NewVideoPar(pixfmt, size); err != nil { + panic(err) + } else { + return par + } +} + +/////////////////////////////////////////////////////////////////////////////// +// STRINGIFY + +func (ctx *Par) MarshalJSON() ([]byte, error) { + return json.Marshal(ctx.AVCodecParameters) +} + +func (ctx *Par) String() string { + data, _ := json.MarshalIndent(ctx, "", " ") + return string(data) +} + +/////////////////////////////////////////////////////////////////////////////// +// PUBLIC METHODS + +func (ctx *Par) ValidateFromCodec(codec *ff.AVCodecContext) error { + switch codec.Codec().Type() { + case ff.AVMEDIA_TYPE_AUDIO: + return ctx.validateAudioCodec(codec) + case ff.AVMEDIA_TYPE_VIDEO: + return ctx.validateVideoCodec(codec) + } + return nil +} + +func (ctx *Par) CopyToCodec(codec *ff.AVCodecContext) error { + switch codec.Codec().Type() { + case ff.AVMEDIA_TYPE_AUDIO: + return ctx.copyAudioCodec(codec) + case ff.AVMEDIA_TYPE_VIDEO: + return ctx.copyVideoCodec(codec) + } + return nil +} + +/////////////////////////////////////////////////////////////////////////////// +// PRIVATE METHODS + +func (ctx *Par) copyAudioCodec(codec *ff.AVCodecContext) error { + codec.SetSampleFormat(ctx.SampleFormat()) + codec.SetSampleRate(ctx.Samplerate()) + if err := codec.SetChannelLayout(ctx.ChannelLayout()); err != nil { + return err + } + return nil +} + +func (ctx *Par) validateAudioCodec(codec *ff.AVCodecContext) error { + sampleformats := codec.Codec().SampleFormats() + samplerates := codec.Codec().SupportedSamplerates() + channellayouts := codec.Codec().ChannelLayouts() + + // First we set params from the codec which are not already set + if ctx.SampleFormat() == ff.AV_SAMPLE_FMT_NONE { + if len(sampleformats) > 0 { + ctx.SetSampleFormat(sampleformats[0]) + } + } + if ctx.Samplerate() == 0 { + if len(samplerates) > 0 { + ctx.SetSamplerate(samplerates[0]) + } + } + if ctx.ChannelLayout().NumChannels() == 0 { + if len(channellayouts) > 0 { + ctx.SetChannelLayout(channellayouts[0]) + } + } + + // Then we check to make sure the parameters are compatible with + // the codec + if len(sampleformats) > 0 { + if !slices.Contains(sampleformats, ctx.SampleFormat()) { + return ErrBadParameter.Withf("unsupported sample format %v", ctx.SampleFormat()) + } + } else if ctx.SampleFormat() == ff.AV_SAMPLE_FMT_NONE { + return ErrBadParameter.With("sample format not set") + } + if len(samplerates) > 0 { + if !slices.Contains(samplerates, ctx.Samplerate()) { + return ErrBadParameter.Withf("unsupported samplerate %v", ctx.Samplerate()) + } + } else if ctx.Samplerate() == 0 { + return ErrBadParameter.With("samplerate not set") + } + if len(channellayouts) > 0 { + valid := false + for _, ch := range channellayouts { + chctx := ctx.ChannelLayout() + if ff.AVUtil_channel_layout_compare(&ch, &chctx) { + valid = true + break + } + } + if !valid { + return ErrBadParameter.Withf("unsupported channel layout %v", ctx.ChannelLayout()) + } + } else if ctx.ChannelLayout().NumChannels() == 0 { + return ErrBadParameter.With("channel layout not set") + } + + // Validated + return nil +} + +func (ctx *Par) copyVideoCodec(codec *ff.AVCodecContext) error { + fmt.Println("TODO: copyVideoCodec") + return nil +} + +func (ctx *Par) validateVideoCodec(codec *ff.AVCodecContext) error { + fmt.Println("TODO: validateVideoCodec") + return nil +} diff --git a/pkg/ffmpeg/par_test.go b/pkg/ffmpeg/par_test.go new file mode 100644 index 0000000..4f8d88a --- /dev/null +++ b/pkg/ffmpeg/par_test.go @@ -0,0 +1,28 @@ +package ffmpeg_test + +import ( + "testing" + + ffmpeg "github.com/mutablelogic/go-media/pkg/ffmpeg" + assert "github.com/stretchr/testify/assert" +) + +func Test_par_001(t *testing.T) { + assert := assert.New(t) + + par, err := ffmpeg.NewAudioPar("fltp", "mono", 22050) + if !assert.NoError(err) { + t.FailNow() + } + t.Log(par) +} + +func Test_par_002(t *testing.T) { + assert := assert.New(t) + + par, err := ffmpeg.NewVideoPar("yuv420p", "1280x720") + if !assert.NoError(err) { + t.FailNow() + } + t.Log(par) +} diff --git a/pkg/ffmpeg/resampler.go b/pkg/ffmpeg/resampler.go index 84b0aeb..e56d2f8 100644 --- a/pkg/ffmpeg/resampler.go +++ b/pkg/ffmpeg/resampler.go @@ -12,9 +12,9 @@ import ( // TYPES type resampler struct { - opts - ctx *ff.SWRContext - dest *ff.AVFrame + ctx *ff.SWRContext + dest *ff.AVFrame + force bool } //////////////////////////////////////////////////////////////////////////////// @@ -23,23 +23,26 @@ type resampler struct { // Create a new audio resampler which will resample the input frame to the // specified channel layout, sample rate and sample format. func NewResampler(format ff.AVSampleFormat, opt ...Opt) (*resampler, error) { + options := newOpts() resampler := new(resampler) // Apply options - resampler.sample_fmt = format - resampler.ch = ff.AV_CHANNEL_LAYOUT_MONO - resampler.samplerate = 44100 + options.par.SetCodecType(ff.AVMEDIA_TYPE_AUDIO) + options.par.SetSampleFormat(format) + options.par.SetChannelLayout(ff.AV_CHANNEL_LAYOUT_MONO) + options.par.SetSamplerate(44100) for _, o := range opt { - if err := o(&resampler.opts); err != nil { + if err := o(options); err != nil { return nil, err } } // Check parameters - if resampler.sample_fmt == ff.AV_SAMPLE_FMT_NONE { + if options.par.SampleFormat() == ff.AV_SAMPLE_FMT_NONE { return nil, errors.New("invalid sample format parameters") } - if !ff.AVUtil_channel_layout_check(&resampler.ch) { + ch := options.par.ChannelLayout() + if !ff.AVUtil_channel_layout_check(&ch) { return nil, errors.New("invalid channel layout parameters") } @@ -52,15 +55,18 @@ func NewResampler(format ff.AVSampleFormat, opt ...Opt) (*resampler, error) { // Set parameters - we don't allocate the buffer here, // we do that when we have a source frame and know how // large the destination frame should be - dest.SetSampleFormat(resampler.sample_fmt) - dest.SetSampleRate(resampler.samplerate) - if err := dest.SetChannelLayout(resampler.ch); err != nil { + dest.SetSampleFormat(options.par.SampleFormat()) + dest.SetSampleRate(options.par.Samplerate()) + if err := dest.SetChannelLayout(options.par.ChannelLayout()); err != nil { ff.AVUtil_frame_free(dest) return nil, err } else { resampler.dest = dest } + // Set force flag + resampler.force = options.force + // Return success return resampler, nil } diff --git a/pkg/ffmpeg/resampler_test.go b/pkg/ffmpeg/resampler_test.go index e84f54d..dc3adc5 100644 --- a/pkg/ffmpeg/resampler_test.go +++ b/pkg/ffmpeg/resampler_test.go @@ -13,7 +13,7 @@ func Test_resampler_001(t *testing.T) { assert := assert.New(t) // Sine wave generator - audio, err := generator.NewSine(2000, 10, 44100) + audio, err := generator.NewSine(2000, 10, ffmpeg.AudioPar("fltp", "mono", 44100)) if !assert.NoError(err) { t.FailNow() } diff --git a/pkg/ffmpeg/rescaler.go b/pkg/ffmpeg/rescaler.go index 87deafa..f6cfb6e 100644 --- a/pkg/ffmpeg/rescaler.go +++ b/pkg/ffmpeg/rescaler.go @@ -12,13 +12,12 @@ import ( // TYPES type rescaler struct { - opts - src_pix_fmt ff.AVPixelFormat src_width int src_height int ctx *ff.SWSContext flags ff.SWSFlag + force bool dest *ff.AVFrame } @@ -28,20 +27,22 @@ type rescaler struct { // Create a new rescaler which will rescale the input frame to the // specified format, width and height. func NewRescaler(format ff.AVPixelFormat, opt ...Opt) (*rescaler, error) { + options := newOpts() rescaler := new(rescaler) // Apply options - rescaler.pix_fmt = format - rescaler.width = 640 - rescaler.height = 480 + options.par.SetCodecType(ff.AVMEDIA_TYPE_VIDEO) + options.par.SetPixelFormat(format) + options.par.SetWidth(640) + options.par.SetHeight(480) for _, o := range opt { - if err := o(&rescaler.opts); err != nil { + if err := o(options); err != nil { return nil, err } } // Check parameters - if rescaler.pix_fmt == ff.AV_PIX_FMT_NONE { + if options.par.PixelFormat() == ff.AV_PIX_FMT_NONE { return nil, errors.New("invalid parameters") } @@ -51,10 +52,13 @@ func NewRescaler(format ff.AVPixelFormat, opt ...Opt) (*rescaler, error) { return nil, errors.New("failed to allocate frame") } + // Set force flag + rescaler.force = options.force + // Set parameters - dest.SetPixFmt(rescaler.pix_fmt) - dest.SetWidth(rescaler.width) - dest.SetHeight(rescaler.height) + dest.SetPixFmt(options.par.PixelFormat()) + dest.SetWidth(options.par.Width()) + dest.SetHeight(options.par.Height()) // Allocate buffer if err := ff.AVUtil_frame_get_buffer(dest, false); err != nil { diff --git a/pkg/ffmpeg/writer.go b/pkg/ffmpeg/writer.go index 82f7377..2987c28 100644 --- a/pkg/ffmpeg/writer.go +++ b/pkg/ffmpeg/writer.go @@ -4,9 +4,12 @@ import ( "errors" "fmt" "io" + "os" + "sort" // Packages ff "github.com/mutablelogic/go-media/sys/ffmpeg61" + maps "golang.org/x/exp/maps" // Namespace imports . "github.com/djthorpe/go-errors" @@ -17,7 +20,9 @@ import ( // Create media from io.Writer type Writer struct { - output *ff.AVFormatContext + output *ff.AVFormatContext + header bool + encoders []*Encoder } type writer_callback struct { @@ -35,33 +40,58 @@ const ( // LIFECYCLE func NewWriter(w io.Writer, opt ...Opt) (*Writer, error) { - var o opts - + options := newOpts() writer := new(Writer) // Apply options for _, opt := range opt { - if err := opt(&o); err != nil { + if err := opt(options); err != nil { return nil, err } } // Check output - if o.oformat == nil { + var filename string + if options.oformat == nil { return nil, ErrBadParameter.Withf("invalid output format") + } else if w_, ok := w.(*os.File); ok { + filename = w_.Name() } // Allocate the AVIO context - avio := ff.AVFormat_avio_alloc_context(bufSize, false, &writer_callback{w}) + avio := ff.AVFormat_avio_alloc_context(bufSize, true, &writer_callback{w}) if avio == nil { return nil, errors.New("failed to allocate avio context") - } else if ctx, err := ff.AVFormat_open_writer(avio, o.oformat, ""); err != nil { + } else if ctx, err := ff.AVFormat_open_writer(avio, options.oformat, filename); err != nil { return nil, err } else { writer.output = ctx } - fmt.Println("WRITER", writer.output) + // Create codec contexts for each stream + var result error + keys := sort.IntSlice(maps.Keys(options.streams)) + for _, stream := range keys { + encoder, err := NewEncoder(writer.output, stream, options.streams[stream]) + if err != nil { + result = errors.Join(result, err) + continue + } else { + writer.encoders = append(writer.encoders, encoder) + } + } + + // Return any errors + if result != nil { + return nil, errors.Join(result, writer.Close()) + } + + // Write the header + if err := ff.AVFormat_write_header(writer.output, nil); err != nil { + return nil, errors.Join(err, writer.Close()) + } else { + writer.header = true + } // Return success return writer, nil @@ -70,23 +100,113 @@ func NewWriter(w io.Writer, opt ...Opt) (*Writer, error) { func (w *Writer) Close() error { var result error + // Write the trailer if the header was written + if w.header { + if err := ff.AVFormat_write_trailer(w.output); err != nil { + result = errors.Join(result, err) + } + } + + // Close encoders + for _, encoder := range w.encoders { + result = errors.Join(result, encoder.Close()) + } + // Free output resources if w.output != nil { - // This calls avio_close(w.avio) - fmt.Println("TODO: AVFormat_close_writer") result = errors.Join(result, ff.AVFormat_close_writer(w.output)) - w.output = nil } + // Free resources + w.output = nil + w.encoders = nil + // Return any errors return result } +////////////////////////////////////////////////////////////////////////////// +// PUBLIC METHODS + +// Return a "stream" for encoding +func (w *Writer) Stream(stream int) *Encoder { + for _, encoder := range w.encoders { + if encoder.stream.Id() == stream { + return encoder + } + } + return nil +} + +// Encode frames from all encoders, calling the callback function to encode +// the frame. If the callback function returns io.EOF then the encoding for +// that encoder is stopped after flushing. If the second callback is nil, +// then packets are written to the output. +func (w *Writer) Encode(in EncoderFrameFn, out EncoderPacketFn) error { + if in == nil { + return ErrBadParameter.With("nil in or out") + } + if out == nil { + // By default, write packet to output + out = w.Write + } + + // Initialise encoders + encoders := make(map[int]*Encoder, len(w.encoders)) + for _, encoder := range w.encoders { + stream := encoder.stream.Id() + if _, exists := encoders[stream]; exists { + return ErrBadParameter.Withf("duplicate stream %v", stream) + } + encoders[stream] = encoder + } + + // Continue until all encoders have returned io.EOF + for { + // No more encoding to do + if len(encoders) == 0 { + break + } + for stream, encoder := range encoders { + // Receive a frame for the encoder + frame, err := in(stream) + if errors.Is(err, io.EOF) { + fmt.Println("EOF for frame on stream", stream) + delete(encoders, stream) + } else if err != nil { + return fmt.Errorf("stream %v: %w", stream, err) + } else if frame == nil { + return fmt.Errorf("stream %v: nil frame received", stream) + } else if err := encoder.Encode(frame, out); errors.Is(err, io.EOF) { + fmt.Println("EOF for packet on stream", stream) + delete(encoders, stream) + } else if err != nil { + return fmt.Errorf("stream %v: %w", stream, err) + } + } + } + + // Return success + return nil +} + +// Write a packet to the output +func (w *Writer) Write(packet *ff.AVPacket) error { + return ff.AVCodec_interleaved_write_frame(w.output, packet) +} + //////////////////////////////////////////////////////////////////////////////// -// PRIVATE METHODS +// PRIVATE METHODS - Writer func (w *writer_callback) Reader(buf []byte) int { - return 0 + if r, ok := w.w.(io.Reader); ok { + if n, err := r.Read(buf); err != nil { + return -1 + } else { + return n + } + } + return -1 } func (w *writer_callback) Seeker(offset int64, whence int) int64 { diff --git a/pkg/ffmpeg/writer_test.go b/pkg/ffmpeg/writer_test.go index 2836309..e421574 100644 --- a/pkg/ffmpeg/writer_test.go +++ b/pkg/ffmpeg/writer_test.go @@ -1,11 +1,14 @@ package ffmpeg_test import ( - "fmt" + "io" "os" "testing" + "time" ffmpeg "github.com/mutablelogic/go-media/pkg/ffmpeg" + generator "github.com/mutablelogic/go-media/pkg/generator" + ff "github.com/mutablelogic/go-media/sys/ffmpeg61" assert "github.com/stretchr/testify/assert" ) @@ -22,11 +25,36 @@ func Test_writer_001(t *testing.T) { // Create a writer with an audio stream writer, err := ffmpeg.NewWriter(w, ffmpeg.OptOutputFormat(w.Name()), + ffmpeg.OptAudioStream(1, ffmpeg.AudioPar("fltp", "mono", 22050)), ) if !assert.NoError(err) { t.FailNow() } defer writer.Close() - fmt.Println("Written to", w.Name()) + // Make an audio generator + audio, err := generator.NewSine(440, -5, writer.Stream(1).Par()) + if !assert.NoError(err) { + t.FailNow() + } + defer audio.Close() + + // Write frames + n := 0 + assert.NoError(writer.Encode(func(stream int) (*ff.AVFrame, error) { + frame := audio.Frame() + t.Log("Frame s", frame.Time().Truncate(time.Millisecond)) + if frame.Time() > 10*time.Second { + return nil, io.EOF + } else { + return frame.(*ffmpeg.Frame).AVFrame(), nil + } + }, func(packet *ff.AVPacket) error { + if packet != nil { + t.Log("Packet ts", packet.Pts()) + n += packet.Size() + } + return writer.Write(packet) + })) + t.Log("Written", n, "bytes to", w.Name()) } diff --git a/pkg/generator/sine.go b/pkg/generator/sine.go index 32b1e0e..2763ff1 100644 --- a/pkg/generator/sine.go +++ b/pkg/generator/sine.go @@ -27,7 +27,7 @@ var _ Generator = (*sine)(nil) // GLOBALS const ( - frameDuration = 20 * time.Millisecond // Each frame is 20ms of audio + frameDuration = 10 * time.Millisecond // Each frame is 10ms of audio ) //////////////////////////////////////////////////////////////////////////// @@ -36,19 +36,29 @@ const ( // Create a new sine wave generator with one channel using float32 // for samples. The frequency in Hz, volume in decibels and samplerate // (ie, 44100) for the audio stream are passed as arguments. -func NewSine(freq float64, volume float64, samplerate int) (*sine, error) { +func NewSine(freq, volume float64, par *ffmpeg.Par) (*sine, error) { sine := new(sine) // Check parameters + if par.CodecType() != ff.AVMEDIA_TYPE_AUDIO { + return nil, errors.New("invalid codec type") + } else if par.ChannelLayout().NumChannels() != 1 { + return nil, errors.New("invalid channel layout, only mono is supported") + } else if par.SampleFormat() != ff.AV_SAMPLE_FMT_FLT && par.SampleFormat() != ff.AV_SAMPLE_FMT_FLTP { + return nil, errors.New("invalid sample format, only float32 is supported") + } if freq <= 0 { return nil, errors.New("invalid frequency") } if volume <= -100 { return nil, errors.New("invalid volume") } - if samplerate <= 0 { + if par.Samplerate() <= 0 { return nil, errors.New("invalid samplerate") } + if par.FrameSize() <= 0 { + par.SetFrameSize(int(float64(par.Samplerate()) * frameDuration.Seconds())) + } // Create a frame frame := ff.AVUtil_frame_alloc() @@ -56,16 +66,13 @@ func NewSine(freq float64, volume float64, samplerate int) (*sine, error) { return nil, errors.New("failed to allocate frame") } - // Set frame parameters - numSamples := int(float64(samplerate) * frameDuration.Seconds()) - frame.SetSampleFormat(ff.AV_SAMPLE_FMT_FLT) // float32 if err := frame.SetChannelLayout(ff.AV_CHANNEL_LAYOUT_MONO); err != nil { return nil, err } - frame.SetSampleRate(samplerate) - frame.SetNumSamples(numSamples) - frame.SetTimeBase(ff.AVUtil_rational(1, samplerate)) + frame.SetSampleRate(par.Samplerate()) + frame.SetNumSamples(par.FrameSize()) + frame.SetTimeBase(ff.AVUtil_rational(1, par.Samplerate())) frame.SetPts(ff.AV_NOPTS_VALUE) // Allocate buffer @@ -101,6 +108,10 @@ func (s *sine) String() string { // Return the first and subsequent frames of raw audio data func (s *sine) Frame() media.Frame { + if err := ff.AVUtil_frame_make_writable(s.frame); err != nil { + return nil + } + // Set the Pts if s.frame.Pts() == ff.AV_NOPTS_VALUE { s.frame.SetPts(0) diff --git a/pkg/generator/sine_test.go b/pkg/generator/sine_test.go index 95d15ad..7781aee 100644 --- a/pkg/generator/sine_test.go +++ b/pkg/generator/sine_test.go @@ -6,13 +6,14 @@ import ( "testing" "time" + "github.com/mutablelogic/go-media/pkg/ffmpeg" "github.com/mutablelogic/go-media/pkg/generator" "github.com/stretchr/testify/assert" ) func Test_sine_001(t *testing.T) { assert := assert.New(t) - sine, err := generator.NewSine(2000, 10, 44100) + sine, err := generator.NewSine(2000, 10, ffmpeg.AudioPar("fltp", "mono", 44100)) if !assert.NoError(err) { t.SkipNow() } @@ -23,7 +24,7 @@ func Test_sine_001(t *testing.T) { func Test_sine_002(t *testing.T) { assert := assert.New(t) - sine, err := generator.NewSine(2000, 10, 44100) + sine, err := generator.NewSine(2000, 10, ffmpeg.AudioPar("fltp", "mono", 44100)) if !assert.NoError(err) { t.SkipNow() } @@ -42,7 +43,7 @@ func Test_sine_003(t *testing.T) { const frequency = 440 const volume = -10.0 - sine, err := generator.NewSine(frequency, volume, sampleRate) + sine, err := generator.NewSine(frequency, volume, ffmpeg.AudioPar("fltp", "mono", sampleRate)) if !assert.NoError(err) { t.SkipNow() } diff --git a/pkg/generator/yuv420p.go b/pkg/generator/yuv420p.go index cb3133c..3ad5556 100644 --- a/pkg/generator/yuv420p.go +++ b/pkg/generator/yuv420p.go @@ -80,6 +80,10 @@ func (yuv420p *yuv420p) String() string { // Return the first and subsequent frames of raw video data func (yuv420p *yuv420p) Frame() media.Frame { + if err := ff.AVUtil_frame_make_writable(yuv420p.frame); err != nil { + return nil + } + // Set the Pts if yuv420p.frame.Pts() == ff.AV_NOPTS_VALUE { yuv420p.frame.SetPts(0) diff --git a/sys/ffmpeg61/avcodec.go b/sys/ffmpeg61/avcodec.go index e7bd26e..d4e6def 100644 --- a/sys/ffmpeg61/avcodec.go +++ b/sys/ffmpeg61/avcodec.go @@ -59,6 +59,7 @@ type jsonAVCodecContext struct { SampleFormat AVSampleFormat `json:"sample_fmt,omitempty"` SampleRate int `json:"sample_rate,omitempty"` ChannelLayout AVChannelLayout `json:"channel_layout,omitempty"` + FrameSize int `json:"frame_size,omitempty"` TimeBase AVRational `json:"time_base,omitempty"` } @@ -192,6 +193,7 @@ func (ctx *AVCodecContext) MarshalJSON() ([]byte, error) { SampleFormat: AVSampleFormat(ctx.sample_fmt), SampleRate: int(ctx.sample_rate), ChannelLayout: AVChannelLayout(ctx.ch_layout), + FrameSize: int(ctx.frame_size), }) default: return json.Marshal(jsonAVCodecContext{ diff --git a/sys/ffmpeg61/avcodec_parameters.go b/sys/ffmpeg61/avcodec_parameters.go index 8166bea..833c303 100644 --- a/sys/ffmpeg61/avcodec_parameters.go +++ b/sys/ffmpeg61/avcodec_parameters.go @@ -2,6 +2,7 @@ package ffmpeg import ( "encoding/json" + "errors" ) //////////////////////////////////////////////////////////////////////////////// @@ -18,14 +19,14 @@ import "C" // TYPES type jsonAVCodecParametersAudio struct { - SampleFormat AVSampleFormat `json:"format"` + SampleFormat AVSampleFormat `json:"sample_format"` SampleRate int `json:"sample_rate"` ChannelLayout AVChannelLayout `json:"channel_layout"` FrameSize int `json:"frame_size,omitempty"` } type jsonAVCodecParameterVideo struct { - PixelFormat AVPixelFormat `json:"format"` + PixelFormat AVPixelFormat `json:"pixel_format"` Width int `json:"width"` Height int `json:"height"` SampleAspectRatio AVRational `json:"sample_aspect_ratio,omitempty"` @@ -43,7 +44,7 @@ type jsonAVCodecParameters struct { //////////////////////////////////////////////////////////////////////////////// // STRINGIFY -func (ctx *AVCodecParameters) MarshalJSON() ([]byte, error) { +func (ctx AVCodecParameters) MarshalJSON() ([]byte, error) { par := jsonAVCodecParameters{ CodecType: AVMediaType(ctx.codec_type), CodecID: AVCodecID(ctx.codec_id), @@ -82,6 +83,10 @@ func (ctx *AVCodecParameters) CodecType() AVMediaType { return AVMediaType(ctx.codec_type) } +func (ctx *AVCodecParameters) SetCodecType(t AVMediaType) { + ctx.codec_type = C.enum_AVMediaType(t) +} + func (ctx *AVCodecParameters) CodecID() AVCodecID { return AVCodecID(ctx.codec_id) } @@ -104,6 +109,10 @@ func (ctx *AVCodecParameters) BitRate() int64 { return int64(ctx.bit_rate) } +func (ctx *AVCodecParameters) SetBitRate(rate int64) { + ctx.bit_rate = C.int64_t(rate) +} + // Audio func (ctx *AVCodecParameters) SampleFormat() AVSampleFormat { if AVMediaType(ctx.codec_type) == AVMEDIA_TYPE_AUDIO { @@ -113,21 +122,41 @@ func (ctx *AVCodecParameters) SampleFormat() AVSampleFormat { } } +func (ctx *AVCodecParameters) SetSampleFormat(format AVSampleFormat) { + ctx.format = C.int(format) +} + // Audio func (ctx *AVCodecParameters) Samplerate() int { return int(ctx.sample_rate) } +func (ctx *AVCodecParameters) SetSamplerate(rate int) { + ctx.sample_rate = C.int(rate) +} + // Audio func (ctx *AVCodecParameters) ChannelLayout() AVChannelLayout { return AVChannelLayout(ctx.ch_layout) } +func (ctx *AVCodecParameters) SetChannelLayout(layout AVChannelLayout) error { + if !AVUtil_channel_layout_check(&layout) { + return errors.New("invalid channel layout") + } + ctx.ch_layout = C.AVChannelLayout(layout) + return nil +} + // Audio func (ctx *AVCodecParameters) FrameSize() int { return int(ctx.frame_size) } +func (ctx *AVCodecParameters) SetFrameSize(size int) { + ctx.frame_size = C.int(size) +} + // Video func (ctx *AVCodecParameters) PixelFormat() AVPixelFormat { if AVMediaType(ctx.codec_type) == AVMEDIA_TYPE_VIDEO { @@ -137,17 +166,33 @@ func (ctx *AVCodecParameters) PixelFormat() AVPixelFormat { } } +func (ctx *AVCodecParameters) SetPixelFormat(format AVPixelFormat) { + ctx.format = C.int(format) +} + // Video func (ctx *AVCodecParameters) SampleAspectRatio() AVRational { return AVRational(ctx.sample_aspect_ratio) } +func (ctx *AVCodecParameters) SetSampleAspectRatio(aspect AVRational) { + ctx.sample_aspect_ratio = C.AVRational(aspect) +} + // Video func (ctx *AVCodecParameters) Width() int { return int(ctx.width) } +func (ctx *AVCodecParameters) SetWidth(width int) { + ctx.width = C.int(width) +} + // Video func (ctx *AVCodecParameters) Height() int { return int(ctx.height) } + +func (ctx *AVCodecParameters) SetHeight(height int) { + ctx.height = C.int(height) +}