From da40bd3594707e3b983e0400cf4d39befafd766a Mon Sep 17 00:00:00 2001 From: Alex Eidt Date: Fri, 9 Sep 2022 22:20:56 -0700 Subject: [PATCH] Added support for multiple Audio streams --- README.md | 38 +++++++++++--------- aio_test.go | 4 +-- audio.go | 97 ++++++++++++++++++++++++++++++++++++-------------- audiowriter.go | 2 +- microphone.go | 26 +++++++------- options.go | 1 + player.go | 39 +------------------- utils.go | 62 +++++++++++++++++++------------- 8 files changed, 148 insertions(+), 121 deletions(-) diff --git a/README.md b/README.md index c8afa14..54b6a5b 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ The `Options` struct is used to specify optional parameters for Audio I/O. ```go type Options struct { + Stream int // Audio Stream Index to use. SampleRate int // Sample rate in Hz. Channels int // Number of channels. Bitrate int // Bitrate. @@ -49,12 +50,15 @@ which corresponds to 1 second of audio data. The user may pass in `options` to set the desired sampling rate, format and channels of the audio. If `options` is `nil`, then the channels and sampling rate from the file will be used, with a default format of `s16`. +The `Read()` function fills the internal byte buffer with the next batch of audio samples. Once the entire file has been read, `Read()` will return `false` and close the `Audio` struct. + Note that the `Samples()` function is only present for convenience. It casts the raw byte buffer into the given audio data type determined by the `Format()` such that the underlying data buffers are the same. The `s24` and `u24` formats are not supported by the `Samples()` function since there is no type equivalent. Calling the `Samples()` function on 24-bit audio will return the raw byte buffer. The return value of the `Samples()` function will have to be cast into an array of the desired type (e.g. 
`audio.Samples().([]float32)`) ```go aio.NewAudio(filename string, options *aio.Options) (*aio.Audio, error) +aio.NewAudioStreams(filename string, options *aio.Options) ([]*aio.Audio, error) FileName() string SampleRate() int @@ -64,8 +68,10 @@ Duration() float64 Format() string Codec() string BitsPerSample() int +Stream() int Total() int Buffer() []byte +MetaData() map[string]string Samples() interface{} SetBuffer(buffer []byte) error @@ -134,12 +140,6 @@ Play(samples interface{}) error Close() ``` -Additionally, files may be played directly using the `Play` function: - -```go -aio.Play(filename string) error -``` - ## Examples Copy `input.wav` to `output.mp3`. @@ -185,15 +185,25 @@ for mic.Read() && seconds < 10 { } ``` +Play all audio tracks from `input.mp4` sequentially. + +```go +streams, _ := aio.NewAudioStreams("input.mp4", nil) + +for _, stream := range streams { + player, _ := aio.NewPlayer(stream.Channels(), stream.SampleRate(), stream.Format()) + for stream.Read() { + player.Play(stream.Buffer()) + } + player.Close() +} +``` + Play `input.mp4`. ```go audio, _ := aio.NewAudio("input.mp4", nil) -player, _ := aio.NewPlayer( - audio.Channels(), - audio.SampleRate(), - audio.Format(), -) +player, _ := aio.NewPlayer(audio.Channels(), audio.SampleRate(), audio.Format()) defer player.Close() for audio.Read() { @@ -242,11 +252,7 @@ Play Microphone audio. Use default microphone settings for recording. 
mic, _ := aio.NewMicrophone(0, nil) defer mic.Close() -player, _ := aio.NewPlayer( - mic.Channels(), - mic.SampleRate(), - mic.Format(), -) +player, _ := aio.NewPlayer(mic.Channels(), mic.SampleRate(), mic.Format()) defer player.Close() for mic.Read() { diff --git a/aio_test.go b/aio_test.go index f752ec5..7be70fe 100644 --- a/aio_test.go +++ b/aio_test.go @@ -165,6 +165,7 @@ func TestAudioIO(t *testing.T) { assertEquals(audio.Format(), "s16") assertEquals(audio.Codec(), "mp3") assertEquals(audio.BitsPerSample(), 16) + assertEquals(audio.Stream(), 0) assertEquals(len(audio.Buffer()), 0) fmt.Println("Audio File IO test passed") @@ -278,7 +279,7 @@ func TestAudioResampling(t *testing.T) { func TestDeviceParsingWindows(t *testing.T) { // Sample string taken from FFmpeg wiki: data := parseDevices( - []byte(`ffmpeg version N-45279-g6b86dd5... --enable-runtime-cpudetect + `ffmpeg version N-45279-g6b86dd5... --enable-runtime-cpudetect libavutil 51. 74.100 / 51. 74.100 libavcodec 54. 65.100 / 54. 65.100 libavformat 54. 31.100 / 54. 31.100 @@ -293,7 +294,6 @@ func TestDeviceParsingWindows(t *testing.T) { [dshow @ 03ACF580] "Internal Microphone (Conexant 2" [dshow @ 03ACF580] "virtual-audio-capturer" dummy: Immediate exit requested`, - ), ) assertEquals(data[0], "Internal Microphone (Conexant 2") diff --git a/audio.go b/audio.go index 98fc6f5..eba4eb2 100644 --- a/audio.go +++ b/audio.go @@ -12,17 +12,19 @@ import ( ) type Audio struct { - filename string // Audio Filename. - samplerate int // Audio Sample Rate in Hz. - channels int // Number of audio channels. - bitrate int // Bitrate for audio encoding. - duration float64 // Duration of audio in seconds. - format string // Format of audio samples. - codec string // Codec used for video encoding. - bps int // Bits per sample. - buffer []byte // Raw audio data. - pipe *io.ReadCloser // Stdout pipe for ffmpeg process. - cmd *exec.Cmd // ffmpeg command. + filename string // Audio Filename. 
+ samplerate int // Audio Sample Rate in Hz. + channels int // Number of audio channels. + bitrate int // Bitrate for audio encoding. + duration float64 // Duration of audio in seconds. + format string // Format of audio samples. + codec string // Codec used for video encoding. + bps int // Bits per sample. + stream int // Stream Index. + buffer []byte // Raw audio data. + metadata map[string]string // Audio Metadata. + pipe *io.ReadCloser // Stdout pipe for ffmpeg process. + cmd *exec.Cmd // ffmpeg command. } func (audio *Audio) FileName() string { @@ -64,6 +66,11 @@ func (audio *Audio) BitsPerSample() int { return audio.bps } +// Returns the zero-indexed audio stream index. +func (audio *Audio) Stream() int { + return audio.stream +} + // Returns the total number of audio samples in the file. func (audio *Audio) Total() int { frame := audio.channels * audio.bps / 8 @@ -76,9 +83,14 @@ func (audio *Audio) Buffer() []byte { return audio.buffer } +// Raw Metadata from ffprobe output for the audio file. +func (audio *Audio) MetaData() map[string]string { + return audio.metadata +} + // Casts the values in the byte buffer to those specified by the audio format. func (audio *Audio) Samples() interface{} { - return convertBytesToSamples(audio.buffer, len(audio.buffer)/(audio.bps/8), audio.format) + return bytesToSamples(audio.buffer, len(audio.buffer)/(audio.bps/8), audio.format) } // Sets the buffer to the given byte array. 
The length of the buffer must be a multiple @@ -92,6 +104,24 @@ func (audio *Audio) SetBuffer(buffer []byte) error { } func NewAudio(filename string, options *Options) (*Audio, error) { + if options == nil { + options = &Options{} + } + + streams, err := NewAudioStreams(filename, options) + if streams == nil { + return nil, err + } + + if options.Stream < 0 || options.Stream >= len(streams) { + return nil, fmt.Errorf("invalid stream index: %d, must be between 0 and %d", options.Stream, len(streams)) + } + + return streams[options.Stream], err +} + +// Read all audio streams from the given file. +func NewAudioStreams(filename string, options *Options) ([]*Audio, error) { if !exists(filename) { return nil, fmt.Errorf("video file %s does not exist", filename) } @@ -112,36 +142,47 @@ func NewAudio(filename string, options *Options) (*Audio, error) { return nil, fmt.Errorf("no audio data found in %s", filename) } - audio := &Audio{filename: filename} - if options == nil { options = &Options{} } + var format string if options.Format == "" { - audio.format = createFormat("s16") // s16 default format. + format = createFormat("s16") // s16 default format. } else { - audio.format = createFormat(options.Format) + format = createFormat(options.Format) } - if err := checkFormat(audio.format); err != nil { + if err := checkFormat(format); err != nil { return nil, err } - bps := int(parse(regexp.MustCompile(`\d{1,2}`).FindString(audio.format))) // Bits per sample. - audio.bps = bps + bps := int(parse(regexp.MustCompile(`\d{1,2}`).FindString(format))) // Bits per sample. 
- audio.addAudioData(audioData) + streams := make([]*Audio, len(audioData)) + for i, data := range audioData { + audio := &Audio{ + filename: filename, + format: format, + bps: bps, + stream: i, + metadata: data, + } - if options.SampleRate != 0 { - audio.samplerate = options.SampleRate - } + audio.addAudioData(data) + + if options.SampleRate != 0 { + audio.samplerate = options.SampleRate + } + + if options.Channels != 0 { + audio.channels = options.Channels + } - if options.Channels != 0 { - audio.channels = options.Channels + streams[i] = audio } - return audio, nil + return streams, nil } // Adds audio data to the Audio struct from the ffprobe output. @@ -176,11 +217,13 @@ func (audio *Audio) init() error { "-acodec", fmt.Sprintf("pcm_%s", audio.format), "-ar", fmt.Sprintf("%d", audio.samplerate), "-ac", fmt.Sprintf("%d", audio.channels), + "-map", fmt.Sprintf("0:a:%d", audio.stream), "-loglevel", "quiet", "-", ) audio.cmd = cmd + pipe, err := cmd.StdoutPipe() if err != nil { return err @@ -206,6 +249,7 @@ func (audio *Audio) Read() bool { return false } } + total := 0 for total < len(audio.buffer) { n, err := (*audio.pipe).Read(audio.buffer[total:]) @@ -215,6 +259,7 @@ func (audio *Audio) Read() bool { } total += n } + return true } diff --git a/audiowriter.go b/audiowriter.go index a506410..326b508 100644 --- a/audiowriter.go +++ b/audiowriter.go @@ -167,7 +167,7 @@ func (writer *AudioWriter) init() error { // Writes the given samples to the audio file. 
func (writer *AudioWriter) Write(samples interface{}) error { - buffer := convertSamplesToBytes(samples) + buffer := samplesToBytes(samples) if buffer == nil { return fmt.Errorf("invalid sample data type") } diff --git a/microphone.go b/microphone.go index f0fdba1..fc36f88 100644 --- a/microphone.go +++ b/microphone.go @@ -1,6 +1,7 @@ package aio import ( + "bytes" "fmt" "io" "os" @@ -54,7 +55,7 @@ func (mic *Microphone) Buffer() []byte { } func (mic *Microphone) Samples() interface{} { - return convertBytesToSamples(mic.buffer, len(mic.buffer)/(mic.bps/8), mic.format) + return bytesToSamples(mic.buffer, len(mic.buffer)/(mic.bps/8), mic.format) } // Sets the buffer to the given byte array. The length of the buffer must be a multiple @@ -128,25 +129,24 @@ func NewMicrophone(stream int, options *Options) (*Microphone, error) { } // Parses the microphone metadata from ffmpeg output. -func (mic *Microphone) parseMicrophoneData(buffer []byte) { - bufferstr := string(buffer) +func (mic *Microphone) parseMicrophoneData(buffer string) { // Sample String: "Stream #0:0: Audio: pcm_s16le, 44100 Hz, stereo, s16, 1411 kb/s". - index := strings.Index(bufferstr, "Stream #") + index := strings.Index(buffer, "Stream #") if index == -1 { index++ } - bufferstr = bufferstr[index:] + buffer = buffer[index:] // Sample rate. regex := regexp.MustCompile(`\d+ Hz`) - match := regex.FindString(bufferstr) + match := regex.FindString(buffer) if len(match) > 0 { mic.samplerate = int(parse(match[:len(match)-len(" Hz")])) } mic.channels = 2 // stereo by default. - if strings.Contains(bufferstr, "stereo") { + if strings.Contains(buffer, "stereo") { mic.channels = 2 - } else if strings.Contains(bufferstr, "mono") { + } else if strings.Contains(buffer, "mono") { mic.channels = 1 } } @@ -175,11 +175,11 @@ func (mic *Microphone) getMicrophoneData(device string) error { return err } // Read ffmpeg output from Stdout. 
- buffer := make([]byte, 2<<11) - total := 0 + builder := bytes.Buffer{} + buffer := make([]byte, 1024) for { - n, err := pipe.Read(buffer[total:]) - total += n + n, err := pipe.Read(buffer) + builder.Write(buffer[:n]) if err == io.EOF { break } @@ -187,7 +187,7 @@ func (mic *Microphone) getMicrophoneData(device string) error { // Wait for the command to finish. cmd.Wait() - mic.parseMicrophoneData(buffer[:total]) + mic.parseMicrophoneData(builder.String()) return nil } diff --git a/options.go b/options.go index 2b251cb..6cd7dd9 100644 --- a/options.go +++ b/options.go @@ -1,6 +1,7 @@ package aio type Options struct { + Stream int // Audio Stream Index to use. SampleRate int // Sample rate in Hz. Channels int // Number of channels. Bitrate int // Bitrate. diff --git a/player.go b/player.go index 488d714..5aeb15e 100644 --- a/player.go +++ b/player.go @@ -9,43 +9,6 @@ import ( "syscall" ) -// Play the audio from the given file. -func Play(filename string) error { - if !exists(filename) { - return fmt.Errorf("file %s does not exist", filename) - } - // Check if ffplay is installed on the users machine. - if err := installed("ffplay"); err != nil { - return err - } - - cmd := exec.Command( - "ffplay", - "-i", filename, - "-nodisp", - "-autoexit", - "-loglevel", "quiet", - ) - if err := cmd.Start(); err != nil { - return err - } - - // Stop ffplay process when user presses Ctrl+C. - c := make(chan os.Signal, 1) - signal.Notify(c, os.Interrupt, syscall.SIGTERM) - go func() { - <-c - if cmd != nil { - cmd.Process.Kill() - } - os.Exit(1) - }() - - cmd.Wait() - - return nil -} - type Player struct { samplerate int // Audio Sample Rate in Hz. channels int // Number of audio channels. 
@@ -115,7 +78,7 @@ func NewPlayer(channels, samplerate int, format string) (*Player, error) { } func (player *Player) Play(samples interface{}) error { - buffer := convertSamplesToBytes(samples) + buffer := samplesToBytes(samples) if buffer == nil { return fmt.Errorf("invalid sample data type") } diff --git a/utils.go b/utils.go index 2a7c9ee..59899f9 100644 --- a/utils.go +++ b/utils.go @@ -1,6 +1,7 @@ package aio import ( + "bytes" "errors" "fmt" "io" @@ -41,7 +42,7 @@ func installed(program string) error { } // Runs ffprobe on the given file and returns a map of the metadata. -func ffprobe(filename, stype string) (map[string]string, error) { +func ffprobe(filename, stype string) ([]map[string]string, error) { // "stype" is stream stype. "v" for video, "a" for audio. // Extract media metadata information with ffprobe. cmd := exec.Command( @@ -61,32 +62,42 @@ func ffprobe(filename, stype string) (map[string]string, error) { if err := cmd.Start(); err != nil { return nil, err } + // Read ffprobe output from Stdout. - buffer := make([]byte, 2<<10) - total := 0 + builder := bytes.Buffer{} + buffer := make([]byte, 1024) for { - n, err := pipe.Read(buffer[total:]) - total += n + n, err := pipe.Read(buffer) + builder.Write(buffer[:n]) if err == io.EOF { break } } + // Wait for ffprobe command to complete. if err := cmd.Wait(); err != nil { return nil, err } // Parse ffprobe output to fill in audio data. 
- data := make(map[string]string) - for _, line := range strings.Split(string(buffer[:total]), "|") { - if strings.Contains(line, "=") { - keyValue := strings.Split(line, "=") - if _, ok := data[keyValue[0]]; !ok { - data[keyValue[0]] = keyValue[1] + datalist := make([]map[string]string, 0) + metadata := string(builder.String()) + for _, stream := range strings.Split(metadata, "\n") { + if len(strings.TrimSpace(stream)) > 0 { + data := make(map[string]string) + for _, line := range strings.Split(stream, "|") { + if strings.Contains(line, "=") { + keyValue := strings.Split(line, "=") + if _, ok := data[keyValue[0]]; !ok { + data[keyValue[0]] = keyValue[1] + } + } } + datalist = append(datalist, data) } } - return data, nil + + return datalist, nil } // Parses the given data into a float64. @@ -114,12 +125,10 @@ func microphone() (string, error) { // For webcam streaming on windows, ffmpeg requires a device name. // All device names are parsed and returned by this function. -func parseDevices(buffer []byte) []string { - bufferstr := string(buffer) - - index := strings.Index(strings.ToLower(bufferstr), "directshow audio device") +func parseDevices(buffer string) []string { + index := strings.Index(strings.ToLower(buffer), "directshow audio device") if index != -1 { - bufferstr = bufferstr[index:] + buffer = buffer[index:] } type Pair struct { @@ -132,7 +141,7 @@ func parseDevices(buffer []byte) []string { pairs := []Pair{} // Find all device names surrounded by quotes. 
E.g "Windows Camera Front" regex := regexp.MustCompile("\"[^\"]+\"") - for _, line := range strings.Split(strings.ReplaceAll(bufferstr, "\r\n", "\n"), "\n") { + for _, line := range strings.Split(strings.ReplaceAll(buffer, "\r\n", "\n"), "\n") { if strings.Contains(strings.ToLower(line), "alternative name") { match := regex.FindString(line) if len(match) > 0 { @@ -195,18 +204,21 @@ func getDevicesWindows() ([]string, error) { if err := cmd.Start(); err != nil { return nil, err } + // Read list devices from Stdout. - buffer := make([]byte, 2<<10) - total := 0 + builder := bytes.Buffer{} + buffer := make([]byte, 1024) for { - n, err := pipe.Read(buffer[total:]) - total += n + n, err := pipe.Read(buffer) + builder.Write(buffer[:n]) if err == io.EOF { break } } + cmd.Wait() - devices := parseDevices(buffer) + + devices := parseDevices(builder.String()) return devices, nil } @@ -231,7 +243,7 @@ func endianness() string { } // Alias the byte buffer as a certain type specified by the format string. -func convertBytesToSamples(buffer []byte, size int, format string) interface{} { +func bytesToSamples(buffer []byte, size int, format string) interface{} { switch format { case "f32be", "f32le": var data []float32 @@ -287,7 +299,7 @@ func convertBytesToSamples(buffer []byte, size int, format string) interface{} { } } -func convertSamplesToBytes(data interface{}) []byte { +func samplesToBytes(data interface{}) []byte { var buffer []byte pointer := (*reflect.SliceHeader)(unsafe.Pointer(&buffer))