Skip to content

Commit

Permalink
Added support for multiple Audio streams
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexEidt committed Sep 10, 2022
1 parent d1ba724 commit da40bd3
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 121 deletions.
38 changes: 22 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ The `Options` struct is used to specify optional parameters for Audio I/O.

```go
type Options struct {
Stream int // Audio Stream Index to use.
SampleRate int // Sample rate in Hz.
Channels int // Number of channels.
Bitrate int // Bitrate.
Expand All @@ -49,12 +50,15 @@ which corresponds to 1 second of audio data.

The user may pass in `options` to set the desired sampling rate, format and channels of the audio. If `options` is `nil`, then the channels and sampling rate from the file will be used, with a default format of `s16`.

The `Read()` function fills the internal byte buffer with the next batch of audio samples. Once the entire file has been read, `Read()` will return `false` and close the `Audio` struct.

Note that the `Samples()` function is only present for convenience. It casts the raw byte buffer to the audio data type determined by `Format()`, so the returned samples share the same underlying data as the byte buffer. The `s24` and `u24` formats are not supported by the `Samples()` function since Go has no equivalent 24-bit type. Calling the `Samples()` function on 24-bit audio will return the raw byte buffer.

The return value of the `Samples()` function must be type-asserted to a slice of the desired type (e.g. `audio.Samples().([]float32)`).

```go
aio.NewAudio(filename string, options *aio.Options) (*aio.Audio, error)
aio.NewAudioStreams(filename string, options *aio.Options) ([]*aio.Audio, error)

FileName() string
SampleRate() int
Expand All @@ -64,8 +68,10 @@ Duration() float64
Format() string
Codec() string
BitsPerSample() int
Stream() int
Total() int
Buffer() []byte
MetaData() map[string]string
Samples() interface{}
SetBuffer(buffer []byte) error

Expand Down Expand Up @@ -134,12 +140,6 @@ Play(samples interface{}) error
Close()
```

Additionally, files may be played directly using the `Play` function:

```go
aio.Play(filename string) error
```

## Examples

Copy `input.wav` to `output.mp3`.
Expand Down Expand Up @@ -185,15 +185,25 @@ for mic.Read() && seconds < 10 {
}
```

Play all audio tracks from `input.mp4` sequentially.

```go
streams, _ := aio.NewAudioStreams("input.mp4", nil)

for _, stream := range streams {
player, _ := aio.NewPlayer(stream.Channels(), stream.SampleRate(), stream.Format())
for stream.Read() {
player.Play(stream.Buffer())
}
player.Close()
}
```

Play `input.mp4`.

```go
audio, _ := aio.NewAudio("input.mp4", nil)
player, _ := aio.NewPlayer(
audio.Channels(),
audio.SampleRate(),
audio.Format(),
)
player, _ := aio.NewPlayer(audio.Channels(), audio.SampleRate(), audio.Format())
defer player.Close()

for audio.Read() {
Expand Down Expand Up @@ -242,11 +252,7 @@ Play Microphone audio. Use default microphone settings for recording.
mic, _ := aio.NewMicrophone(0, nil)
defer mic.Close()

player, _ := aio.NewPlayer(
mic.Channels(),
mic.SampleRate(),
mic.Format(),
)
player, _ := aio.NewPlayer(mic.Channels(), mic.SampleRate(), mic.Format())
defer player.Close()

for mic.Read() {
Expand Down
4 changes: 2 additions & 2 deletions aio_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ func TestAudioIO(t *testing.T) {
assertEquals(audio.Format(), "s16")
assertEquals(audio.Codec(), "mp3")
assertEquals(audio.BitsPerSample(), 16)
assertEquals(audio.Stream(), 0)
assertEquals(len(audio.Buffer()), 0)

fmt.Println("Audio File IO test passed")
Expand Down Expand Up @@ -278,7 +279,7 @@ func TestAudioResampling(t *testing.T) {
func TestDeviceParsingWindows(t *testing.T) {
// Sample string taken from FFmpeg wiki:
data := parseDevices(
[]byte(`ffmpeg version N-45279-g6b86dd5... --enable-runtime-cpudetect
`ffmpeg version N-45279-g6b86dd5... --enable-runtime-cpudetect
libavutil 51. 74.100 / 51. 74.100
libavcodec 54. 65.100 / 54. 65.100
libavformat 54. 31.100 / 54. 31.100
Expand All @@ -293,7 +294,6 @@ func TestDeviceParsingWindows(t *testing.T) {
[dshow @ 03ACF580] "Internal Microphone (Conexant 2"
[dshow @ 03ACF580] "virtual-audio-capturer"
dummy: Immediate exit requested`,
),
)

assertEquals(data[0], "Internal Microphone (Conexant 2")
Expand Down
97 changes: 71 additions & 26 deletions audio.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,19 @@ import (
)

type Audio struct {
filename string // Audio Filename.
samplerate int // Audio Sample Rate in Hz.
channels int // Number of audio channels.
bitrate int // Bitrate for audio encoding.
duration float64 // Duration of audio in seconds.
format string // Format of audio samples.
codec string // Codec used for video encoding.
bps int // Bits per sample.
buffer []byte // Raw audio data.
pipe *io.ReadCloser // Stdout pipe for ffmpeg process.
cmd *exec.Cmd // ffmpeg command.
filename string // Audio Filename.
samplerate int // Audio Sample Rate in Hz.
channels int // Number of audio channels.
bitrate int // Bitrate for audio encoding.
duration float64 // Duration of audio in seconds.
format string // Format of audio samples.
codec string // Codec used for video encoding.
bps int // Bits per sample.
stream int // Stream Index.
buffer []byte // Raw audio data.
metadata map[string]string // Audio Metadata.
pipe *io.ReadCloser // Stdout pipe for ffmpeg process.
cmd *exec.Cmd // ffmpeg command.
}

func (audio *Audio) FileName() string {
Expand Down Expand Up @@ -64,6 +66,11 @@ func (audio *Audio) BitsPerSample() int {
return audio.bps
}

// Stream reports the zero-based index of this audio stream.
func (audio *Audio) Stream() int {
	index := audio.stream
	return index
}

// Returns the total number of audio samples in the file.
func (audio *Audio) Total() int {
frame := audio.channels * audio.bps / 8
Expand All @@ -76,9 +83,14 @@ func (audio *Audio) Buffer() []byte {
return audio.buffer
}

// MetaData returns the raw metadata taken from the ffprobe output for this
// audio. The stored map itself is returned, not a copy.
func (audio *Audio) MetaData() map[string]string {
	raw := audio.metadata
	return raw
}

// Casts the values in the byte buffer to those specified by the audio format.
func (audio *Audio) Samples() interface{} {
return convertBytesToSamples(audio.buffer, len(audio.buffer)/(audio.bps/8), audio.format)
return bytesToSamples(audio.buffer, len(audio.buffer)/(audio.bps/8), audio.format)
}

// Sets the buffer to the given byte array. The length of the buffer must be a multiple
Expand All @@ -92,6 +104,24 @@ func (audio *Audio) SetBuffer(buffer []byte) error {
}

// NewAudio returns the Audio for the stream selected by options.Stream in the
// given file.
//
// A nil options value is treated as an empty Options struct, which selects
// stream 0. The list of streams is obtained from NewAudioStreams; any error it
// reports is returned unchanged. An error is also returned when options.Stream
// does not index one of the streams that were found.
func NewAudio(filename string, options *Options) (*Audio, error) {
	if options == nil {
		options = &Options{}
	}

	streams, err := NewAudioStreams(filename, options)
	if err != nil {
		return nil, err
	}

	// Guard against an empty result so the range reported below is never
	// the nonsensical "between 0 and -1".
	if len(streams) == 0 {
		return nil, fmt.Errorf("no audio streams found in %s", filename)
	}

	if options.Stream < 0 || options.Stream >= len(streams) {
		// The largest valid index is len(streams)-1, not len(streams).
		return nil, fmt.Errorf("invalid stream index: %d, must be between 0 and %d", options.Stream, len(streams)-1)
	}

	return streams[options.Stream], nil
}

// Read all audio streams from the given file.
func NewAudioStreams(filename string, options *Options) ([]*Audio, error) {
if !exists(filename) {
return nil, fmt.Errorf("video file %s does not exist", filename)
}
Expand All @@ -112,36 +142,47 @@ func NewAudio(filename string, options *Options) (*Audio, error) {
return nil, fmt.Errorf("no audio data found in %s", filename)
}

audio := &Audio{filename: filename}

if options == nil {
options = &Options{}
}

var format string
if options.Format == "" {
audio.format = createFormat("s16") // s16 default format.
format = createFormat("s16") // s16 default format.
} else {
audio.format = createFormat(options.Format)
format = createFormat(options.Format)
}

if err := checkFormat(audio.format); err != nil {
if err := checkFormat(format); err != nil {
return nil, err
}

bps := int(parse(regexp.MustCompile(`\d{1,2}`).FindString(audio.format))) // Bits per sample.
audio.bps = bps
bps := int(parse(regexp.MustCompile(`\d{1,2}`).FindString(format))) // Bits per sample.

audio.addAudioData(audioData)
streams := make([]*Audio, len(audioData))
for i, data := range audioData {
audio := &Audio{
filename: filename,
format: format,
bps: bps,
stream: i,
metadata: data,
}

if options.SampleRate != 0 {
audio.samplerate = options.SampleRate
}
audio.addAudioData(data)

if options.SampleRate != 0 {
audio.samplerate = options.SampleRate
}

if options.Channels != 0 {
audio.channels = options.Channels
}

if options.Channels != 0 {
audio.channels = options.Channels
streams[i] = audio
}

return audio, nil
return streams, nil
}

// Adds audio data to the Audio struct from the ffprobe output.
Expand Down Expand Up @@ -176,11 +217,13 @@ func (audio *Audio) init() error {
"-acodec", fmt.Sprintf("pcm_%s", audio.format),
"-ar", fmt.Sprintf("%d", audio.samplerate),
"-ac", fmt.Sprintf("%d", audio.channels),
"-map", fmt.Sprintf("0:a:%d", audio.stream),
"-loglevel", "quiet",
"-",
)

audio.cmd = cmd

pipe, err := cmd.StdoutPipe()
if err != nil {
return err
Expand All @@ -206,6 +249,7 @@ func (audio *Audio) Read() bool {
return false
}
}

total := 0
for total < len(audio.buffer) {
n, err := (*audio.pipe).Read(audio.buffer[total:])
Expand All @@ -215,6 +259,7 @@ func (audio *Audio) Read() bool {
}
total += n
}

return true
}

Expand Down
2 changes: 1 addition & 1 deletion audiowriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ func (writer *AudioWriter) init() error {

// Writes the given samples to the audio file.
func (writer *AudioWriter) Write(samples interface{}) error {
buffer := convertSamplesToBytes(samples)
buffer := samplesToBytes(samples)
if buffer == nil {
return fmt.Errorf("invalid sample data type")
}
Expand Down
26 changes: 13 additions & 13 deletions microphone.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package aio

import (
"bytes"
"fmt"
"io"
"os"
Expand Down Expand Up @@ -54,7 +55,7 @@ func (mic *Microphone) Buffer() []byte {
}

func (mic *Microphone) Samples() interface{} {
return convertBytesToSamples(mic.buffer, len(mic.buffer)/(mic.bps/8), mic.format)
return bytesToSamples(mic.buffer, len(mic.buffer)/(mic.bps/8), mic.format)
}

// Sets the buffer to the given byte array. The length of the buffer must be a multiple
Expand Down Expand Up @@ -128,25 +129,24 @@ func NewMicrophone(stream int, options *Options) (*Microphone, error) {
}

// Parses the microphone metadata from ffmpeg output.
func (mic *Microphone) parseMicrophoneData(buffer []byte) {
bufferstr := string(buffer)
func (mic *Microphone) parseMicrophoneData(buffer string) {
// Sample String: "Stream #0:0: Audio: pcm_s16le, 44100 Hz, stereo, s16, 1411 kb/s".
index := strings.Index(bufferstr, "Stream #")
index := strings.Index(buffer, "Stream #")
if index == -1 {
index++
}
bufferstr = bufferstr[index:]
buffer = buffer[index:]
// Sample rate.
regex := regexp.MustCompile(`\d+ Hz`)
match := regex.FindString(bufferstr)
match := regex.FindString(buffer)
if len(match) > 0 {
mic.samplerate = int(parse(match[:len(match)-len(" Hz")]))
}

mic.channels = 2 // stereo by default.
if strings.Contains(bufferstr, "stereo") {
if strings.Contains(buffer, "stereo") {
mic.channels = 2
} else if strings.Contains(bufferstr, "mono") {
} else if strings.Contains(buffer, "mono") {
mic.channels = 1
}
}
Expand Down Expand Up @@ -175,19 +175,19 @@ func (mic *Microphone) getMicrophoneData(device string) error {
return err
}
// Read ffmpeg output from Stdout.
buffer := make([]byte, 2<<11)
total := 0
builder := bytes.Buffer{}
buffer := make([]byte, 1024)
for {
n, err := pipe.Read(buffer[total:])
total += n
n, err := pipe.Read(buffer)
builder.Write(buffer[:n])
if err == io.EOF {
break
}
}
// Wait for the command to finish.
cmd.Wait()

mic.parseMicrophoneData(buffer[:total])
mic.parseMicrophoneData(builder.String())
return nil
}

Expand Down
1 change: 1 addition & 0 deletions options.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package aio

type Options struct {
Stream int // Audio Stream Index to use.
SampleRate int // Sample rate in Hz.
Channels int // Number of channels.
Bitrate int // Bitrate.
Expand Down
Loading

0 comments on commit da40bd3

Please sign in to comment.