From 80d01748ea8901466127e8a9d956222d64389524 Mon Sep 17 00:00:00 2001 From: Alex Eidt Date: Sat, 3 Sep 2022 19:29:47 -0700 Subject: [PATCH] Abstracted away endianness --- README.md | 17 ++++++++++------- aio_test.go | 14 +++++++------- audio.go | 10 +++++----- audiowriter.go | 15 ++++++++------- microphone.go | 22 ++++++++++++---------- player.go | 30 +++++++++++++++++++++++++----- utils.go | 11 +++++++++++ 7 files changed, 78 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index c89da96..fdb8744 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,9 @@ go get github.com/AlexEidt/aio ## Buffers -`aio` uses `byte` buffers to transport raw audio data. Audio data can take on many forms, including floating point, unsigned integer and signed integer. These types may be larger than a `byte` and would have to be split. Learn more about [available audio types](https://trac.ffmpeg.org/wiki/audio%20types) from the FFmpeg Wiki. `alaw` and `mulaw` formats are currently not supported. +`aio` uses `byte` buffers to transport raw audio data. Audio data can take on many forms, including floating point, unsigned integer and signed integer. These types may be larger than a `byte` and would have to be split. Valid formats are `u8`, `s8`, `u16`, `s16`, `u24`, `s24`, `u32`, `s32`, `f32`, and `f64`. These represent `u` unsigned integers, `s` signed integers and `f` floating point numbers. -As an example, if there is stereo sound (two channels) encoded in the `s16le` (signed 16 bit integers, little endian) format with a sampling rate of `44100 Hz`, one second of audio would be +As an example, if there is stereo sound (two channels) encoded in the `s16` (signed 16 bit integers) format with a sampling rate of `44100 Hz`, one second of audio would be ``` 44100 * 2 (channels) * 2 (bytes per sample) = 176400 bytes @@ -47,9 +47,9 @@ sample rate * channels * bytes per sample which corresponds to 1 second of audio data. -The user may pass in `options` to set the desired sampling rate, format and channels of the audio. If `options` is `nil`, then the channels and sampling rate from the file will be used, with a default format of `"s16le"`. +The user may pass in `options` to set the desired sampling rate, format and channels of the audio. If `options` is `nil`, then the channels and sampling rate from the file will be used, with a default format of `s16`. -Note that the `Samples()` function is only present for convenience. It casts the raw byte buffer into the given audio data type determined by the `Format()` such that the underlying data buffers are the same. The `s24le`, `s24be`, `u24le` and `s24be` formats are not supported by the `Samples()` function since there is no type equivalent. Calling the `Samples()` function on 24-bit audio will return the raw byte buffer. +Note that the `Samples()` function is only present for convenience. It casts the raw byte buffer into the given audio data type determined by the `Format()` such that the underlying data buffers are the same. The `s24` and `u24` formats are not supported by the `Samples()` function since there is no type equivalent. Calling the `Samples()` function on 24-bit audio will return the raw byte buffer. The return value of the `Samples()` function will have to be cast into an array of the desired type (e.g. `audio.Samples().([]float32)`) @@ -74,7 +74,7 @@ Close() ## `AudioWriter` -`AudioWriter` is used to write audio to files from a buffer of audio samples. It comes with an `Options` struct that can be used to specify certain metadata of the output audio file. If `options` is `nil`, the defaults used are a sampling rate of `44100 Hz`, with `2` channels in the `"s16le"` format. +`AudioWriter` is used to write audio to files from a buffer of audio samples. It comes with an `Options` struct that can be used to specify certain metadata of the output audio file. If `options` is `nil`, the defaults used are a sampling rate of `44100 Hz`, with `2` channels in the `s16` format. ```go aio.NewAudioWriter(filename string, options *aio.Options) (*aio.AudioWriter, error) @@ -124,8 +124,11 @@ Close() `Player` is used to play audio from a buffer of audio samples. ```go -aio.NewPlayer(channels, sampleRate int, format string) (*aio.Player, error) +aio.NewPlayer(channels, samplerate int, format string) (*aio.Player, error) +SampleRate() int +Channels() int +Format() string Play(samples interface{}) error Close() ``` @@ -161,7 +164,7 @@ for audio.Read() { Capture 10 seconds of audio from the microphone. Audio is recorded at 44100 Hz stereo and is in signed 16 bit format. ```go -micOptions := aio.Options{Format: "s16le", Channels: 2, SampleRate: 44100} +micOptions := aio.Options{Format: "s16", Channels: 2, SampleRate: 44100} mic, _ := aio.NewMicrophone(0, &micOptions) defer mic.Close() diff --git a/aio_test.go b/aio_test.go index 797eae8..d3faadc 100644 --- a/aio_test.go +++ b/aio_test.go @@ -14,7 +14,7 @@ func assertEquals(actual, expected interface{}) { } func TestSamplesInt16(t *testing.T) { - audio, err := NewAudio("test/beach.mp3", &Options{Format: "u16le"}) + audio, err := NewAudio("test/beach.mp3", &Options{Format: "u16"}) if err != nil { panic(err) } @@ -41,7 +41,7 @@ func TestSamplesInt16(t *testing.T) { } func TestSamplesFloat64(t *testing.T) { - audio, err := NewAudio("test/beach.mp3", &Options{Format: "f64le"}) + audio, err := NewAudio("test/beach.mp3", &Options{Format: "f64"}) if err != nil { panic(err) } @@ -150,7 +150,7 @@ func TestAudioIO(t *testing.T) { assertEquals(audio.Channels(), 2) assertEquals(audio.Bitrate(), 128000) assertEquals(audio.Duration(), 1.032) - assertEquals(audio.Format(), "s16le") + assertEquals(audio.Format(), "s16") assertEquals(audio.Codec(), "mp3") assertEquals(audio.BitsPerSample(), 16) assertEquals(len(audio.Buffer()), 0) @@ -207,7 +207,7 @@ func TestAudioPlayback(t *testing.T) { } func TestAudioCopying(t *testing.T) { - audio, err1 := NewAudio("test/beach.mp3", &Options{Format: "s16be"}) + audio, err1 := NewAudio("test/beach.mp3", &Options{Format: "s16"}) if err1 != nil { panic(err1) } @@ -239,7 +239,7 @@ func TestAudioResampling(t *testing.T) { options := Options{ SampleRate: 4000, Channels: 1, - Format: "f32be", + Format: "f32", } audio, err1 := NewAudio("test/beach.mp3", &options) if err1 != nil { @@ -252,7 +252,7 @@ func TestAudioResampling(t *testing.T) { assertEquals(audio.Channels(), 1) assertEquals(audio.Bitrate(), 128000) assertEquals(audio.Duration(), 1.032) - assertEquals(audio.Format(), "f32be") + assertEquals(audio.Format(), "f32") assertEquals(audio.Codec(), "mp3") assertEquals(audio.BitsPerSample(), 32) assertEquals(len(audio.Buffer()), 0) @@ -322,7 +322,7 @@ func TestMicrophone(t *testing.T) { } seconds := 0 - for mic.Read() && seconds < 10 { + for mic.Read() && seconds < 3 { seconds++ } diff --git a/audio.go b/audio.go index aeacb2f..4aec6f2 100644 --- a/audio.go +++ b/audio.go @@ -13,10 +13,10 @@ import ( type Audio struct { filename string // Audio Filename. samplerate int // Audio Sample Rate in Hz. - channels int // Number of audio channels. 1 = mono, 2 = stereo. + channels int // Number of audio channels. bitrate int // Bitrate for audio encoding. duration float64 // Duration of audio in seconds. - format string // Format of audio. + format string // Format of audio samples. codec string // Codec used for video encoding. bps int // Bits per sample. buffer []byte // Raw audio data. @@ -47,7 +47,7 @@ func (audio *Audio) Duration() float64 { } func (audio *Audio) Format() string { - return audio.format + return audio.format[:len(audio.format)-2] } func (audio *Audio) Codec() string { @@ -104,9 +104,9 @@ func NewAudio(filename string, options *Options) (*Audio, error) { } if options.Format == "" { - audio.format = "s16le" + audio.format = fmt.Sprintf("s16%s", endianness()) } else { - audio.format = options.Format + audio.format = fmt.Sprintf("%s%s", options.Format, endianness()) } if err := checkFormat(audio.format); err != nil { diff --git a/audiowriter.go b/audiowriter.go index c913a27..145dc56 100644 --- a/audiowriter.go +++ b/audiowriter.go @@ -13,9 +13,9 @@ type AudioWriter struct { filename string // Output filename. video string // Video filename. samplerate int // Audio Sample Rate in Hz. - channels int // Number of audio channels. 1 = mono, 2 = stereo. + channels int // Number of audio channels. bitrate int // Bitrate for audio encoding. - format string // Format of audio. + format string // Format of audio samples. codec string // Codec used for video encoding. pipe *io.WriteCloser // Stdout pipe of ffmpeg process. cmd *exec.Cmd // ffmpeg command. @@ -39,7 +39,7 @@ func (writer *AudioWriter) Bitrate() int { } func (writer *AudioWriter) Format() string { - return writer.format + return writer.format[:len(writer.format)-2] } func (writer *AudioWriter) Codec() string { @@ -78,12 +78,13 @@ func NewAudioWriter(filename string, options *Options) (*AudioWriter, error) { writer.channels = options.Channels } - writer.format = "s16le" - if options.Format != "" { - if err := checkFormat(options.Format); err != nil { + if options.Format == "" { + writer.format = fmt.Sprintf("s16%s", endianness()) + } else { + writer.format = fmt.Sprintf("%s%s", options.Format, endianness()) + if err := checkFormat(writer.format); err != nil { return nil, err } - writer.format = options.Format } if options.Video != "" { diff --git a/microphone.go b/microphone.go index 2740046..9c0137c 100644 --- a/microphone.go +++ b/microphone.go @@ -15,8 +15,8 @@ import ( type Microphone struct { name string // Microphone device name. samplerate int // Audio Sample Rate in Hz. - channels int // Number of audio channels. 1 = mono, 2 = stereo. - format string // Format of audio. + channels int // Number of audio channels. + format string // Format of audio samples. bps int // Bits per sample. buffer []byte // Raw audio data. pipe *io.ReadCloser // Stdout pipe for ffmpeg process streaming microphone audio. @@ -37,7 +37,7 @@ func (mic *Microphone) Channels() int { } func (mic *Microphone) Format() string { - return mic.format + return mic.format[:len(mic.format)-2] } func (mic *Microphone) BitsPerSample() int { @@ -99,9 +99,15 @@ func NewMicrophone(stream int, options *Options) (*Microphone, error) { options = &Options{} } - mic.format = "s16le" // Default format. - if options.Format != "" { - mic.format = options.Format + mic.format = fmt.Sprintf("s16%s", endianness()) // Default format. + if options.Format == "" { + mic.format = fmt.Sprintf("s16%s", endianness()) + } else { + mic.format = fmt.Sprintf("%s%s", options.Format, endianness()) + } + + if err := checkFormat(mic.format); err != nil { + return nil, err } if options.SampleRate != 0 { @@ -112,10 +118,6 @@ func NewMicrophone(stream int, options *Options) (*Microphone, error) { mic.channels = options.Channels } - if err := checkFormat(mic.format); err != nil { - return nil, err - } - mic.bps = int(parse(regexp.MustCompile(`\d{1,2}`).FindString(mic.format))) // Bits per sample. return mic, nil diff --git a/player.go b/player.go index a4b842d..ca29e3e 100644 --- a/player.go +++ b/player.go @@ -47,16 +47,32 @@ func Play(filename string) error { } type Player struct { - pipe *io.WriteCloser // Stdin pipe for ffplay process. - cmd *exec.Cmd // ffplay command. + samplerate int // Audio Sample Rate in Hz. + channels int // Number of audio channels. + format string // Format of audio samples. + pipe *io.WriteCloser // Stdin pipe for ffplay process. + cmd *exec.Cmd // ffplay command. } -func NewPlayer(channels, sampleRate int, format string) (*Player, error) { +func (player *Player) SampleRate() int { + return player.samplerate +} + +func (player *Player) Channels() int { + return player.channels +} + +func (player *Player) Format() string { + return player.format[:len(player.format)-2] +} + +func NewPlayer(channels, samplerate int, format string) (*Player, error) { // Check if ffplay is installed on the users machine. if err := checkExists("ffplay"); err != nil { return nil, err } + format = fmt.Sprintf("%s%s", format, endianness()) if err := checkFormat(format); err != nil { return nil, err } @@ -65,14 +81,18 @@ func NewPlayer(channels, sampleRate int, format string) (*Player, error) { "ffplay", "-f", format, "-ac", fmt.Sprintf("%d", channels), - "-ar", fmt.Sprintf("%d", sampleRate), + "-ar", fmt.Sprintf("%d", samplerate), "-i", "-", "-nodisp", "-autoexit", "-loglevel", "quiet", ) - player := &Player{} + player := &Player{ + samplerate: samplerate, + channels: channels, + format: format, + } player.cmd = cmd pipe, err := cmd.StdinPipe() diff --git a/utils.go b/utils.go index 75cdf3b..0c395c3 100644 --- a/utils.go +++ b/utils.go @@ -213,6 +213,17 @@ func getDevicesWindows() ([]string, error) { return devices, nil } +// Little Endian -> "le", Big Endian -> "be". +func endianness() string { + x := 1 + littleEndian := *(*byte)(unsafe.Pointer(&x)) == 1 + if littleEndian { + return "le" + } else { + return "be" + } +} + // Alias the byte buffer as a certain type specified by the format string. func convertBytesToSamples(buffer []byte, size int, format string) interface{} { switch format {