Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Try to bypass audio encoder if pre-encoded is set #120

Draft
wants to merge 4 commits into
base: m114_release
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions api/audio_codecs/L16/audio_encoder_L16.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ absl::optional<AudioEncoderL16::Config> AudioEncoderL16::SdpToConfig(
}
Config config;
config.sample_rate_hz = format.clockrate_hz;
config.pre_encoded = format.pre_encoded;
config.num_channels = rtc::dchecked_cast<int>(format.num_channels);
auto ptime_iter = format.parameters.find("ptime");
if (ptime_iter != format.parameters.end()) {
Expand Down Expand Up @@ -66,6 +67,7 @@ std::unique_ptr<AudioEncoder> AudioEncoderL16::MakeAudioEncoder(
c.num_channels = config.num_channels;
c.frame_size_ms = config.frame_size_ms;
c.payload_type = payload_type;
c.pre_encoded = config.pre_encoded;
if (!config.IsOk()) {
RTC_DCHECK_NOTREACHED();
return nullptr;
Expand Down
1 change: 1 addition & 0 deletions api/audio_codecs/L16/audio_encoder_L16.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ struct RTC_EXPORT AudioEncoderL16 {
int sample_rate_hz = 8000;
int num_channels = 1;
int frame_size_ms = 10;
bool pre_encoded = false;
};
static absl::optional<Config> SdpToConfig(const SdpAudioFormat& audio_format);
static void AppendSupportedEncoders(std::vector<AudioCodecSpec>* specs);
Expand Down
14 changes: 14 additions & 0 deletions api/audio_codecs/audio_encoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,5 +110,19 @@ ANAStats AudioEncoder::GetANAStats() const {
return ANAStats();
}

size_t AudioEncoder::AppendPreEncodeData(rtc::ArrayView<const int16_t> audio,
rtc::Buffer* encoded) {
const size_t old_size = encoded->size();
for (const int16_t it : audio) {
uint8_t arr[2] = {
static_cast<uint8_t>((it >> 8) & 0x00ff),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is endianness an issue here?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assumed we would have a decoder (and this is how I tested), so the most important thing was to be consistent with the decoder.
Otherwise, this is a good question. I think I'll convert it to little-endian (as it is the more common architecture). Another alternative is to rely on the real architecture (by using a C-like union implementation) or to expose this information as part of the API, but that may be too complicated, because who knows which output SDP device will decode it.

static_cast<uint8_t>(it & 0x00ff),
};

encoded->AppendData(arr, 2);
}
return (encoded->size() - old_size);
}

constexpr int AudioEncoder::kMaxNumberOfChannels;
} // namespace webrtc
4 changes: 4 additions & 0 deletions api/audio_codecs/audio_encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,10 @@ class AudioEncoder {
virtual EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
rtc::ArrayView<const int16_t> audio,
rtc::Buffer* encoded) = 0;

// Appends the raw audio samples in `audio` to the end of the `encoded` buffer
// and returns the number of bytes that were appended.
virtual size_t AppendPreEncodeData(rtc::ArrayView<const int16_t> audio,
rtc::Buffer* encoded);
};
} // namespace webrtc
#endif // API_AUDIO_CODECS_AUDIO_ENCODER_H_
11 changes: 8 additions & 3 deletions api/audio_codecs/audio_format.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ SdpAudioFormat::SdpAudioFormat(SdpAudioFormat&&) = default;
SdpAudioFormat::SdpAudioFormat(absl::string_view name,
int clockrate_hz,
size_t num_channels)
: name(name), clockrate_hz(clockrate_hz), num_channels(num_channels) {}
: name(name),
clockrate_hz(clockrate_hz),
num_channels(num_channels),
pre_encoded(false) {}

SdpAudioFormat::SdpAudioFormat(absl::string_view name,
int clockrate_hz,
Expand All @@ -31,7 +34,8 @@ SdpAudioFormat::SdpAudioFormat(absl::string_view name,
: name(name),
clockrate_hz(clockrate_hz),
num_channels(num_channels),
parameters(param) {}
parameters(param),
pre_encoded(false) {}

SdpAudioFormat::SdpAudioFormat(absl::string_view name,
int clockrate_hz,
Expand All @@ -40,7 +44,8 @@ SdpAudioFormat::SdpAudioFormat(absl::string_view name,
: name(name),
clockrate_hz(clockrate_hz),
num_channels(num_channels),
parameters(std::move(param)) {}
parameters(std::move(param)),
pre_encoded(false) {}

bool SdpAudioFormat::Matches(const SdpAudioFormat& o) const {
return absl::EqualsIgnoreCase(name, o.name) &&
Expand Down
1 change: 1 addition & 0 deletions api/audio_codecs/audio_format.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ struct RTC_EXPORT SdpAudioFormat {
int clockrate_hz;
size_t num_channels;
Parameters parameters;
bool pre_encoded;
};

// Information about how an audio format is treated by the codec implementation.
Expand Down
3 changes: 3 additions & 0 deletions api/audio_codecs/g711/audio_encoder_g711.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ absl::optional<AudioEncoderG711::Config> AudioEncoderG711::SdpToConfig(
config.type = is_pcmu ? Config::Type::kPcmU : Config::Type::kPcmA;
config.num_channels = rtc::dchecked_cast<int>(format.num_channels);
config.frame_size_ms = 20;
config.pre_encoded = format.pre_encoded;
auto ptime_iter = format.parameters.find("ptime");
if (ptime_iter != format.parameters.end()) {
const auto ptime = rtc::StringToNumber<int>(ptime_iter->second);
Expand Down Expand Up @@ -75,13 +76,15 @@ std::unique_ptr<AudioEncoder> AudioEncoderG711::MakeAudioEncoder(
AudioEncoderPcmU::Config impl_config;
impl_config.num_channels = config.num_channels;
impl_config.frame_size_ms = config.frame_size_ms;
impl_config.pre_encoded = config.pre_encoded;
impl_config.payload_type = payload_type;
return std::make_unique<AudioEncoderPcmU>(impl_config);
}
case Config::Type::kPcmA: {
AudioEncoderPcmA::Config impl_config;
impl_config.num_channels = config.num_channels;
impl_config.frame_size_ms = config.frame_size_ms;
impl_config.pre_encoded = config.pre_encoded;
impl_config.payload_type = payload_type;
return std::make_unique<AudioEncoderPcmA>(impl_config);
}
Expand Down
1 change: 1 addition & 0 deletions api/audio_codecs/g711/audio_encoder_g711.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ struct RTC_EXPORT AudioEncoderG711 {
Type type = Type::kPcmU;
int num_channels = 1;
int frame_size_ms = 20;
bool pre_encoded = false;
};
static absl::optional<AudioEncoderG711::Config> SdpToConfig(
const SdpAudioFormat& audio_format);
Expand Down
1 change: 1 addition & 0 deletions api/audio_codecs/g722/audio_encoder_g722.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ absl::optional<AudioEncoderG722Config> AudioEncoderG722::SdpToConfig(

AudioEncoderG722Config config;
config.num_channels = rtc::checked_cast<int>(format.num_channels);
config.pre_encoded = format.pre_encoded;
auto ptime_iter = format.parameters.find("ptime");
if (ptime_iter != format.parameters.end()) {
auto ptime = rtc::StringToNumber<int>(ptime_iter->second);
Expand Down
1 change: 1 addition & 0 deletions api/audio_codecs/g722/audio_encoder_g722_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ struct AudioEncoderG722Config {
}
int frame_size_ms = 20;
int num_channels = 1;
bool pre_encoded = false;
};

} // namespace webrtc
Expand Down
1 change: 1 addition & 0 deletions api/audio_codecs/ilbc/audio_encoder_ilbc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ absl::optional<AudioEncoderIlbcConfig> AudioEncoderIlbc::SdpToConfig(
}

AudioEncoderIlbcConfig config;
config.pre_encoded = format.pre_encoded;
auto ptime_iter = format.parameters.find("ptime");
if (ptime_iter != format.parameters.end()) {
auto ptime = rtc::StringToNumber<int>(ptime_iter->second);
Expand Down
1 change: 1 addition & 0 deletions api/audio_codecs/ilbc/audio_encoder_ilbc_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ struct AudioEncoderIlbcConfig {
int frame_size_ms = 30; // Valid values are 20, 30, 40, and 60 ms.
// Note that frame size 40 ms produces encodings with two 20 ms frames in
// them, and frame size 60 ms consists of two 30 ms frames.
bool pre_encoded = false;
};

} // namespace webrtc
Expand Down
3 changes: 2 additions & 1 deletion api/audio_codecs/opus/audio_encoder_opus_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ AudioEncoderOpusConfig::AudioEncoderOpusConfig()
complexity_threshold_window_bps(1500),
dtx_enabled(false),
uplink_bandwidth_update_interval_ms(200),
payload_type(-1) {}
payload_type(-1),
pre_encoded(false) {}
AudioEncoderOpusConfig::AudioEncoderOpusConfig(const AudioEncoderOpusConfig&) =
default;
AudioEncoderOpusConfig::~AudioEncoderOpusConfig() = default;
Expand Down
2 changes: 2 additions & 0 deletions api/audio_codecs/opus/audio_encoder_opus_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ struct RTC_EXPORT AudioEncoderOpusConfig {
// NOTE: This member isn't necessary, and will soon go away. See
// https://bugs.chromium.org/p/webrtc/issues/detail?id=7847
int payload_type;

bool pre_encoded;
};

} // namespace webrtc
Expand Down
5 changes: 4 additions & 1 deletion api/audio_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ void AudioOptions::SetAll(const AudioOptions& change) {
SetFrom(&audio_network_adaptor, change.audio_network_adaptor);
SetFrom(&audio_network_adaptor_config, change.audio_network_adaptor_config);
SetFrom(&init_recording_on_send, change.init_recording_on_send);
SetFrom(&pre_encoded, change.pre_encoded);
}

bool AudioOptions::operator==(const AudioOptions& o) const {
Expand All @@ -75,7 +76,8 @@ bool AudioOptions::operator==(const AudioOptions& o) const {
combined_audio_video_bwe == o.combined_audio_video_bwe &&
audio_network_adaptor == o.audio_network_adaptor &&
audio_network_adaptor_config == o.audio_network_adaptor_config &&
init_recording_on_send == o.init_recording_on_send;
init_recording_on_send == o.init_recording_on_send &&
pre_encoded == o.pre_encoded;
}

std::string AudioOptions::ToString() const {
Expand All @@ -100,6 +102,7 @@ std::string AudioOptions::ToString() const {
ToStringIfSet(&result, "combined_audio_video_bwe", combined_audio_video_bwe);
ToStringIfSet(&result, "audio_network_adaptor", audio_network_adaptor);
ToStringIfSet(&result, "init_recording_on_send", init_recording_on_send);
ToStringIfSet(&result, "pre_encoded", pre_encoded);
result << "}";
return result.str();
}
Expand Down
2 changes: 2 additions & 0 deletions api/audio_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ struct RTC_EXPORT AudioOptions {
// true.
// TODO(webrtc:13566): Remove this option. See issue for details.
absl::optional<bool> init_recording_on_send;
// Audio is already pre-encoded, so encoding can be bypassed.
absl::optional<bool> pre_encoded;
};

} // namespace cricket
Expand Down
13 changes: 0 additions & 13 deletions audio/audio_transport_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -210,17 +210,6 @@ int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples,
int64_t* elapsed_time_ms,
int64_t* ntp_time_ms) {
TRACE_EVENT0("webrtc", "AudioTransportImpl::SendProcessedData");
RTC_DCHECK_EQ(sizeof(int16_t) * nChannels, nBytesPerSample);
RTC_DCHECK_GE(nChannels, 1);
RTC_DCHECK_LE(nChannels, 2);
RTC_DCHECK_GE(
samplesPerSec,
static_cast<uint32_t>(AudioProcessing::NativeRate::kSampleRate8kHz));

// 100 = 1 second / data duration (10 ms).
RTC_DCHECK_EQ(nSamples * 100, samplesPerSec);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do these checks prevent using pre-encoded data? Since it's on the receive end, it seems like this should be unaffected. In particular, dropping the 10 ms rule seems like a pretty big deal, since it's assumed fairly widely throughout the system that play data is given in 10 ms chunks.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll double-check the samples one (it should be fine). What is certain is that if you link the rust-sdk examples against a debug build of webrtc, it will crash at this point, so I commented out this block for testing purposes.

#
# Fatal error in: ../../../../_source/macos_arm64/webrtc/src/audio/audio_transport_impl.cc, line 213
# last system error: 0
# Check failed: sizeof(int16_t) * nChannels == nBytesPerSample (4 vs. 2)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm also worried about OggReader, where we have:

const OGG_PAGE_DURATION: Duration = Duration::from_millis(20);

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can change that to 10, since WebRTC's internal clock for audio is 10 ms.

RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels,
AudioFrame::kMaxDataSizeBytes);

mixer_->Mix(nChannels, &mixed_frame_);
*elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
Expand All @@ -229,12 +218,10 @@ int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples,
if (audio_processing_) {
const auto error =
ProcessReverseAudioFrame(audio_processing_, &mixed_frame_);
RTC_DCHECK_EQ(error, AudioProcessing::kNoError);
}

nSamplesOut = Resample(mixed_frame_, samplesPerSec, &render_resampler_,
static_cast<int16_t*>(audioSamples));
RTC_DCHECK_EQ(nSamplesOut, nChannels * nSamples);
return 0;
}

Expand Down
2 changes: 2 additions & 0 deletions media/engine/webrtc_voice_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,7 @@ void WebRtcVoiceEngine::Init() {
options.audio_jitter_buffer_max_packets = 200;
options.audio_jitter_buffer_fast_accelerate = false;
options.audio_jitter_buffer_min_delay_ms = 0;
options.pre_encoded = false;
ApplyOptions(options);
}
initialized_ = true;
Expand Down Expand Up @@ -1650,6 +1651,7 @@ bool WebRtcVoiceMediaChannel::SetSendCodecs(
IsCodec(voice_codec, kRedCodecName))) {
webrtc::SdpAudioFormat format(voice_codec.name, voice_codec.clockrate,
voice_codec.channels, voice_codec.params);
format.pre_encoded = options_.pre_encoded.value_or(false);

voice_codec_info = engine()->encoder_factory_->QueryAudioEncoder(format);
if (!voice_codec_info) {
Expand Down
20 changes: 12 additions & 8 deletions modules/audio_coding/codecs/g711/audio_encoder_pcm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ AudioEncoderPcm::AudioEncoderPcm(const Config& config, int sample_rate_hz)
static_cast<size_t>(config.frame_size_ms / 10)),
full_frame_samples_(config.num_channels * config.frame_size_ms *
sample_rate_hz / 1000),
first_timestamp_in_buffer_(0) {
first_timestamp_in_buffer_(0),
pre_encoded_(config.pre_encoded) {
RTC_CHECK_GT(sample_rate_hz, 0) << "Sample rate must be larger than 0 Hz";
RTC_CHECK_EQ(config.frame_size_ms % 10, 0)
<< "Frame size must be an integer multiple of 10 ms.";
Expand Down Expand Up @@ -74,13 +75,16 @@ AudioEncoder::EncodedInfo AudioEncoderPcm::EncodeImpl(
EncodedInfo info;
info.encoded_timestamp = first_timestamp_in_buffer_;
info.payload_type = payload_type_;
info.encoded_bytes = encoded->AppendData(
full_frame_samples_ * BytesPerSample(),
[&](rtc::ArrayView<uint8_t> encoded) {
return EncodeCall(&speech_buffer_[0], full_frame_samples_,
encoded.data());
});
speech_buffer_.clear();
if (pre_encoded_) {
info.encoded_bytes = AppendPreEncodeData(audio, encoded);
} else {
info.encoded_bytes = encoded->AppendData(
full_frame_samples_ * BytesPerSample(),
[&](rtc::ArrayView<uint8_t> encoded) {
return EncodeCall(&speech_buffer_[0], full_frame_samples_,
encoded.data());
});
} speech_buffer_.clear();
info.encoder_type = GetCodecType();
return info;
}
Expand Down
4 changes: 3 additions & 1 deletion modules/audio_coding/codecs/g711/audio_encoder_pcm.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ class AudioEncoderPcm : public AudioEncoder {
int frame_size_ms;
size_t num_channels;
int payload_type;
bool pre_encoded;

protected:
explicit Config(int pt)
: frame_size_ms(20), num_channels(1), payload_type(pt) {}
: frame_size_ms(20), num_channels(1), payload_type(pt), pre_encoded(false) {}
};

~AudioEncoderPcm() override;
Expand Down Expand Up @@ -67,6 +68,7 @@ class AudioEncoderPcm : public AudioEncoder {
const int sample_rate_hz_;
const size_t num_channels_;
const int payload_type_;
bool pre_encoded_;
const size_t num_10ms_frames_per_packet_;
const size_t full_frame_samples_;
std::vector<int16_t> speech_buffer_;
Expand Down
69 changes: 38 additions & 31 deletions modules/audio_coding/codecs/g722/audio_encoder_g722.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ AudioEncoderG722Impl::AudioEncoderG722Impl(const AudioEncoderG722Config& config,
num_10ms_frames_buffered_(0),
first_timestamp_in_buffer_(0),
encoders_(new EncoderState[num_channels_]),
interleave_buffer_(2 * num_channels_) {
interleave_buffer_(2 * num_channels_),
pre_encoded_(config.pre_encoded) {
RTC_CHECK(config.IsOk());
const size_t samples_per_channel =
kSampleRateHz / 100 * num_10ms_frames_per_packet_;
Expand Down Expand Up @@ -103,38 +104,44 @@ AudioEncoder::EncodedInfo AudioEncoderG722Impl::EncodeImpl(
return EncodedInfo();
}

// Encode each channel separately.
RTC_CHECK_EQ(num_10ms_frames_buffered_, num_10ms_frames_per_packet_);
num_10ms_frames_buffered_ = 0;
const size_t samples_per_channel = SamplesPerChannel();
for (size_t i = 0; i < num_channels_; ++i) {
const size_t bytes_encoded = WebRtcG722_Encode(
encoders_[i].encoder, encoders_[i].speech_buffer.get(),
samples_per_channel, encoders_[i].encoded_buffer.data());
RTC_CHECK_EQ(bytes_encoded, samples_per_channel / 2);
}

const size_t bytes_to_encode = samples_per_channel / 2 * num_channels_;
EncodedInfo info;
info.encoded_bytes = encoded->AppendData(
bytes_to_encode, [&](rtc::ArrayView<uint8_t> encoded) {
// Interleave the encoded bytes of the different channels. Each separate
// channel and the interleaved stream encodes two samples per byte, most
// significant half first.
for (size_t i = 0; i < samples_per_channel / 2; ++i) {
for (size_t j = 0; j < num_channels_; ++j) {
uint8_t two_samples = encoders_[j].encoded_buffer.data()[i];
interleave_buffer_.data()[j] = two_samples >> 4;
interleave_buffer_.data()[num_channels_ + j] = two_samples & 0xf;
if (pre_encoded_) {
info.encoded_bytes = AppendPreEncodeData(audio, encoded);
} else {
// Encode each channel separately.
RTC_CHECK_EQ(num_10ms_frames_buffered_, num_10ms_frames_per_packet_);
num_10ms_frames_buffered_ = 0;
const size_t samples_per_channel = SamplesPerChannel();
for (size_t i = 0; i < num_channels_; ++i) {
const size_t bytes_encoded = WebRtcG722_Encode(
encoders_[i].encoder, encoders_[i].speech_buffer.get(),
samples_per_channel, encoders_[i].encoded_buffer.data());
RTC_CHECK_EQ(bytes_encoded, samples_per_channel / 2);
}

const size_t bytes_to_encode = samples_per_channel / 2 * num_channels_;

info.encoded_bytes = encoded->AppendData(
bytes_to_encode, [&](rtc::ArrayView<uint8_t> encoded) {
// Interleave the encoded bytes of the different channels. Each separate
// channel and the interleaved stream encodes two samples per byte, most
// significant half first.
for (size_t i = 0; i < samples_per_channel / 2; ++i) {
for (size_t j = 0; j < num_channels_; ++j) {
uint8_t two_samples = encoders_[j].encoded_buffer.data()[i];
interleave_buffer_.data()[j] = two_samples >> 4;
interleave_buffer_.data()[num_channels_ + j] = two_samples & 0xf;
}
for (size_t j = 0; j < num_channels_; ++j)
encoded[i * num_channels_ + j] =
interleave_buffer_.data()[2 * j] << 4 |
interleave_buffer_.data()[2 * j + 1];
}
for (size_t j = 0; j < num_channels_; ++j)
encoded[i * num_channels_ + j] =
interleave_buffer_.data()[2 * j] << 4 |
interleave_buffer_.data()[2 * j + 1];
}

return bytes_to_encode;
});

return bytes_to_encode;
});
}

info.encoded_timestamp = first_timestamp_in_buffer_;
info.payload_type = payload_type_;
info.encoder_type = CodecType::kG722;
Expand Down
Loading