Make start of talkspurt information available for sample api

algesten · Sep 3, 2024 · 2866d30 · 2866d30
1 parent b800a7b
commit 2866d30
Show file tree

Hide file tree

Showing 5 changed files with 146 additions and 4 deletions.
diff --git a/src/media/event.rs b/src/media/event.rs
@@ -94,7 +94,7 @@ pub struct MediaData {
     /// quantity. The numerator is the timestamp field from the RTP header, the denominator
     /// depends on whether this is an audio or video packet.
     ///
-    /// For audio the timebase is 48kHz for video it is 90kHz.
+    /// For audio the timebase is often 48kHz; for video it is 90kHz.
     pub time: MediaTime,
 
     /// The time of the [`Input::Receive`][crate::Input::Receive] of the first packet that caused this MediaData.
@@ -131,6 +131,12 @@ pub struct MediaData {
     ///
     /// If no Sender Report(SR) has been received this is [`None`].
     pub last_sender_info: Option<SenderInfo>,
+
+    /// Whether this is the first packet of a talkspurt, that is, the first packet after a
+    /// silence period during which packets have not been transmitted contiguously.
+    ///
+    /// Applies to audio only, when DTX or silence suppression is enabled.
+    pub audio_start_of_talk_spurt: bool,
 }
 
 /// Details for an incoming a keyframe request (PLI or FIR).

diff --git a/src/media/mod.rs b/src/media/mod.rs
@@ -268,6 +268,8 @@ impl Media {
                     ext_vals: dep.ext_vals().clone(),
                     codec_extra: dep.codec_extra,
                     last_sender_info: dep.first_sender_info(),
+                    audio_start_of_talk_spurt: codec.spec().codec.is_audio()
+                        && dep.start_of_talkspurt(),
                     data: dep.data,
                 }));
             }

diff --git a/src/packet/buffer_rx.rs b/src/packet/buffer_rx.rs
@@ -55,15 +55,36 @@ impl Depacketized {
     }
 
     pub fn seq_range(&self) -> RangeInclusive<SeqNo> {
-        let first = self.meta[0].seq_no;
-        let last = self.meta.last().expect("at least one element").seq_no;
+        let first = self
+            .meta
+            .first()
+            .expect("a depacketized to consist of at least one packet")
+            .seq_no;
+        let last = self
+            .meta
+            .last()
+            .expect("a depacketized to consist of at least one packet")
+            .seq_no;
         first..=last
     }
 
+    pub fn start_of_talkspurt(&self) -> bool {
+        self.meta
+            .first() // only the first packet's metadata is consulted
+            .expect("a depacketized to consist of at least one packet") // panics if `meta` is empty
+            .header
+            .marker // the header's `marker` flag is returned unchanged
+    }
+
     pub fn ext_vals(&self) -> &ExtensionValues {
         // We use the extensions from the last packet because certain extensions, such as video
         // orientation, are only added on the last packet to save bytes.
-        &self.meta[self.meta.len() - 1].header.ext_vals
+        &self
+            .meta
+            .last()
+            .expect("a depacketized to consist of at least one packet")
+            .header
+            .ext_vals
     }
 }
 

diff --git a/tests/common.rs b/tests/common.rs
@@ -230,7 +230,10 @@ pub fn connect_l_r() -> (TestRtc, TestRtc) {
         // release packet straight away
         .set_reordering_size_audio(0)
         .build();
+    connect_l_r_with_rtc(rtc1, rtc2)
+}
 
+pub fn connect_l_r_with_rtc(rtc1: Rtc, rtc2: Rtc) -> (TestRtc, TestRtc) {
     let mut l = TestRtc::new_with_rtc(info_span!("L"), rtc1);
     let mut r = TestRtc::new_with_rtc(info_span!("R"), rtc2);
 

diff --git a/tests/rtp_to_frame.rs b/tests/rtp_to_frame.rs
@@ -0,0 +1,110 @@
+use std::collections::VecDeque;
+use std::time::Duration;
+
+use str0m::format::Codec;
+use str0m::media::MediaKind;
+use str0m::rtp::{ExtensionValues, Ssrc};
+use str0m::{Event, Rtc, RtcError};
+
+mod common;
+use common::{connect_l_r_with_rtc, init_log, progress};
+
+#[test] // Received MediaData must flag a talkspurt start exactly when the marker bit was set.
+pub fn audio_start_of_talk_spurt() -> Result<(), RtcError> {
+    init_log();
+
+    let rtc1 = Rtc::builder().set_rtp_mode(true).build();
+    let rtc2 = Rtc::builder().set_reordering_size_audio(0).build();
+
+    let (mut l, mut r) = connect_l_r_with_rtc(rtc1, rtc2);
+
+    let mid = "audio".into();
+    let ssrc_tx: Ssrc = 1337.into();
+
+    l.direct_api().declare_media(mid, MediaKind::Audio);
+    l.direct_api().declare_stream_tx(ssrc_tx, None, mid, None);
+    r.direct_api().declare_media(mid, MediaKind::Audio);
+
+    let max = l.last.max(r.last); // bring both sides to the larger of the two `last` values
+    l.last = max;
+    r.last = max;
+
+    let params = l.params_opus();
+    let ssrc = l.direct_api().stream_tx_by_mid(mid, None).unwrap().ssrc();
+    assert_eq!(params.spec().codec, Codec::Opus);
+    let pt = params.pt();
+
+    let to_write: Vec<&[u8]> = vec![
+        // 1st in sequence order: written first, count 0 (seq_no 47_000)
+        &[0x1, 0x2, 0x3, 0x4],
+        // 3rd in sequence order: written second, count 3 (seq_no 47_003)
+        &[0x9, 0xa, 0xb, 0xc],
+        // 2nd in sequence order: written third, count 1 (seq_no 47_001)
+        &[0x5, 0x6, 0x7, 0x8],
+    ];
+
+    let mut to_write: VecDeque<_> = to_write.into();
+
+    let mut write_at = l.last + Duration::from_millis(300);
+
+    let mut counts: Vec<u64> = vec![0, 3, 1]; // one offset per payload, consumed front to back
+
+    loop {
+        if l.start + l.duration() > write_at {
+            write_at = l.last + Duration::from_millis(300);
+            if let Some(packet) = to_write.pop_front() {
+                let wallclock = l.start + l.duration();
+
+                let mut direct = l.direct_api();
+                let stream = direct.stream_tx(&ssrc).unwrap();
+
+                let count = counts.remove(0);
+                let time = (count * 1000 + 47_000_000) as u32;
+                let seq_no = (47_000 + count).into();
+
+                let exts = ExtensionValues {
+                    audio_level: Some(-42 - count as i8),
+                    voice_activity: Some(false),
+                    ..Default::default()
+                };
+
+                stream
+                    .write_rtp(
+                        pt,
+                        seq_no,
+                        time,
+                        wallclock,
+                        *seq_no % 2 == 0, // marker bit: set only when the sequence number is even
+                        exts,
+                        false,
+                        packet.to_vec(),
+                    )
+                    .expect("clean write");
+            }
+        }
+
+        progress(&mut l, &mut r)?;
+
+        if l.duration() > Duration::from_secs(10) {
+            break;
+        }
+    }
+
+    let media: Vec<_> = r
+        .events
+        .iter()
+        .filter_map(|(_, e)| {
+            if let Event::MediaData(v) = e {
+                Some(v)
+            } else {
+                None
+            }
+        })
+        .collect();
+
+    for m in media { // the flag must mirror the even-seq_no marker rule used when writing
+        assert!(m.audio_start_of_talk_spurt == (**m.seq_range.start() % 2 == 0));
+    }
+
+    Ok(())
+}