From 089691006a9328924ce3ca42d9e5180c25ab2718 Mon Sep 17 00:00:00 2001 From: Rob Walch Date: Sat, 4 Jan 2025 13:45:24 -0800 Subject: [PATCH 1/2] Exclude AC3 and HEVC remuxing from light build Improve demux/remux types and type mp4-generator args --- src/demux/audio/base-audio-demuxer.ts | 12 +-- src/demux/tsdemuxer.ts | 5 +- src/demux/video/hevc-video-parser.ts | 24 ++--- src/remux/mp4-generator.ts | 144 ++++++++++++++++++-------- src/remux/mp4-remuxer.ts | 62 +++++------ src/types/demuxer.ts | 42 +++++++- src/types/remuxer.ts | 25 +++++ 7 files changed, 209 insertions(+), 105 deletions(-) diff --git a/src/demux/audio/base-audio-demuxer.ts b/src/demux/audio/base-audio-demuxer.ts index 45753618b9a..9b30e0c8295 100644 --- a/src/demux/audio/base-audio-demuxer.ts +++ b/src/demux/audio/base-audio-demuxer.ts @@ -18,8 +18,8 @@ import { dummyTrack } from '../dummy-demuxed-track'; import type { RationalTimestamp } from '../../utils/timescale-conversion'; class BaseAudioDemuxer implements Demuxer { - protected _audioTrack!: DemuxedAudioTrack; - protected _id3Track!: DemuxedMetadataTrack; + protected _audioTrack?: DemuxedAudioTrack; + protected _id3Track?: DemuxedMetadataTrack; protected frameIndex: number = 0; protected cachedData: Uint8Array | null = null; protected basePTS: number | null = null; @@ -74,8 +74,8 @@ class BaseAudioDemuxer implements Demuxer { let id3Data: Uint8Array | undefined = getId3Data(data, 0); let offset = id3Data ? id3Data.length : 0; let lastDataIndex; - const track = this._audioTrack; - const id3Track = this._id3Track; + const track = this._audioTrack as DemuxedAudioTrack; + const id3Track = this._id3Track as DemuxedMetadataTrack; const timestamp = id3Data ? getId3Timestamp(id3Data) : undefined; const length = data.length; @@ -167,9 +167,9 @@ class BaseAudioDemuxer implements Demuxer { } return { - audioTrack: this._audioTrack, + audioTrack: this._audioTrack as DemuxedAudioTrack, videoTrack: dummyTrack() as DemuxedVideoTrackBase, - id3Track: this._id3Track, + id3Track: this._id3Track as DemuxedMetadataTrack, textTrack: dummyTrack() as DemuxedUserdataTrack, }; } diff --git a/src/demux/tsdemuxer.ts b/src/demux/tsdemuxer.ts index 19777102a1f..3eae5dd668c 100644 --- a/src/demux/tsdemuxer.ts +++ b/src/demux/tsdemuxer.ts @@ -774,8 +774,8 @@ function parsePMT( audioPid: -1, videoPid: -1, id3Pid: -1, - segmentVideoCodec: 'avc', - segmentAudioCodec: 'aac', + segmentVideoCodec: 'avc' as 'avc' | 'hevc', + segmentAudioCodec: 'aac' as 'aac' | 'ac3' | 'mp3', }; const sectionLength = ((data[offset + 1] & 0x0f) << 8) | data[offset + 2]; const tableEnd = offset + 3 + sectionLength - 4; @@ -822,7 +822,6 @@ function parsePMT( // logger.log('AVC PID:' + pid); if (result.videoPid === -1) { result.videoPid = pid; - result.segmentVideoCodec = 'avc'; } break; diff --git a/src/demux/video/hevc-video-parser.ts b/src/demux/video/hevc-video-parser.ts index 978c045522f..a9c92d6aa50 100644 --- a/src/demux/video/hevc-video-parser.ts +++ b/src/demux/video/hevc-video-parser.ts @@ -158,12 +158,7 @@ class HevcVideoParser extends BaseVideoParser { track.params[prop] = config.params[prop]; } } - if ( - (!track.vps && !track.sps.length) || - (track.vps && track.vps[0] === this.initVPS) - ) { - track.sps.push(unit.data); - } + this.pushPPSorSPS(track.sps, unit.data, track.vps); if (!VideoSample) { VideoSample = this.VideoSample = this.createVideoSample( true, @@ -185,12 +180,7 @@ class HevcVideoParser extends BaseVideoParser { track.params[prop] = config[prop]; } } - if ( - (!track.vps && !track.pps.length) || - (track.vps && track.vps[0] === this.initVPS) - ) { - track.pps.push(unit.data); - } + this.pushPPSorSPS(track.pps, unit.data, track.vps); } break; @@ -227,6 +217,16 @@ class HevcVideoParser extends BaseVideoParser { } } + private pushPPSorSPS( + ppsOrSps: Uint8Array[], + data: Uint8Array, + vps: Uint8Array[] | undefined, + ) { + if ((vps && vps[0] === this.initVPS) || (!vps && !ppsOrSps.length)) { + ppsOrSps.push(data); + } + } + protected getNALuType(data: Uint8Array, offset: number): number { return (data[offset] & 0x7e) >>> 1; } diff --git a/src/remux/mp4-generator.ts b/src/remux/mp4-generator.ts index d2c9f13c37c..c346823078b 100644 --- a/src/remux/mp4-generator.ts +++ b/src/remux/mp4-generator.ts @@ -3,7 +3,21 @@ */ import { appendUint8Array } from '../utils/mp4-tools'; -import type { DemuxedAudioTrack } from '../types/demuxer'; +import type { + DemuxedAC3, + DemuxedAudioTrack, + DemuxedAVC1, + DemuxedHEVC, + DemuxedVideoTrack, +} from '../types/demuxer'; +import type { + Mp4SampleFlags, + RemuxedAudioTrackSamples, + RemuxedVideoTrackSamples, +} from '../types/remuxer'; + +type MediaTrackType = DemuxedAudioTrack | DemuxedVideoTrack; +type RemuxedTrackType = RemuxedAudioTrackSamples | RemuxedVideoTrackSamples; type HdlrTypes = { video: Uint8Array; @@ -266,7 +280,7 @@ class MP4 { MP4.DINF = MP4.box(MP4.types.dinf, MP4.box(MP4.types.dref, dref)); } - static box(type, ...payload: Uint8Array[]) { + static box(type: number[], ...payload: Uint8Array[]) { let size = 8; let i = payload.length; const len = i; @@ -290,15 +304,15 @@ class MP4 { return result; } - static hdlr(type) { + static hdlr(type: keyof HdlrTypes) { return MP4.box(MP4.types.hdlr, MP4.HDLR_TYPES[type]); } - static mdat(data) { + static mdat(data: Uint8Array) { return MP4.box(MP4.types.mdat, data); } - static mdhd(timescale, duration) { + static mdhd(timescale: number, duration: number) { duration *= timescale; const upperWordDuration = Math.floor(duration / (UINT32_MAX + 1)); const lowerWordDuration = Math.floor(duration % (UINT32_MAX + 1)); @@ -345,16 +359,16 @@ class MP4 { ); } - static mdia(track) { + static mdia(track: MediaTrackType) { return MP4.box( MP4.types.mdia, - MP4.mdhd(track.timescale, track.duration), + MP4.mdhd(track.timescale || 0, track.duration || 0), MP4.hdlr(track.type), MP4.minf(track), ); } - static mfhd(sequenceNumber) { + static mfhd(sequenceNumber: number) { return MP4.box( MP4.types.mfhd, new Uint8Array([ @@ -370,7 +384,7 @@ class MP4 { ); } - static minf(track) { + static minf(track: MediaTrackType) { if (track.type === 'audio') { return MP4.box( MP4.types.minf, @@ -388,7 +402,11 @@ class MP4 { } } - static moof(sn, baseMediaDecodeTime, track) { + static moof( + sn: number, + baseMediaDecodeTime: number, + track: RemuxedTrackType, + ) { return MP4.box( MP4.types.moof, MP4.mfhd(sn), @@ -396,7 +414,7 @@ class MP4 { ); } - static moov(tracks) { + static moov(tracks: MediaTrackType[]) { let i = tracks.length; const boxes: Uint8Array[] = []; @@ -406,13 +424,16 @@ class MP4 { return MP4.box.apply( null, - [MP4.types.moov, MP4.mvhd(tracks[0].timescale, tracks[0].duration)] + [ + MP4.types.moov, + MP4.mvhd(tracks[0].timescale || 0, tracks[0].duration || 0), + ] .concat(boxes) .concat(MP4.mvex(tracks)), ); } - static mvex(tracks) { + static mvex(tracks: MediaTrackType[]) { let i = tracks.length; const boxes: Uint8Array[] = []; @@ -423,7 +444,7 @@ class MP4 { return MP4.box.apply(null, [MP4.types.mvex, ...boxes]); } - static mvhd(timescale, duration) { + static mvhd(timescale: number, duration: number) { duration *= timescale; const upperWordDuration = Math.floor(duration / (UINT32_MAX + 1)); const lowerWordDuration = Math.floor(duration % (UINT32_MAX + 1)); @@ -544,11 +565,11 @@ class MP4 { return MP4.box(MP4.types.mvhd, bytes); } - static sdtp(track) { + static sdtp(track: RemuxedTrackType) { const samples = track.samples || []; const bytes = new Uint8Array(4 + samples.length); - let i; - let flags; + let i: number; + let flags: Mp4SampleFlags; // leave the full box header (4 bytes) all zero // write the sample table for (i = 0; i < samples.length; i++) { @@ -562,7 +583,7 @@ class MP4 { return MP4.box(MP4.types.sdtp, bytes); } - static stbl(track) { + static stbl(track: MediaTrackType) { return MP4.box( MP4.types.stbl, MP4.stsd(track), @@ -573,7 +594,7 @@ class MP4 { ); } - static avc1(track) { + static avc1(track: DemuxedAVC1) { let sps: number[] = []; let pps: number[] = []; let i; @@ -781,8 +802,8 @@ class MP4 { ]); } - static audioStsd(track) { - const samplerate = track.samplerate; + static audioStsd(track: DemuxedAudioTrack) { + const samplerate = track.samplerate || 0; return new Uint8Array([ 0x00, 0x00, @@ -801,7 +822,7 @@ class MP4 { 0x00, 0x00, // reserved 0x00, - track.channelCount, // channelcount + track.channelCount || 0, // channelcount 0x00, 0x10, // sampleSize:16bits 0x00, @@ -815,7 +836,7 @@ class MP4 { ]); } - static mp4a(track) { + static mp4a(track: DemuxedAudioTrack) { return MP4.box( MP4.types.mp4a, MP4.audioStsd(track), @@ -823,39 +844,69 @@ class MP4 { ); } - static mp3(track) { + static mp3(track: DemuxedAudioTrack) { return MP4.box(MP4.types['.mp3'], MP4.audioStsd(track)); } - static ac3(track) { + static ac3(track: DemuxedAudioTrack) { return MP4.box( MP4.types['ac-3'], MP4.audioStsd(track), - MP4.box(MP4.types.dac3, track.config), + MP4.box(MP4.types.dac3, track.config as Uint8Array), ); } - static stsd(track) { + static stsd(track: MediaTrackType | DemuxedAC3): Uint8Array { + const { segmentCodec } = track; if (track.type === 'audio') { - if (track.segmentCodec === 'mp3' && track.codec === 'mp3') { - return MP4.box(MP4.types.stsd, MP4.STSD, MP4.mp3(track)); + if (segmentCodec === 'aac') { + return MP4.box(MP4.types.stsd, MP4.STSD, MP4.mp4a(track)); } - if (track.segmentCodec === 'ac3') { + if ( + __USE_M2TS_ADVANCED_CODECS__ && + segmentCodec === 'ac3' && + track.config + ) { return MP4.box(MP4.types.stsd, MP4.STSD, MP4.ac3(track)); } - return MP4.box(MP4.types.stsd, MP4.STSD, MP4.mp4a(track)); - } else if (track.segmentCodec === 'avc') { - return MP4.box(MP4.types.stsd, MP4.STSD, MP4.avc1(track)); + if (segmentCodec === 'mp3' && track.codec === 'mp3') { + return MP4.box(MP4.types.stsd, MP4.STSD, MP4.mp3(track)); + } } else { - return MP4.box(MP4.types.stsd, MP4.STSD, MP4.hvc1(track)); + if (track.pps && track.sps) { + if (segmentCodec === 'avc') { + return MP4.box( + MP4.types.stsd, + MP4.STSD, + MP4.avc1(track as DemuxedAVC1), + ); + } + if ( + __USE_M2TS_ADVANCED_CODECS__ && + segmentCodec === 'hevc' && + track.vps + ) { + return MP4.box( + MP4.types.stsd, + MP4.STSD, + MP4.hvc1(track as DemuxedHEVC), + ); + } + } else { + throw new Error(`video track missing pps or sps`); + } } + + throw new Error( + `unsupported ${track.type} segment codec (${segmentCodec}/${track.codec})`, + ); } - static tkhd(track) { + static tkhd(track: MediaTrackType) { const id = track.id; - const duration = track.duration * track.timescale; - const width = track.width; - const height = track.height; + const duration = (track.duration || 0) * (track.timescale || 0); + const width = (track as any).width || 0; + const height = (track as any).height || 0; const upperWordDuration = Math.floor(duration / (UINT32_MAX + 1)); const lowerWordDuration = Math.floor(duration % (UINT32_MAX + 1)); return MP4.box( @@ -961,7 +1012,7 @@ class MP4 { ); } - static traf(track, baseMediaDecodeTime) { + static traf(track: RemuxedTrackType, baseMediaDecodeTime: number) { const sampleDependencyTable = MP4.sdtp(track); const id = track.id; const upperWordBaseMediaDecodeTime = Math.floor( @@ -1020,12 +1071,12 @@ class MP4 { * Generate a track box. * @param track a track definition */ - static trak(track) { + static trak(track: MediaTrackType) { track.duration = track.duration || 0xffffffff; return MP4.box(MP4.types.trak, MP4.tkhd(track), MP4.mdia(track)); } - static trex(track) { + static trex(track: MediaTrackType) { const id = track.id; return MP4.box( MP4.types.trex, @@ -1058,7 +1109,7 @@ class MP4 { ); } - static trun(track, offset) { + static trun(track: MediaTrackType, offset: number) { const samples = track.samples || []; const len = samples.length; const arraylen = 12 + 16 * len; @@ -1121,7 +1172,7 @@ class MP4 { return MP4.box(MP4.types.trun, array); } - static initSegment(tracks) { + static initSegment(tracks: MediaTrackType[]) { if (!MP4.types) { MP4.init(); } @@ -1131,9 +1182,12 @@ class MP4 { return result; } - static hvc1(track) { + static hvc1(track: DemuxedHEVC) { + if (!__USE_M2TS_ADVANCED_CODECS__) { + return new Uint8Array(); + } const ps = track.params; - const units = [track.vps, track.sps, track.pps]; + const units: Uint8Array[][] = [track.vps, track.sps, track.pps]; const NALuLengthSize = 4; const config = new Uint8Array([ 0x01, diff --git a/src/remux/mp4-remuxer.ts b/src/remux/mp4-remuxer.ts index 5491ee43fd0..3e761f585e0 100644 --- a/src/remux/mp4-remuxer.ts +++ b/src/remux/mp4-remuxer.ts @@ -17,6 +17,7 @@ import type { } from '../types/demuxer'; import type { InitSegmentData, + Mp4Sample, RemuxedMetadata, RemuxedTrack, RemuxedUserdata, @@ -36,6 +37,26 @@ const AC3_SAMPLES_PER_FRAME = 1536; let chromeVersion: number | null = null; let safariWebkitVersion: number | null = null; +function createMp4Sample( + isKeyframe: boolean, + duration: number, + size: number, + cts: number, +): Mp4Sample { + return { + duration, + size, + cts, + flags: { + isLeading: 0, + isDependedOn: 0, + hasRedundancy: 0, + degradPrio: 0, + dependsOn: isKeyframe ? 2 : 1, + isNonSync: isKeyframe ? 0 : 1, + }, + }; +} export default class MP4Remuxer implements Remuxer { private readonly logger: ILogger; private readonly observer: HlsEventEmitter; @@ -718,7 +739,7 @@ export default class MP4Remuxer implements Remuxer { maxPtsDelta = Math.max(maxPtsDelta, ptsDelta); outputSamples.push( - new Mp4Sample( + createMp4Sample( VideoSample.key, mp4SampleDuration, mp4SampleLength, @@ -776,7 +797,7 @@ export default class MP4Remuxer implements Remuxer { const moof = MP4.moof( track.sequenceNumber++, firstDTS, - Object.assign({}, track, { + Object.assign(track, { samples: outputSamples, }), ); @@ -1027,7 +1048,7 @@ export default class MP4Remuxer implements Remuxer { // Default the sample's duration to the computed mp4SampleDuration, which will either be 1024 for AAC or 1152 for MPEG // In the case that we have 1 sample, this will be the duration. If we have more than one sample, the duration // becomes the PTS diff with the previous sample - outputSamples.push(new Mp4Sample(true, mp4SampleDuration, unitLen, 0)); + outputSamples.push(createMp4Sample(true, mp4SampleDuration, unitLen, 0)); lastPTS = pts; } @@ -1167,38 +1188,3 @@ export function flushTextTrackUserdataCueSamples( samples, }; } - -type Mp4SampleFlags = { - isLeading: 0; - isDependedOn: 0; - hasRedundancy: 0; - degradPrio: 0; - dependsOn: 1 | 2; - isNonSync: 0 | 1; -}; - -class Mp4Sample { - public size: number; - public duration: number; - public cts: number; - public flags: Mp4SampleFlags; - - constructor( - isKeyframe: boolean, - duration: number, - size: number, - cts: number, - ) { - this.duration = duration; - this.size = size; - this.cts = cts; - this.flags = { - isLeading: 0, - isDependedOn: 0, - hasRedundancy: 0, - degradPrio: 0, - dependsOn: isKeyframe ? 2 : 1, - isNonSync: isKeyframe ? 0 : 1, - }; - } -} diff --git a/src/types/demuxer.ts b/src/types/demuxer.ts index 9fbb8f80f9c..e7249270cef 100644 --- a/src/types/demuxer.ts +++ b/src/types/demuxer.ts @@ -59,15 +59,21 @@ export interface PassthroughTrack extends DemuxedTrack { codec: string; } export interface DemuxedAudioTrack extends DemuxedTrack { + type: 'audio'; + segmentCodec: 'aac' | 'ac3' | 'mp3'; config?: number[] | Uint8Array; samplerate?: number; - segmentCodec?: string; channelCount?: number; manifestCodec?: string; parsedCodec?: string; samples: AudioSample[]; } +export type DemuxedAC3 = DemuxedAudioTrack & { + segmentCodec: 'ac3'; + config: Uint8Array; +}; + export interface DemuxedVideoTrackBase extends DemuxedTrack { width?: number; height?: number; @@ -84,9 +90,43 @@ export interface DemuxedVideoTrackBase extends DemuxedTrack { } export interface DemuxedVideoTrack extends DemuxedVideoTrackBase { + type: 'video'; + segmentCodec: 'avc' | 'hevc'; samples: VideoSample[]; + pixelRatio: [number, number]; + width: number; + height: number; } +export type DemuxedAVC1 = DemuxedVideoTrack & { + segmentCodec: 'avc'; + pps: Uint8Array[]; + sps: Uint8Array[]; +}; + +export type DemuxedHEVC = DemuxedVideoTrack & { + segmentCodec: 'hevc'; + params: { + general_profile_space: number; + general_tier_flag: number; + general_profile_idc: number; + general_profile_compatibility_flags: number[]; + general_constraint_indicator_flags: number[]; + general_level_idc: number; + min_spatial_segmentation_idc: number; + parallelismType: number; + chroma_format_idc: number; + bit_depth_luma_minus8: number; + bit_depth_chroma_minus8: number; + frame_rate: { fps: string; fixed: boolean }; + temporal_id_nested: number; + num_temporal_layers: number; + }; + pps: Uint8Array[]; + sps: Uint8Array[]; + vps: Uint8Array; +}; + export interface DemuxedMetadataTrack extends DemuxedTrack { samples: MetadataSample[]; } diff --git a/src/types/remuxer.ts b/src/types/remuxer.ts index d3b54c89452..8478a1f80c2 100644 --- a/src/types/remuxer.ts +++ b/src/types/remuxer.ts @@ -3,6 +3,7 @@ import type { DemuxedAudioTrack, DemuxedMetadataTrack, DemuxedUserdataTrack, + DemuxedVideoTrack, DemuxedVideoTrackBase, MetadataSample, UserdataSample, @@ -61,6 +62,30 @@ export interface RemuxedUserdata { samples: UserdataSample[]; } +export type Mp4SampleFlags = { + isLeading: 0; + isDependedOn: 0; + hasRedundancy: 0; + degradPrio: 0; + dependsOn: 1 | 2; + isNonSync: 0 | 1; +}; + +export type Mp4Sample = { + size: number; + duration: number; + cts: number; + flags: Mp4SampleFlags; +}; + +export type RemuxedAudioTrackSamples = DemuxedAudioTrack & { + samples: Mp4Sample[]; +}; + +export type RemuxedVideoTrackSamples = DemuxedVideoTrack & { + samples: Mp4Sample[]; +}; + export interface RemuxerResult { audio?: RemuxedTrack; video?: RemuxedTrack; From 2a308871671184a0e48c5122cd13d431f672f105 Mon Sep 17 00:00:00 2001 From: Rob Walch Date: Tue, 7 Jan 2025 10:19:11 -0800 Subject: [PATCH 2/2] Rename pushParameterSet method and sets argument Re: https://github.com/video-dev/hls.js/pull/6940#discussion_r1905417304 --- src/demux/video/hevc-video-parser.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/demux/video/hevc-video-parser.ts b/src/demux/video/hevc-video-parser.ts index a9c92d6aa50..485ee85c6bd 100644 --- a/src/demux/video/hevc-video-parser.ts +++ b/src/demux/video/hevc-video-parser.ts @@ -158,7 +158,7 @@ class HevcVideoParser extends BaseVideoParser { track.params[prop] = config.params[prop]; } } - this.pushPPSorSPS(track.sps, unit.data, track.vps); + this.pushParameterSet(track.sps, unit.data, track.vps); if (!VideoSample) { VideoSample = this.VideoSample = this.createVideoSample( true, @@ -180,7 +180,7 @@ class HevcVideoParser extends BaseVideoParser { track.params[prop] = config[prop]; } } - this.pushPPSorSPS(track.pps, unit.data, track.vps); + this.pushParameterSet(track.pps, unit.data, track.vps); } break; @@ -217,13 +217,13 @@ class HevcVideoParser extends BaseVideoParser { } } - private pushPPSorSPS( - ppsOrSps: Uint8Array[], + private pushParameterSet( + parameterSets: Uint8Array[], data: Uint8Array, vps: Uint8Array[] | undefined, ) { - if ((vps && vps[0] === this.initVPS) || (!vps && !ppsOrSps.length)) { - ppsOrSps.push(data); + if ((vps && vps[0] === this.initVPS) || (!vps && !parameterSets.length)) { + parameterSets.push(data); } }