Support for Intel Arc AV1 encode/decode (#14)

Proryanator · Mar 31, 2023 · 7308334 · 7308334
1 parent 31e970c
commit 7308334
Show file tree

Hide file tree

Showing 11 changed files with 269 additions and 31 deletions.
diff --git a/README.md b/README.md
@@ -12,6 +12,9 @@
 - [Applying your Findings](#applying-your-findings)
 - [Author's Research Findings and Discussion](#authors-research-findings-and-discussion)
 
+To see research outcomes for specific encoder types, please see [Encoder Specific Notes](#encoder-specific-notes) in
+the `Expected Performance` section.
+
 ## Overview
 
 ### Project Goals
@@ -55,8 +58,7 @@ supported.
 - ***Nvidia NVENC H264/HEVC** (h264_nvenc, hevc_nvenc)
 - ***AMD H264/HEVC** (h264_amf, hevc_amf)
 - **Intel Quick Sync Video H264/HEVC** (h264_qsv, hevc_qsv)
-
-Note, no support for software/CPU encoding or AV1 hardware encoding exists yet.
+- ***Intel Arc AV1** (av1_qsv)
 
 ## Minimum system specs suggested
 
@@ -86,13 +88,15 @@ PCI bottlenecking for GPU's not in the primary slot.
   free to have more than 1 for your testing (although the benchmark would only run against one)
 - the tool does _not_ support multiple AMD GPU's for the benchmark tool, but you are able to still specify _-gpu_ with
   the permutor-cli tool
+- for AV1 on Intel Arc, make sure the monitor plugged into the Arc GPU is your primary monitor, otherwise ffmpeg may not
+  detect or use the GPU
 
 ---
 
 ## Installation and Setup
 
-Note: tool has been tested with ffmpeg version `5.1.2`, so it's highly suggested to use the same version, or at least
-version `5.*` of ffmpeg/ffprobe.
+Note: tool has been tested with ffmpeg version `6.0` (this version comes bundled with AV1 hardware encoding support), so
+it's highly suggested to use the same version, or at least version `6.*` of ffmpeg/ffprobe.
 
 1) Installation of <a href='https://ffmpeg.org/download.html'>ffmpeg</a>
 
@@ -566,6 +570,9 @@ performance benefit.
 
 ## Encoder Specific Notes
 
+- [H264/HEVC NVENC](#h264hevc-nvenc)
+- [AV1 Intel Arc](#av1-on-intel-arc)
+
 ### H264/HEVC NVENC
 
 #### Presets & Tunes
@@ -648,6 +655,59 @@ NVENC HEVC: 4K@120   -> 90-100Mb/s
 
 ---
 
+### AV1 on Intel Arc
+
+#### Presets
+
+Presets of `veryfast, faster, fast, medium, slow, slower, veryslow` are used in the tool. Quality may increase by varying
+degrees from `veryfast` to `veryslow`, albeit at the cost of a lower maximum fps.
+
+The VMAF score barely increases between presets: between `veryfast` and `veryslow` there's only a ~2 point difference.
+It's recommended to stream using `veryfast` to get the most fps at roughly the same VMAF score.
+
+#### Profiles
+
+There were 2 profiles provided in the version of ffmpeg used by this tool: `main` and `unknown`. `unknown` is most
+likely a placeholder and produces similar results to `main`, so the tools will only use `main`.
+
+#### Async Depth
+
+This has to do with parallelism, and ffmpeg defaults this to `4`. Any lower and you start to see noticeable fps drops in
+realtime encoding performance. Going higher than `4` (tested up to `8`) gives a negligible fps improvement, gaining
+only ~2fps or so in the 1% lows.
+
+---
+
+## Expected Performance
+
+### Minimum Spec'd PC w/ AV1 Intel Arc GPU
+
+(No affiliate links in here, just for reference)
+
+- <b>
+  CPU:</b> <a href='https://www.intel.com/content/www/us/en/products/sku/126687/intel-core-i58400-processor-9m-cache-up-to-4-00-ghz/specifications.html'>
+  Intel i5-8400 (6 cores/6 threads)</a>
+- <b>RAM:</b> 16GB of <a href='https://a.co/d/0O1qryh'>G.Skill Ripjaws V DDR4 3200Mhz</a>
+- <b>GPU:</b> <a href='https://a.co/d/iJLdgKx'>Asus GTX 1660 Super</a>
+- <b>NVME SSD</b>: <a href='https://a.co/d/clUM7ta'>PNY 250GB NVMe PCI Gen3 x4</a>
+- <b>Intel Arc GPU:</b> ASRock A380 6GB
+- <b>Intel Arc Driver:</b> 31.0.101.4255
+
+So far, the author did his testing at 4K@60, but as more resolutions get tested there will be more results below.
+
+```text
+720@60   -> ???
+720@120  -> ???
+1080@60  -> ???
+1080@120 -> ???
+2k@60    -> ???
+2k@120   -> ???
+4k@60    -> 35-40Mb/s
+4k@120   -> ???
+```
+
+---
+
 ## Feature Requests, Bugs or Issues
 
 The author plans to add more encoder support, run the benchmark on a wide variety of hardware, and much much more.

diff --git a/benchmark/src/main.rs b/benchmark/src/main.rs
@@ -9,10 +9,11 @@ use text_io::read;
 use cli::cli_util::{is_dev, pause};
 use cli::supported::{get_supported_encoders, get_supported_inputs};
 use codecs::amf::Amf;
+use codecs::av1_qsv::AV1QSV;
 use codecs::get_vendor_for_codec;
-use codecs::intel_igpu::IntelIGPU;
 use codecs::nvenc::Nvenc;
 use codecs::permute::Permute;
+use codecs::qsv::QSV;
 use codecs::vendor::Vendor;
 use engine::benchmark_engine::BenchmarkEngine;
 use ffmpeg::metadata::MetaData;
@@ -208,9 +209,14 @@ fn get_benchmark_settings_for(cli: &BenchmarkCli) -> String {
             amf.get_benchmark_settings()
         }
 
-        Vendor::InteliGPU => {
-            let intel_qsv = IntelIGPU::new(cli.encoder == "hevc_qsv");
-            intel_qsv.get_benchmark_settings()
+        Vendor::IntelQSV => {
+            if cli.encoder.contains("av1") {
+                let intel_av1 = AV1QSV::new();
+                intel_av1.get_benchmark_settings()
+            } else {
+                let intel_qsv = QSV::new(cli.encoder == "hevc_qsv");
+                intel_qsv.get_benchmark_settings()
+            }
         }
         Vendor::Unknown => {
             // nothing to do here
@@ -261,7 +267,7 @@ fn fig_title(msg: String, small_font_content: String) {
     let figure = small_font.convert(msg.as_str());
     assert!(figure.is_some());
     println!("{}\n", figure.unwrap());
-    println!("Version v0.2.0-alpha");
+    println!("Version v0.6.0-alpha");
     println!("Source code: https://github.com/Proryanator/encoder-benchmark\n");
 
     fs::remove_file(small_font_file_name).expect("Not able to delete tmp file");

diff --git a/cli/src/supported.rs b/cli/src/supported.rs
@@ -1,12 +1,12 @@
-const SUPPORTED_ENCODERS: [&'static str; 6] = ["h264_nvenc", "hevc_nvenc", "h264_amf", "hevc_amf", "h264_qsv", "hevc_qsv"];
+const SUPPORTED_ENCODERS: [&'static str; 7] = ["h264_nvenc", "hevc_nvenc", "h264_amf", "hevc_amf", "h264_qsv", "hevc_qsv", "av1_qsv"];
 const DOWNLOAD_URL: &str = "https://www.dropbox.com/sh/x08pkk47lc1v5ex/AADGaoOjOcA0-uPo7I0NaxL-a?dl=0";
 const ENCODE_FILES: [&'static str; 8] = ["720-60.y4m", "720-120.y4m", "1080-60.y4m", "1080-120.y4m", "2k-60.y4m", "2k-120.y4m", "4k-60.y4m", "4k-120.y4m"];
 
 pub fn is_encoder_supported(potential_encoder: &String) -> bool {
     return SUPPORTED_ENCODERS.contains(&potential_encoder.as_str());
 }
 
-pub fn get_supported_encoders() -> [&'static str; 6] {
+pub fn get_supported_encoders() -> [&'static str; 7] {
     return SUPPORTED_ENCODERS;
 }
 

diff --git a/codecs/src/av1_qsv.rs b/codecs/src/av1_qsv.rs
@@ -0,0 +1,140 @@
+use std::collections::HashMap;
+
+use itertools::Itertools;
+
+use crate::permute::Permute;
+use crate::resolutions::map_res_to_bitrate;
+
+// Permutation source for the Intel Arc AV1 encoder (av1_qsv).
+// we'll add more options when we add in extended permutation support
+pub struct AV1QSV {
+    // candidate values for the ffmpeg `-preset` argument
+    presets: Vec<&'static str>,
+    // candidate values for the ffmpeg `-profile:v` argument
+    profiles: Vec<&'static str>,
+    // candidate values for the ffmpeg `-async_depth` argument
+    async_depth: Vec<&'static str>,
+    // might be able to make this the size we're expecting
+    permutations: Vec<String>,
+    // position of the permutation most recently returned by next(); -1 means none yet
+    index: i32,
+}
+
+impl AV1QSV {
+    // Builds an encoder with the default preset/profile/async-depth candidates and an empty
+    // permutation list.
+    pub fn new() -> Self {
+        Self {
+            presets: get_qsv_presets(),
+            profiles: vec!["main"],
+            // anything lower than 4 you get less fps performance, and anything higher than 4 you don't see much return
+            // (maybe 1% lows might be a bit higher by a few fps)
+            async_depth: vec!["4"],
+            permutations: Vec::new(),
+            // starts at -1, so that first next() will return the first element
+            index: -1,
+        }
+    }
+
+    // Returns the fixed ffmpeg argument string this encoder reports as its benchmark settings.
+    pub fn get_benchmark_settings(&self) -> String {
+        String::from("-preset veryfast -profile:v main")
+    }
+
+    // True while there is at least one permutation that next() has not returned yet.
+    fn has_next(&self) -> bool {
+        // compare the position of the *next* element against the length instead of computing
+        // `len() - 1`: with an empty list that subtraction underflows the usize and panics in
+        // debug builds; index never drops below -1, so `index + 1` is always non-negative
+        ((self.index + 1) as usize) < self.permutations.len()
+    }
+}
+
+// Preset names this encoder permutes over.
+fn get_qsv_presets() -> Vec<&'static str> {
+    vec!["veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"]
+}
+
+// One concrete combination of encoder options, ready to be rendered as ffmpeg arguments.
+#[derive(Copy, Clone)]
+struct AV1QSVSettings {
+    preset: &'static str,
+    profile: &'static str,
+    async_depth: &'static str,
+}
+
+impl AV1QSVSettings {
+    // Renders this combination as
+    // "-preset <preset> -profile:v <profile> -async_depth <async_depth>".
+    fn to_string(&self) -> String {
+        format!(
+            "-preset {} -profile:v {} -async_depth {}",
+            self.preset, self.profile, self.async_depth
+        )
+    }
+}
+
+impl Iterator for AV1QSV {
+    type Item = (usize, String);
+
+    // Yields the next permutation together with its position in the permutation list, or None
+    // once has_next() reports that nothing is left.
+    fn next(&mut self) -> Option<Self::Item> {
+        if !self.has_next() {
+            return None;
+        }
+
+        // move forward first: index points at the element that was handed out last
+        self.index += 1;
+
+        let position = self.index as usize;
+        let settings = self.permutations.get(position).unwrap().to_string();
+        Some((position, settings))
+    }
+}
+
+impl Permute for AV1QSV {
+    // Regenerates the permutation list as every combination of preset x profile x async depth,
+    // rewinds iteration to the start and returns the list.
+    fn init(&mut self) -> &Vec<String> {
+        // rewind first, otherwise we won't be able to iterate at all
+        self.index = -1;
+
+        // throw away whatever an earlier call left behind
+        self.permutations.clear();
+
+        let option_sets = vec![&self.presets, &self.profiles, &self.async_depth];
+        for combo in option_sets.into_iter().multi_cartesian_product() {
+            // values inside a combo come in the same order as option_sets above
+            let settings = AV1QSVSettings {
+                preset: combo.get(0).unwrap(),
+                profile: combo.get(1).unwrap(),
+                async_depth: combo.get(2).unwrap(),
+            };
+
+            self.permutations.push(settings.to_string());
+        }
+
+        &self.permutations
+    }
+
+    // Replaces the permutation list with the single benchmark settings string, rewinds iteration
+    // and returns the list.
+    fn run_standard_only(&mut self) -> &Vec<String> {
+        // rewind first, otherwise we won't be able to iterate at all
+        self.index = -1;
+
+        // throw away whatever an earlier call left behind
+        self.permutations.clear();
+
+        // note: the benchmark settings hard-code a single profile ("main"); if more profiles are
+        // added this will need revisiting
+        let standard = self.get_benchmark_settings();
+        self.permutations.push(standard);
+        &self.permutations
+    }
+
+    // Builds the resolution -> bitrate lookup for the given fps by handing four bitrate values
+    // to map_res_to_bitrate.
+    fn get_resolution_to_bitrate_map(fps: u32) -> HashMap<String, u32> {
+        // bitrates are within 5Mb/s of each other, using higher one
+        // note: these are the 60fps bitrate values
+        // TODO: add in bitrate values here after running the tool
+        let mut bitrates: [u32; 4] = [20, 30, 35, 70];
+
+        // 120 fps is effectively double the bitrate
+        if fps == 120 {
+            for rate in bitrates.iter_mut() {
+                *rate *= 2;
+            }
+        }
+
+        let mut map: HashMap<String, u32> = HashMap::new();
+        map_res_to_bitrate(&mut map, bitrates);
+        map
+    }
+}
diff --git a/codecs/src/lib.rs b/codecs/src/lib.rs
@@ -5,16 +5,17 @@ pub mod amf;
 pub mod permute;
 mod resolutions;
 pub mod vendor;
-pub mod intel_igpu;
+pub mod qsv;
+pub mod av1_qsv;
 
 
 pub fn get_vendor_for_codec(codec: &String) -> Vendor {
     if codec.contains("nvenc") {
         return Vendor::Nvidia;
     } else if codec.contains("amf") {
         return Vendor::AMD;
-    } else if codec.contains("h264_qsv") || codec.contains("hevc_qsv") {
-        return Vendor::InteliGPU;
+    } else if codec.contains("h264_qsv") || codec.contains("hevc_qsv") || codec.contains("av1_qsv") {
+        return Vendor::IntelQSV;
     }
 
     return Vendor::Unknown;