Scanner performance improvements
- Improved cache locality by merging the 32-bit and 64-bit value loops
- Massively improved lookup speeds by swapping `FxHashSet` for a sorted `Vec` and `binary_search`

From my testing, this brings the scan time down from 2.5 minutes to just *20 seconds*.
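
A rough sketch of both changes (not the code from this commit; plain `u32`/`u64` stand in for the real `TagHash`/`TagHash64` types, little-endian is assumed, and the `scan` function is hypothetical): the buffer is walked once in 8-byte steps, both 32-bit halves come from the same chunk, and lookups use `binary_search` on vectors that were sorted once up front instead of hash-set probes.

```rust
// Minimal illustration: one pass over 8-byte chunks, with membership tests
// done by binary search on pre-sorted vectors.
fn scan(data: &[u8], hashes32: &[u32], hashes64: &[u64]) -> (Vec<u64>, Vec<u64>) {
    let mut hits32 = Vec::new(); // offsets of matching 32-bit values
    let mut hits64 = Vec::new(); // offsets of matching 64-bit values

    for (i, chunk) in data.chunks_exact(8).enumerate() {
        let offset = (i * 8) as u64;

        // One 64-bit candidate from the whole chunk
        let v64 = u64::from_le_bytes(chunk.try_into().unwrap());
        if hashes64.binary_search(&v64).is_ok() {
            hits64.push(offset);
        }

        // Two 32-bit candidates from the same chunk, so the data only streams
        // through the cache once instead of in two separate loops
        for (j, half) in chunk.chunks_exact(4).enumerate() {
            let v32 = u32::from_le_bytes(half.try_into().unwrap());
            if hashes32.binary_search(&v32).is_ok() {
                hits32.push(offset + (j * 4) as u64);
            }
        }
    }

    (hits32, hits64)
}

fn main() {
    // binary_search is only valid on sorted data, so the lookup tables are
    // sorted once when they are built (mirroring create_scanner_context).
    let mut hashes32 = vec![0xdead_beef_u32, 0x8080_0065, 0x1234_5678];
    let mut hashes64 = vec![0x8080_0065_dead_beef_u64];
    hashes32.sort_unstable();
    hashes64.sort_unstable();

    let data = 0x1234_5678_u32.to_le_bytes().repeat(4);
    let (h32, h64) = scan(&data, &hashes32, &hashes64);
    println!("32-bit hits at {h32:?}, 64-bit hits at {h64:?}");
}
```

Compared with a hash set, the sorted `Vec` keeps all candidates in one contiguous allocation and avoids hashing each value; the actual lookup tables are built and sorted once in `create_scanner_context` in the `src/scanner.rs` diff below.
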
cohaereo committed Mar 16, 2024
1 parent f8ec4cd commit ce3b050
Showing 5 changed files with 147 additions and 88 deletions.
26 changes: 20 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default.

3 changes: 2 additions & 1 deletion Cargo.toml
@@ -6,7 +6,7 @@ edition = "2021"
[dependencies]
anyhow = "1.0.75"
clap = { version = "4.3.11", features = ["derive"] }
destiny-pkg = { git = "https://github.com/v4nguard/destiny-pkg", version = "0.9.1" }
destiny-pkg = { git = "https://github.com/v4nguard/destiny-pkg", version = "0.9.3" }
vgmstream = { git = "https://github.com/cohaereo/vgmstream-rs/", version = "0.1.2" }
eframe = { version = "0.23.0", default-features = false, features = [
"default_fonts",
@@ -39,6 +39,7 @@ wav = "1.0.0"
linked-hash-map = "0.5.6"
bytemuck = "1.14.1"
rustc-hash = "1.1.0"
profiling = { version = "1.0.15", features = [] }

[profile.dev]
opt-level = 3
54 changes: 42 additions & 12 deletions src/gui/common.rs
@@ -2,7 +2,8 @@ use std::fs::File;

use destiny_pkg::{TagHash, TagHash64};
use eframe::egui;
use log::{error, warn};
use log::{error, info, warn};
use std::io::Write;

use crate::{packages::package_manager, tagtypes::TagType};

@@ -118,14 +119,43 @@ pub fn open_audio_file_in_default_application(tag: TagHash, ext: &str) {
});
}

// pub fn dump_wwise_info(package_id: u16) {
//     let package_path = package_manager()
//         .package_paths
//         .get(&package_id)
//         .cloned()
//         .unwrap();
//     let version = package_manager().version;
//     std::thread::spawn(move || {
//         let package = version.open(&package_path.path).unwrap();
//     });
// }
pub fn dump_wwise_info(package_id: u16) {
    let package_path = package_manager()
        .package_paths
        .get(&package_id)
        .cloned()
        .unwrap();
    let version = package_manager().version;
    std::thread::spawn(move || {
        let mut info_file = File::create(format!("wwise_info_{:04x}.txt", package_id)).unwrap();
        let package = version.open(&package_path.path).unwrap();
        let mut infos = vec![];
        for (i, _e) in package.entries().iter().enumerate().filter(|(_, e)| {
            TagType::from_type_subtype(e.file_type, e.file_subtype) == TagType::WwiseStream
        }) {
            let tag = TagHash::new(package_id, i as u16);
            if let Ok(p) = package.read_entry(i) {
                if let Ok(info) = vgmstream::read_file_info(&p, Some(format!(".\\{tag}.wem"))) {
                    infos.push((tag, info));
                }
            }
        }

        infos.sort_by_key(|(_, info)| {
            ((info.num_samples as f32 / info.sample_rate as f32) * 100.0) as usize
        });

        for (tag, info) in infos {
            writeln!(
                &mut info_file,
                "{tag} - samplerate={}hz samples={} duration={:.2}",
                info.sample_rate,
                info.num_samples,
                info.num_samples as f32 / info.sample_rate as f32
            )
            .ok();
        }

        info!("dump_wwise_info: Done");
    });
}
19 changes: 12 additions & 7 deletions src/gui/packages.rs
@@ -3,7 +3,12 @@ use eframe::egui::{self, RichText};

use crate::{packages::package_manager, tagtypes::TagType};

use super::{common::tag_context, tag::format_tag_entry, texture::TextureCache, View, ViewAction};
use super::{
    common::{dump_wwise_info, tag_context},
    tag::format_tag_entry,
    texture::TextureCache,
    View, ViewAction,
};

pub struct PackagesView {
selected_package: u16,
@@ -44,9 +49,9 @@ impl View for PackagesView {
.max_width(f32::INFINITY)
.show(ui, |ui| {
for (id, path) in package_manager().package_paths.iter() {
let package_name = format!("{}_{}", path.name, path.id);
if !self.package_filter.is_empty()
&& !path
.name
&& !package_name
.to_lowercase()
.contains(&self.package_filter.to_lowercase())
{
@@ -57,7 +62,7 @@
.selectable_value(
&mut self.selected_package,
*id,
format!("{id:04x}: {}", path.name),
format!("{id:04x}: {package_name}"),
)
.changed()
{
@@ -92,9 +97,9 @@
if self.selected_package == u16::MAX {
ui.label(RichText::new("No package selected").italics());
} else {
// if ui.button("Export audio info").clicked() {
// dump_wwise_info(self.selected_package);
// }
if ui.button("Export audio info").clicked() {
dump_wwise_info(self.selected_package);
}

for (i, (label, tag_type)) in self
.package_entry_search_cache
133 changes: 71 additions & 62 deletions src/scanner.rs
@@ -13,7 +13,7 @@ use eframe::epaint::mutex::RwLock;
use itertools::Itertools;
use log::{error, info, warn};
use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
use rustc_hash::{FxHashMap, FxHashSet};
use rustc_hash::FxHashMap;

use crate::{
packages::package_manager,
@@ -43,9 +43,9 @@

// Shareable read-only context
pub struct ScannerContext {
pub valid_file_hashes: FxHashSet<TagHash>,
pub valid_file_hashes64: FxHashSet<TagHash64>,
pub known_string_hashes: FxHashSet<u32>,
pub valid_file_hashes: Vec<TagHash>,
pub valid_file_hashes64: Vec<TagHash64>,
pub known_string_hashes: Vec<u32>,
pub endian: Endian,
}

@@ -98,74 +98,71 @@ pub fn fnv1(data: &[u8]) -> u32 {
}

pub fn scan_file(context: &ScannerContext, data: &[u8]) -> ScanResult {
let mut r = ScanResult::default();

for (i, v) in data.chunks_exact(4).enumerate() {
let m: [u8; 4] = v.try_into().unwrap();
let value = u32_from_endian(context.endian, m);
profiling::scope!(
"scan_file",
format!("data len = {} bytes", data.len()).as_str()
);

let offset = (i * 4) as u64;
let hash = TagHash(value);
let mut r = ScanResult::default();

if hash.is_pkg_file() && context.valid_file_hashes.contains(&hash) {
r.file_hashes.push(ScannedHash { offset, hash });
for (i, v) in data.chunks_exact(8).enumerate() {
let m: [u8; 8] = v.try_into().unwrap();
let m32_1: [u8; 4] = v[0..4].try_into().unwrap();
let m32_2: [u8; 4] = v[4..8].try_into().unwrap();
let value64 = u64_from_endian(context.endian, m);
let value_hi = u32_from_endian(context.endian, m32_1);
let value_lo = u32_from_endian(context.endian, m32_2);
let offset_u64 = (i * 8) as u64;

let hash = TagHash64(value64);
{
profiling::scope!("check 64 bit hash");
if context.valid_file_hashes64.binary_search(&hash).is_ok() {
profiling::scope!("insert 64 bit hash");
r.file_hashes64.push(ScannedHash {
offset: offset_u64,
hash,
});
}
}

// if hash.is_valid() && !hash.is_pkg_file() {
// r.classes.push(ScannedHash {
// offset,
// hash: value,
// });
// }

if value == 0x80800065 {
r.raw_strings.extend(
read_raw_string_blob(data, offset)
.into_iter()
.map(|(_, s)| s),
);
}
profiling::scope!("32 bit chunks");
for (vi, value) in [value_hi, value_lo].into_iter().enumerate() {
let offset = offset_u64 + (vi * 4) as u64;
let hash = TagHash(value);

if value != 0x811c9dc5 && context.known_string_hashes.contains(&value) {
r.string_hashes.push(ScannedHash {
offset,
hash: value,
});
}
}
if hash.is_pkg_file() && context.valid_file_hashes.binary_search(&hash).is_ok() {
r.file_hashes.push(ScannedHash { offset, hash });
}

for (i, v) in data.chunks_exact(8).enumerate() {
let m: [u8; 8] = v.try_into().unwrap();
let value = u64_from_endian(context.endian, m);
// if hash.is_valid() && !hash.is_pkg_file() {
// r.classes.push(ScannedHash {
// offset,
// hash: value,
// });
// }

if value == 0x80800065 {
r.raw_strings.extend(
read_raw_string_blob(data, offset)
.into_iter()
.map(|(_, s)| s),
);
}

let offset = (i * 8) as u64;
let hash = TagHash64(value);
if context.valid_file_hashes64.contains(&hash) {
r.file_hashes64.push(ScannedHash { offset, hash });
if value != 0x811c9dc5 && context.known_string_hashes.binary_search(&value).is_ok() {
r.string_hashes.push(ScannedHash {
offset,
hash: value,
});
}
}
}

// let mut cur = Cursor::new(data);
// for c in &r.classes {
// if c.hash == 0x80809fb8 {
// cur.seek(SeekFrom::Start(c.offset + 4)).unwrap();

// let mut count_bytes = [0; 8];
// cur.read_exact(&mut count_bytes).unwrap();
// let mut class_bytes = [0; 4];
// cur.read_exact(&mut class_bytes).unwrap();

// r.arrays.push(ScannedArray {
// offset: c.offset + 4,
// count: u64::from_le_bytes(count_bytes) as usize,
// class: u32::from_le_bytes(class_bytes),
// });
// }
// }

r
}

#[profiling::function]
pub fn read_raw_string_blob(data: &[u8], offset: u64) -> Vec<(u64, String)> {
let mut strings = vec![];

@@ -222,7 +219,7 @@ pub fn create_scanner_context(package_manager: &PackageManager) -> anyhow::Resul

let stringmap = create_stringmap()?;

Ok(ScannerContext {
let mut res = ScannerContext {
valid_file_hashes: package_manager
.package_entry_index
.iter()
Expand All @@ -241,7 +238,13 @@ pub fn create_scanner_context(package_manager: &PackageManager) -> anyhow::Resul
.collect(),
known_string_hashes: stringmap.keys().cloned().collect(),
endian,
})
};

res.valid_file_hashes.sort_unstable();
res.valid_file_hashes64.sort_unstable();
res.known_string_hashes.sort_unstable();

Ok(res)
}

#[derive(Copy, Clone)]
@@ -386,6 +389,7 @@ pub fn load_tag_cache(version: PackageVersion) -> TagCache {
let cache: FxHashMap<TagHash, ScanResult> = all_pkgs
.par_iter()
.map_with(scanner_context, |context, path| {
profiling::scope!("scan_pkg", &path.path);
let current_package = {
let mut p = SCANNER_PROGRESS.write();
let current_package = if let ScanStatus::Scanning {
@@ -404,8 +408,12 @@

current_package
};

info!("Opening pkg {path} ({}/{package_count})", current_package);
let pkg = version.open(&path.path).unwrap();
let pkg = {
profiling::scope!("open package");
version.open(&path.path).unwrap()
};

let mut all_tags = if version.is_d1() {
[pkg.get_all_by_type(0, None)].concat()
@@ -423,6 +431,7 @@
let mut results = FxHashMap::default();
for (t, _) in all_tags {
let hash = TagHash::new(pkg.pkg_id(), t as u16);
profiling::scope!("scan_tag", format!("tag {hash}").as_str());

let data = match pkg.read_entry(t) {
Ok(d) => d,

