Skip to content

Commit

Permalink
Improved JPEG end-of-file detection
Browse files Browse the repository at this point in the history
  • Loading branch information
devttys0 committed Nov 20, 2024
1 parent b7abdab commit eddfe53
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 18 deletions.
98 changes: 86 additions & 12 deletions src/extractors/jpeg.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use crate::extractors::common::{Chroot, ExtractionResult, Extractor, ExtractorType};
use aho_corasick::AhoCorasick;

/// Defines the internal extractor function for carving out JPEG images
///
Expand Down Expand Up @@ -58,22 +57,97 @@ pub fn extract_jpeg_image(
result
}

/// Parses JPEG markers until the EOF marker is found
fn get_jpeg_data_size(jpeg_data: &[u8]) -> Option<usize> {
const EOF_SIZE: usize = 2;
const JPEG_DELIM: u8 = 0xFF;
const SIZE_FIELD_LENGTH: usize = 2;
const SOS_SCAN_AHEAD_LENGTH: usize = 2;
const MARKER_MAGIC: u8 = 0xFF;
const SOS_MARKER: u8 = 0xDA;
const EOF_MARKER: u8 = 0xD9;

// This is a short EOF marker to search for, but in a valid JPEG it *should* only occur at EOF
let grep = AhoCorasick::new(vec![b"\xFF\xD9"]).unwrap();
let mut next_marker_offset: usize = 0;

for eof_match in grep.find_overlapping_iter(jpeg_data) {
let eof_candidate: usize = eof_match.start() + EOF_SIZE;
// Most JPEG markers include a size field; these do not
let no_length_markers: Vec<u8> = vec![
0x00, 0x01, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, EOF_MARKER,
];

// Make sure the expected EOF marker is not immediately followed by 0xFF (which would indicate the JPEG continues...)
if eof_candidate < jpeg_data.len() && jpeg_data[eof_candidate] == JPEG_DELIM {
continue;
}
// In a Start Of Scan block, ignore 0xFF marker magics that are followed by one of these bytes
let sos_skip_markers: Vec<u8> = vec![0x00, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7];

loop {
// Read the marker magic byte
match jpeg_data.get(next_marker_offset) {
None => {
break;
}
Some(marker_magic) => {
// Make sure this is the correct marker magic
if *marker_magic != MARKER_MAGIC {
break;
}

// Include marker magic byte in the size of the marker
next_marker_offset += 1;

// Read the marker ID byte
match jpeg_data.get(next_marker_offset) {
None => {
break;
}
Some(marker_id) => {
// Include marker ID byte in the size of the marker
next_marker_offset += 1;

return Some(eof_match.start() + EOF_SIZE);
// Most markers have a 2-byte length field after the marker, stored in big-endian
if !no_length_markers.contains(marker_id) {
match jpeg_data
.get(next_marker_offset..next_marker_offset + SIZE_FIELD_LENGTH)
{
None => {
break;
}
Some(size_bytes) => {
next_marker_offset +=
u16::from_be_bytes(size_bytes.try_into().unwrap()) as usize;
}
}
}

// Start Of Scan markers have a size field, but are immediately followed by data not included in
// the size field. Need to scan all the bytes until the next valid JPEG marker is found.
if *marker_id == SOS_MARKER {
loop {
// Get the next two bytes
match jpeg_data.get(
next_marker_offset..next_marker_offset + SOS_SCAN_AHEAD_LENGTH,
) {
None => {
break;
}
Some(next_bytes) => {
// Check if the next byte is a marker magic byte, *and* that it is not followed by a marker escape byte
if next_bytes[0] == MARKER_MAGIC
&& !sos_skip_markers.contains(&next_bytes[1])
{
break;
} else {
// Go to the next byte
next_marker_offset += 1;
}
}
}
}
}

// EOF marker indicates the end of the JPEG image
if *marker_id == EOF_MARKER {
return Some(next_marker_offset);
}
}
}
}
}
}

None
Expand Down
13 changes: 7 additions & 6 deletions src/signatures/jpeg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,16 @@ pub fn jpeg_parser(file_data: &[u8], offset: usize) -> Result<SignatureResult, S
if dry_run.success {
// Get the total size of the JPEG
if let Some(jpeg_size) = dry_run.size {
// If the start of a file is a JPEG, there's no need to extract it
if offset == 0 {
result.extraction_declined = true;
}

// Report signature result
// Report signature result data
result.size = jpeg_size;
result.description =
format!("{}, total size: {} bytes", result.description, result.size);

// If this entire file is a JPEG, no need to extract it
if offset == 0 && result.size == file_data.len() {
result.extraction_declined = true;
}

return Ok(result);
}
}
Expand Down

0 comments on commit eddfe53

Please sign in to comment.