Skip to content

Commit

Permalink
Refactor path resolution to allow preloading of MFTEntrySummary (#93)
Browse files Browse the repository at this point in the history
When reconstructing USN journal paths we actually have access to more
information than contained in the MFT because the USN journal records
partial filename information. This PR adds the ability to preload this
partial file information in case it is encountered during path
reconstruction.

See https://cybercx.com.au/blog/ntfs-usnjrnl-rewind/ for more details.
  • Loading branch information
scudette authored Jul 30, 2024
1 parent 9c6c647 commit 6952e90
Show file tree
Hide file tree
Showing 9 changed files with 208 additions and 72 deletions.
48 changes: 45 additions & 3 deletions bin/usn.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"context"
"fmt"
"regexp"
"strings"

kingpin "gopkg.in/alecthomas/kingpin.v2"
Expand All @@ -19,6 +20,9 @@ var (

usn_command_watch = usn_command.Flag(
"watch", "Watch the USN for changes").Bool()

usn_command_filename_filter = usn_command.Flag(
"file_filter", "Regex to match the filename").Default(".").String()
)

const template = `
Expand Down Expand Up @@ -61,10 +65,48 @@ func doUSN() {
ntfs_ctx, err := parser.GetNTFSContext(reader, 0)
kingpin.FatalIfError(err, "Can not open filesystem")

for record := range parser.ParseUSN(context.Background(), ntfs_ctx, 0) {
filename_filter, err := regexp.Compile(*usn_command_filename_filter)
kingpin.FatalIfError(err, "Filename filter")

usn_stream, err := parser.OpenUSNStream(ntfs_ctx)
kingpin.FatalIfError(err, "OpenUSNStream")

for record := range parser.ParseUSN(context.Background(),
ntfs_ctx, usn_stream, 0) {
mft_id := record.FileReferenceNumberID()
mft_seq := uint16(record.FileReferenceNumberSequence())

ntfs_ctx.SetPreload(mft_id, mft_seq,
func(entry *parser.MFTEntrySummary) (*parser.MFTEntrySummary, bool) {
if entry != nil {
return entry, false
}

// Add a fake entry to resolve the filename
return &parser.MFTEntrySummary{
Sequence: mft_seq,
Filenames: []parser.FNSummary{{
Name: record.Filename(),
NameType: "DOS+Win32",
ParentEntryNumber: record.ParentFileReferenceNumberID(),
ParentSequenceNumber: uint16(
record.ParentFileReferenceNumberSequence()),
}},
}, true
})
}

for record := range parser.ParseUSN(
context.Background(), ntfs_ctx, usn_stream, 0) {
filename := record.Filename()

if !filename_filter.MatchString(filename) {
continue
}

fmt.Printf(template, record.Usn(), record.Offset,
record.Filename(),
record.FullPath(), record.TimeStamp(),
filename,
record.Links(), record.TimeStamp(),
strings.Join(record.Reason(), ", "),
strings.Join(record.FileAttributes(), ", "),
strings.Join(record.SourceInfo(), ", "),
Expand Down
65 changes: 62 additions & 3 deletions parser/caching.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,79 @@ type MFTEntryCache struct {
ntfs *NTFSContext

lru *LRU

preloaded map[uint64]*MFTEntrySummary
}

func NewMFTEntryCache(ntfs *NTFSContext) *MFTEntryCache {
lru, _ := NewLRU(10000, nil, "MFTEntryCache")
return &MFTEntryCache{
ntfs: ntfs,
lru: lru,
ntfs: ntfs,
lru: lru,
preloaded: make(map[uint64]*MFTEntrySummary),
}
}

// SetPreload presets persisted information in the cache about known
// MFT entries from sources other than the MFT itself. In particular,
// the USN journal is often a source of additional historical
// information. When resolving an MFT entry summary, we first look to
// the MFT itself, however if the sequence number does not match the
// required entry, we look to the preloaded entry for a better match.
//
// This allows us to substitute historical information (from the USN
// journal) while resolving full paths.
//
// The id and seq together select the preloaded slot. The callback
// receives the entry currently stored in that slot (nil when there is
// none) and returns the entry to store plus a flag saying whether the
// slot should be updated; when the flag is false the slot is left
// untouched.
//
// The preloaded map is guarded by the same mutex GetSummary holds
// while reading it. The lock is released while the callback runs so a
// callback that resolves other entries through this cache cannot
// deadlock; as a result two concurrent calls for the same slot may
// both observe the old entry, and the last writer wins.
func (self *MFTEntryCache) SetPreload(id uint64, seq uint16,
	cb func(entry *MFTEntrySummary) (*MFTEntrySummary, bool)) {
	// The sequence number occupies the top 16 bits of the key.
	key := id | uint64(seq)<<48

	self.mu.Lock()
	entry := self.preloaded[key]
	self.mu.Unlock()

	// Optionally allow the callback to update the preloaded entry.
	new_entry, updated := cb(entry)
	if !updated {
		return
	}

	self.mu.Lock()
	self.preloaded[key] = new_entry
	self.mu.Unlock()
}

func (self *MFTEntryCache) GetSummary(id uint64) (*MFTEntrySummary, error) {
// GetSummary gets a MFTEntrySummary for the mft id. The sequence
// number is a hint for the required sequence of the entry. This
// function may return an MFTEntrySummary with a different sequence
// than requested.
//
// Lookup order:
//  1. The entry read from the MFT itself, when its sequence number
//     matches seq.
//  2. A preloaded entry registered for (id, seq) via SetPreload.
//  3. The MFT entry with the mismatched sequence number - callers can
//     add an error for the incorrect sequence number.
//
// An error is returned only when the MFT entry itself can not be
// summarized.
func (self *MFTEntryCache) GetSummary(
	id uint64, seq uint16) (*MFTEntrySummary, error) {
	self.mu.Lock()
	defer self.mu.Unlock()

	// We prefer to get the real entry from the MFT because it has all
	// the short names etc.
	res, err := self._GetSummary(id)
	if err != nil {
		return nil, err
	}

	// The MFT entry has the required sequence number - use it.
	if res.Sequence == seq {
		return res, nil
	}

	// The MFT entry is not correct (does not have the required
	// sequence number), so check the preloaded set for a match. A
	// distinct name is used so the MFT result is not shadowed, and a
	// nil preloaded entry is ignored so callers never receive a nil
	// summary together with a nil error.
	key := id | uint64(seq)<<48
	if preloaded, ok := self.preloaded[key]; ok && preloaded != nil {
		// Yep - the sequence number is correct.
		return preloaded, nil
	}

	// Just return the incorrect entry - callers can add an error for
	// incorrect sequence number.
	return res, nil
}

// Get the summary from the underlying MFT itself.
func (self *MFTEntryCache) _GetSummary(
id uint64) (*MFTEntrySummary, error) {
res_any, pres := self.lru.Get(int(id))
if pres {
res, ok := res_any.(*MFTEntrySummary)
Expand Down
17 changes: 13 additions & 4 deletions parser/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ type NTFSContext struct {
mft_entry_lru *LRU

mft_summary_cache *MFTEntryCache

full_path_resolver *FullPathResolver
}

func newNTFSContext(image io.ReaderAt, name string) *NTFSContext {
Expand All @@ -43,9 +45,20 @@ func newNTFSContext(image io.ReaderAt, name string) *NTFSContext {
}

ntfs.mft_summary_cache = NewMFTEntryCache(ntfs)
ntfs.full_path_resolver = &FullPathResolver{
ntfs: ntfs,
options: ntfs.options,
mft_cache: ntfs.mft_summary_cache,
}

return ntfs
}

// SetPreload forwards a preload request for the MFT entry identified
// by id and seq to this context's MFT summary cache. The callback is
// handed to the cache unchanged.
func (self *NTFSContext) SetPreload(
	id uint64,
	seq uint16,
	cb func(entry *MFTEntrySummary) (*MFTEntrySummary, bool)) {
	cache := self.mft_summary_cache
	cache.SetPreload(id, seq, cb)
}

func (self *NTFSContext) Copy() *NTFSContext {
self.mu.Lock()
defer self.mu.Unlock()
Expand Down Expand Up @@ -99,10 +112,6 @@ func (self *NTFSContext) GetRecordSize() int64 {
return self.RecordSize
}

func (self *NTFSContext) GetMFTSummary(id uint64) (*MFTEntrySummary, error) {
return self.mft_summary_cache.GetSummary(id)
}

func (self *NTFSContext) GetMFT(id int64) (*MFT_ENTRY, error) {
// Check the cache first
cached_any, pres := self.mft_entry_lru.Get(int(id))
Expand Down
8 changes: 6 additions & 2 deletions parser/easy.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,8 +465,12 @@ func findNextVCN(attributes []*attrInfo, selected_attribute *attrInfo) (*attrInf
// all related attributes and wraps them in a RangeReader to appear as
// a single stream. This function is what you need when you want to
// read the full file.
func OpenStream(ntfs *NTFSContext,
mft_entry *MFT_ENTRY, attr_type uint64, attr_id uint16, attr_name string) (RangeReaderAt, error) {
func OpenStream(
ntfs *NTFSContext,
mft_entry *MFT_ENTRY,
attr_type uint64,
attr_id uint16,
attr_name string) (RangeReaderAt, error) {

result := &RangeReader{}

Expand Down
34 changes: 24 additions & 10 deletions parser/hardlinks.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,32 +55,45 @@ func (self *Visitor) Components() [][]string {
return result
}

// The FullPathResolver resolves an MFT entry into a full path.
//
// This resolver can use information from both the USN journal and the
// MFT to reconstruct the full path of an mft entry.
type FullPathResolver struct {
// The NTFS context that owns this resolver.
ntfs *NTFSContext
// Resolution settings: the resolver reads MaxLinks,
// IncludeShortNames, PrefixComponents and MaxDirectoryDepth.
options Options

// Cache queried (via GetSummary) for the summary of the entry
// being resolved and of each parent directory.
mft_cache *MFTEntryCache
}

// Walks the MFT entry to get all file names to this MFT entry.
func GetHardLinks(ntfs *NTFSContext, mft_id uint64, max int) [][]string {
func (self *FullPathResolver) GetHardLinks(
mft_id uint64, seq_number uint16, max int) [][]string {
if max == 0 {
max = ntfs.options.MaxLinks
max = self.options.MaxLinks
}

visitor := &Visitor{
Paths: [][]string{[]string{}},
Max: max,
IncludeShortNames: ntfs.options.IncludeShortNames,
Prefix: ntfs.options.PrefixComponents,
IncludeShortNames: self.options.IncludeShortNames,
Prefix: self.options.PrefixComponents,
}

mft_entry_summary, err := ntfs.GetMFTSummary(mft_id)
mft_entry_summary, err := self.mft_cache.GetSummary(
mft_id, seq_number)
if err != nil {
return nil
}
getNames(ntfs, mft_entry_summary, visitor, 0, 0)
self.getNames(mft_entry_summary, visitor, 0, 0)

return visitor.Components()
}

func getNames(ntfs *NTFSContext,
func (self *FullPathResolver) getNames(
mft_entry *MFTEntrySummary, visitor *Visitor, idx, depth int) {

if depth > ntfs.options.MaxDirectoryDepth {
if depth > self.options.MaxDirectoryDepth {
visitor.AddComponent(idx, "<DirTooDeep>")
visitor.AddComponent(idx, "<Err>")
return
Expand Down Expand Up @@ -135,7 +148,8 @@ func getNames(ntfs *NTFSContext,
continue
}

parent_entry, err := ntfs.GetMFTSummary(fn.ParentEntryNumber)
parent_entry, err := self.mft_cache.GetSummary(
fn.ParentEntryNumber, fn.ParentSequenceNumber)
if err != nil {
visitor.AddComponent(visitor_idx, err.Error())
visitor.AddComponent(visitor_idx, "<Err>")
Expand All @@ -150,6 +164,6 @@ func getNames(ntfs *NTFSContext,
continue
}

getNames(ntfs, parent_entry, visitor, visitor_idx, depth+1)
self.getNames(parent_entry, visitor, visitor_idx, depth+1)
}
}
6 changes: 4 additions & 2 deletions parser/mft.go
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,8 @@ func (self *MFTHighlight) FullPath() string {
}

func (self *MFTHighlight) Links() []string {
components := GetHardLinks(self.ntfs_ctx, uint64(self.EntryNumber),
components := self.ntfs_ctx.full_path_resolver.GetHardLinks(
uint64(self.EntryNumber), self.SequenceNumber,
DefaultMaxLinks)
result := make([]string, 0, len(components))
for _, l := range components {
Expand Down Expand Up @@ -404,7 +405,8 @@ func (self *MFTHighlight) FileName() string {
// so you should consult the Links() to get more info.
func (self *MFTHighlight) Components() []string {
components := []string{}
links := GetHardLinks(self.ntfs_ctx, uint64(self.EntryNumber), 1)
links := self.ntfs_ctx.full_path_resolver.GetHardLinks(
uint64(self.EntryNumber), self.SequenceNumber, 1)
if len(links) > 0 {
components = links[0]
}
Expand Down
3 changes: 2 additions & 1 deletion parser/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ func ModelMFTEntry(ntfs *NTFSContext, mft_entry *MFT_ENTRY) (*NTFSFileInformatio
})
}

for _, l := range GetHardLinks(ntfs, uint64(mft_id), DefaultMaxLinks) {
for _, l := range ntfs.full_path_resolver.GetHardLinks(
uint64(mft_id), result.SequenceNumber, DefaultMaxLinks) {
result.Hardlinks = append(result.Hardlinks, strings.Join(l, "\\"))
}

Expand Down
Loading

0 comments on commit 6952e90

Please sign in to comment.