Skip to content

Commit

Permalink
Merge pull request #1751 from cruessler/wip-changes-against-more-than…
Browse files Browse the repository at this point in the history
…-one-parent

Rework how blame is passed to parents
  • Loading branch information
Byron authored Jan 10, 2025
2 parents 1ca480a + cbf7f51 commit dceb47d
Show file tree
Hide file tree
Showing 5 changed files with 497 additions and 517 deletions.
57 changes: 50 additions & 7 deletions gix-blame/src/file/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,10 @@ where
if more_than_one_parent {
// None of the changes affected the file we’re currently blaming.
// Copy blame to parent.
for unblamed_hunk in &mut hunks_to_blame {
unblamed_hunk.clone_blame(suspect, parent_id);
}
hunks_to_blame = hunks_to_blame
.into_iter()
.map(|unblamed_hunk| unblamed_hunk.clone_blame(suspect, parent_id))
.collect();
} else {
pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame);
}
Expand All @@ -196,14 +197,56 @@ where
}
gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => {
let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats)?;
hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect);
pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame);
hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent_id);
}
}
}
if more_than_one_parent {
for unblamed_hunk in &mut hunks_to_blame {

hunks_to_blame = hunks_to_blame
.into_iter()
.filter_map(|mut unblamed_hunk| {
if unblamed_hunk.suspects.len() == 1 {
if let Some(entry) = BlameEntry::from_unblamed_hunk(&unblamed_hunk, suspect) {
// At this point, we have copied blame for every hunk to a parent. Hunks
// that have only `suspect` left in `suspects` have not passed blame to any
// parent and so they can be converted to a `BlameEntry` and moved to
// `out`.
out.push(entry);

return None;
}
}

unblamed_hunk.remove_blame(suspect);

Some(unblamed_hunk)
})
.collect();

// This block asserts that line ranges for each suspect never overlap. If they did overlap
// this would mean that the same line in a *Source File* would map to more than one line in
// the *Blamed File* and this is not possible.
#[cfg(debug_assertions)]
{
let ranges = hunks_to_blame.iter().fold(
std::collections::BTreeMap::<ObjectId, Vec<Range<u32>>>::new(),
|mut acc, hunk| {
for (suspect, range) in hunk.suspects.clone() {
acc.entry(suspect).or_default().push(range);
}

acc
},
);

for (_, mut ranges) in ranges {
ranges.sort_by(|a, b| a.start.cmp(&b.start));

for window in ranges.windows(2) {
if let [a, b] = window {
assert!(a.end <= b.start, "#{hunks_to_blame:#?}");
}
}
}
}
}
Expand Down
119 changes: 37 additions & 82 deletions gix-blame/src/file/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ pub(super) mod function;
///
/// This is the core of the blame implementation as it matches regions in *Source File* to the *Blamed File*.
fn process_change(
out: &mut Vec<BlameEntry>,
new_hunks_to_blame: &mut Vec<UnblamedHunk>,
offset: &mut Offset,
suspect: ObjectId,
parent: ObjectId,
hunk: Option<UnblamedHunk>,
change: Option<Change>,
) -> (Option<UnblamedHunk>, Option<Change>) {
Expand All @@ -40,6 +40,8 @@ fn process_change(
match (hunk, change) {
(Some(hunk), Some(Change::Unchanged(unchanged))) => {
let Some(range_in_suspect) = hunk.suspects.get(&suspect) else {
// We don’t clone blame to `parent` as `suspect` has nothing to do with this
// `hunk`.
new_hunks_to_blame.push(hunk);
return (None, Some(Change::Unchanged(unchanged)));
};
Expand All @@ -64,7 +66,7 @@ fn process_change(

// Nothing to do with `hunk` except shifting it,
// but `unchanged` needs to be checked against the next hunk to catch up.
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
(None, Some(Change::Unchanged(unchanged)))
}
(false, false) => {
Expand Down Expand Up @@ -93,7 +95,7 @@ fn process_change(

// Nothing to do with `hunk` except shifting it,
// but `unchanged` needs to be checked against the next hunk to catch up.
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
(None, Some(Change::Unchanged(unchanged)))
}
}
Expand Down Expand Up @@ -123,7 +125,7 @@ fn process_change(
}
Either::Right((before, after)) => {
// requeue the left side `before` after offsetting it…
new_hunks_to_blame.push(before.shift_by(suspect, *offset));
new_hunks_to_blame.push(before.clone_blame(suspect, parent).shift_by(parent, *offset));
// …and treat `after` as `new_hunk`, which contains the `added` range.
after
}
Expand All @@ -132,20 +134,18 @@ fn process_change(
*offset += added.end - added.start;
*offset -= number_of_lines_deleted;

// The overlapping `added` section was successfully located.
out.push(BlameEntry::with_offset(
added.clone(),
suspect,
hunk_starting_at_added.offset_for(suspect),
));

// The overlapping `added` section was successfully located.
// Re-split at the end of `added` to continue with what's after.
match hunk_starting_at_added.split_at(suspect, added.end) {
Either::Left(_) => {
Either::Left(hunk) => {
new_hunks_to_blame.push(hunk);

// Nothing to split, so we are done with this hunk.
(None, None)
}
Either::Right((_, after)) => {
Either::Right((hunk, after)) => {
new_hunks_to_blame.push(hunk);

// Keep processing the unblamed range after `added`
(Some(after), None)
}
Expand All @@ -162,17 +162,13 @@ fn process_change(
Either::Left(hunk) => hunk,
Either::Right((before, after)) => {
// Keep looking for the left side of the unblamed portion.
new_hunks_to_blame.push(before.shift_by(suspect, *offset));
new_hunks_to_blame.push(before.clone_blame(suspect, parent).shift_by(parent, *offset));
after
}
};

// We can 'blame' the overlapping area of `added` and `hunk`.
out.push(BlameEntry::with_offset(
added.start..range_in_suspect.end,
suspect,
hunk_starting_at_added.offset_for(suspect),
));
new_hunks_to_blame.push(hunk_starting_at_added);
// Keep processing `added`, it's portion past `hunk` may still contribute.
(None, Some(Change::AddedOrReplaced(added, number_of_lines_deleted)))
}
Expand All @@ -183,18 +179,20 @@ fn process_change(
// <---> (blamed)
// <--> (new hunk)

out.push(BlameEntry::with_offset(
range_in_suspect.start..added.end,
suspect,
hunk.offset_for(suspect),
));

*offset += added.end - added.start;
*offset -= number_of_lines_deleted;

match hunk.split_at(suspect, added.end) {
Either::Left(_) => (None, None),
Either::Right((_, after)) => (Some(after), None),
Either::Left(hunk) => {
new_hunks_to_blame.push(hunk);

(None, None)
}
Either::Right((before, after)) => {
new_hunks_to_blame.push(before);

(Some(after), None)
}
}
}
(false, false) => {
Expand Down Expand Up @@ -222,7 +220,7 @@ fn process_change(
// <----> (added)

// Retry `hunk` once there is overlapping changes to process.
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));

// Let hunks catchup with this change.
(
Expand All @@ -237,11 +235,7 @@ fn process_change(
// <---> (blamed)

// Successfully blame the whole range.
out.push(BlameEntry::with_offset(
range_in_suspect.clone(),
suspect,
hunk.offset_for(suspect),
));
new_hunks_to_blame.push(hunk);

// And keep processing `added` with future `hunks` that might be affected by it.
(
Expand Down Expand Up @@ -279,7 +273,7 @@ fn process_change(
}
Either::Right((before, after)) => {
// `before` isn't affected by deletion, so keep it for later.
new_hunks_to_blame.push(before.shift_by(suspect, *offset));
new_hunks_to_blame.push(before.clone_blame(suspect, parent).shift_by(parent, *offset));
// after will be affected by offset, and we will see if there are more changes affecting it.
after
}
Expand All @@ -291,7 +285,8 @@ fn process_change(
// | (line_number_in_destination)

// Catchup with changes.
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));

(
None,
Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted)),
Expand All @@ -300,7 +295,7 @@ fn process_change(
}
(Some(hunk), None) => {
// nothing to do - changes are exhausted, re-evaluate `hunk`.
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
(None, None)
}
(None, Some(Change::Unchanged(_))) => {
Expand Down Expand Up @@ -328,10 +323,10 @@ fn process_change(
/// Consume `hunks_to_blame` and `changes` to pair up matches ranges (also overlapping) with each other.
/// Once a match is found, it's pushed onto `out`.
fn process_changes(
out: &mut Vec<BlameEntry>,
hunks_to_blame: Vec<UnblamedHunk>,
changes: Vec<Change>,
suspect: ObjectId,
parent: ObjectId,
) -> Vec<UnblamedHunk> {
let mut hunks_iter = hunks_to_blame.into_iter();
let mut changes_iter = changes.into_iter();
Expand All @@ -344,10 +339,10 @@ fn process_changes(

loop {
(hunk, change) = process_change(
out,
&mut new_hunks_to_blame,
&mut offset_in_destination,
suspect,
parent,
hunk,
change,
);
Expand Down Expand Up @@ -407,30 +402,20 @@ impl UnblamedHunk {
}
}

fn offset_for(&self, suspect: ObjectId) -> Offset {
let range_in_suspect = self
.suspects
.get(&suspect)
.expect("Internal and we know suspect is present");

if self.range_in_blamed_file.start > range_in_suspect.start {
Offset::Added(self.range_in_blamed_file.start - range_in_suspect.start)
} else {
Offset::Deleted(range_in_suspect.start - self.range_in_blamed_file.start)
}
}

/// Transfer all ranges from the commit at `from` to the commit at `to`.
fn pass_blame(&mut self, from: ObjectId, to: ObjectId) {
if let Some(range_in_suspect) = self.suspects.remove(&from) {
self.suspects.insert(to, range_in_suspect);
}
}

fn clone_blame(&mut self, from: ObjectId, to: ObjectId) {
// TODO
// Should this also accept `&mut self` as the other functions do?
fn clone_blame(mut self, from: ObjectId, to: ObjectId) -> Self {
if let Some(range_in_suspect) = self.suspects.get(&from) {
self.suspects.insert(to, range_in_suspect.clone());
}
self
}

fn remove_blame(&mut self, suspect: ObjectId) {
Expand All @@ -439,36 +424,6 @@ impl UnblamedHunk {
}

impl BlameEntry {
/// Create a new instance by creating `range_in_blamed_file` after applying `offset` to `range_in_source_file`.
fn with_offset(range_in_source_file: Range<u32>, commit_id: ObjectId, offset: Offset) -> Self {
debug_assert!(
range_in_source_file.end > range_in_source_file.start,
"{range_in_source_file:?}"
);

match offset {
Offset::Added(added) => Self {
start_in_blamed_file: range_in_source_file.start + added,
start_in_source_file: range_in_source_file.start,
len: force_non_zero(range_in_source_file.len() as u32),
commit_id,
},
Offset::Deleted(deleted) => {
debug_assert!(
range_in_source_file.start >= deleted,
"{range_in_source_file:?} {offset:?}"
);

Self {
start_in_blamed_file: range_in_source_file.start - deleted,
start_in_source_file: range_in_source_file.start,
len: force_non_zero(range_in_source_file.len() as u32),
commit_id,
}
}
}
}

/// Create an offset from a portion of the *Blamed File*.
fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Option<Self> {
let range_in_source_file = unblamed_hunk.suspects.get(&commit_id)?;
Expand Down
Loading

0 comments on commit dceb47d

Please sign in to comment.