Skip to content

Commit a85c1fe

Browse files
committed
gix-blame: allow interrupting an incremental blame
The BlameSink type now returns a std::ops::ControlFlow value that can be used to interrupt the blame early. Signed-off-by: Vicent Marti <[email protected]>
1 parent 248303e commit a85c1fe

File tree

5 files changed

+291
-167
lines changed

5 files changed

+291
-167
lines changed

gix-blame/benches/incremental_blame.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use std::{
22
env,
3+
ops::ControlFlow,
34
path::{Path, PathBuf},
45
process::Command,
56
};
@@ -13,7 +14,9 @@ const DEFAULT_BENCH_PATH: &str = "gix-blame/src/file/function.rs";
1314
struct DiscardSink;
1415

1516
impl BlameSink for DiscardSink {
16-
fn push(&mut self, _entry: BlameEntry) {}
17+
fn push(&mut self, _entry: BlameEntry) -> ControlFlow<()> {
18+
ControlFlow::Continue(())
19+
}
1720
}
1821

1922
fn incremental_options() -> gix_blame::Options {

gix-blame/src/file/function.rs

Lines changed: 183 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::num::NonZeroU32;
1+
use std::ops::ControlFlow;
22

33
use gix_diff::{blob::intern::TokenSource, tree::Visit};
44
use gix_hash::ObjectId;
@@ -9,7 +9,7 @@ use gix_object::{
99
use gix_traverse::commit::find as find_commit;
1010
use smallvec::SmallVec;
1111

12-
use super::{process_changes, Change, UnblamedHunk};
12+
use super::{coalesce_blame_entries, process_changes, Change, UnblamedHunk};
1313
use crate::{types::BlamePathEntry, BlameEntry, BlameSink, Error, IncrementalOutcome, Options, Outcome, Statistics};
1414

1515
/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file
@@ -107,6 +107,7 @@ pub fn incremental(
107107
} else {
108108
None
109109
};
110+
let mut stopped_early = false;
110111

111112
'outer: while let Some(suspect) = queue.pop_value() {
112113
stats.commits_traversed += 1;
@@ -131,11 +132,14 @@ pub fn incremental(
131132

132133
if let Some(since) = options.since {
133134
if commit_time < since.seconds {
134-
if unblamed_to_out_is_done(&mut hunks_to_blame, sink, suspect) {
135-
break 'outer;
135+
match emit_unblamed_hunks(&mut hunks_to_blame, sink, suspect) {
136+
ControlFlow::Continue(true) => break 'outer,
137+
ControlFlow::Continue(false) => continue,
138+
ControlFlow::Break(()) => {
139+
stopped_early = true;
140+
break 'outer;
141+
}
136142
}
137-
138-
continue;
139143
}
140144
}
141145

@@ -148,25 +152,32 @@ pub fn incremental(
148152
// the remaining lines to it, even though we don’t explicitly check whether that is
149153
// true here. We could perhaps use diff-tree-to-tree to compare `suspect` against
150154
// an empty tree to validate this assumption.
151-
if unblamed_to_out_is_done(&mut hunks_to_blame, sink, suspect) {
152-
if let Some(ref mut blame_path) = blame_path {
153-
let entry = previous_entry
154-
.take()
155-
.filter(|(id, _)| *id == suspect)
156-
.map(|(_, entry)| entry);
157-
158-
let blame_path_entry = BlamePathEntry {
159-
source_file_path: current_file_path.clone(),
160-
previous_source_file_path: None,
161-
commit_id: suspect,
162-
blob_id: entry.unwrap_or(ObjectId::null(gix_hash::Kind::Sha1)),
163-
previous_blob_id: ObjectId::null(gix_hash::Kind::Sha1),
164-
parent_index: 0,
165-
};
166-
blame_path.push(blame_path_entry);
167-
}
155+
match emit_unblamed_hunks(&mut hunks_to_blame, sink, suspect) {
156+
ControlFlow::Continue(true) => {
157+
if let Some(ref mut blame_path) = blame_path {
158+
let entry = previous_entry
159+
.take()
160+
.filter(|(id, _)| *id == suspect)
161+
.map(|(_, entry)| entry);
168162

169-
break 'outer;
163+
let blame_path_entry = BlamePathEntry {
164+
source_file_path: current_file_path.clone(),
165+
previous_source_file_path: None,
166+
commit_id: suspect,
167+
blob_id: entry.unwrap_or(ObjectId::null(gix_hash::Kind::Sha1)),
168+
previous_blob_id: ObjectId::null(gix_hash::Kind::Sha1),
169+
parent_index: 0,
170+
};
171+
blame_path.push(blame_path_entry);
172+
}
173+
174+
break 'outer;
175+
}
176+
ControlFlow::Continue(false) => {}
177+
ControlFlow::Break(()) => {
178+
stopped_early = true;
179+
break 'outer;
180+
}
170181
}
171182
}
172183
// There is more, keep looking.
@@ -304,20 +315,29 @@ pub fn incremental(
304315
// Do nothing under the assumption that this always (or almost always)
305316
// implies that the file comes from a different parent, compared to which
306317
// it was modified, not added.
307-
} else if unblamed_to_out_is_done(&mut hunks_to_blame, sink, suspect) {
308-
if let Some(ref mut blame_path) = blame_path {
309-
let blame_path_entry = BlamePathEntry {
310-
source_file_path: current_file_path.clone(),
311-
previous_source_file_path: None,
312-
commit_id: suspect,
313-
blob_id: id,
314-
previous_blob_id: ObjectId::null(gix_hash::Kind::Sha1),
315-
parent_index: index,
316-
};
317-
blame_path.push(blame_path_entry);
318+
} else {
319+
match emit_unblamed_hunks(&mut hunks_to_blame, sink, suspect) {
320+
ControlFlow::Continue(true) => {
321+
if let Some(ref mut blame_path) = blame_path {
322+
let blame_path_entry = BlamePathEntry {
323+
source_file_path: current_file_path.clone(),
324+
previous_source_file_path: None,
325+
commit_id: suspect,
326+
blob_id: id,
327+
previous_blob_id: ObjectId::null(gix_hash::Kind::Sha1),
328+
parent_index: index,
329+
};
330+
blame_path.push(blame_path_entry);
331+
}
332+
333+
break 'outer;
334+
}
335+
ControlFlow::Continue(false) => {}
336+
ControlFlow::Break(()) => {
337+
stopped_early = true;
338+
break 'outer;
339+
}
318340
}
319-
320-
break 'outer;
321341
}
322342
}
323343
TreeDiffChange::Deletion => {
@@ -395,27 +415,19 @@ pub fn incremental(
395415
}
396416
}
397417

398-
hunks_to_blame.retain_mut(|unblamed_hunk| {
399-
if unblamed_hunk.suspects.len() == 1 {
400-
if let Some(entry) = BlameEntry::from_unblamed_hunk(unblamed_hunk, suspect) {
401-
// At this point, we have copied blame for every hunk to a parent. Hunks
402-
// that have only `suspect` left in `suspects` have not passed blame to any
403-
// parent, and so they can be converted to a `BlameEntry` and moved to
404-
// the sink.
405-
sink.push(entry);
406-
return false;
407-
}
408-
}
409-
unblamed_hunk.remove_blame(suspect);
410-
true
411-
});
418+
if let ControlFlow::Break(()) = emit_single_suspect_hunks(&mut hunks_to_blame, sink, suspect) {
419+
stopped_early = true;
420+
break 'outer;
421+
}
412422
}
413423

414-
debug_assert_eq!(
415-
hunks_to_blame,
416-
vec![],
417-
"only if there is no portion of the file left we have completed the blame"
418-
);
424+
if !stopped_early {
425+
debug_assert_eq!(
426+
hunks_to_blame,
427+
vec![],
428+
"only if there is no portion of the file left we have completed the blame"
429+
);
430+
}
419431

420432
Ok(IncrementalOutcome {
421433
blob: blamed_file_blob,
@@ -471,75 +483,60 @@ fn pass_blame_from_to(from: ObjectId, to: ObjectId, hunks_to_blame: &mut Vec<Unb
471483
}
472484
}
473485

474-
/// Convert each of the unblamed hunk in `hunks_to_blame` into a [`BlameEntry`], consuming them in the process,
475-
/// and emit each entry to `sink`.
486+
/// Convert each of the unblamed hunks in `hunks_to_blame` into [`BlameEntry`] values, consuming
487+
/// the ones emitted to `sink`.
476488
///
477-
/// Return `true` if we are done because `hunks_to_blame` is empty.
478-
fn unblamed_to_out_is_done(
489+
/// Return [`ControlFlow::Continue`] with `true` if we are done because `hunks_to_blame` is empty.
490+
fn emit_unblamed_hunks(
479491
hunks_to_blame: &mut Vec<UnblamedHunk>,
480492
sink: &mut impl BlameSink,
481493
suspect: ObjectId,
482-
) -> bool {
494+
) -> ControlFlow<(), bool> {
483495
let mut without_suspect = Vec::new();
484-
for hunk in hunks_to_blame.drain(..) {
496+
let mut remaining_hunks = std::mem::take(hunks_to_blame).into_iter();
497+
while let Some(hunk) = remaining_hunks.next() {
485498
if let Some(entry) = BlameEntry::from_unblamed_hunk(&hunk, suspect) {
486-
sink.push(entry);
499+
if let ControlFlow::Break(()) = sink.push(entry) {
500+
without_suspect.extend(remaining_hunks);
501+
*hunks_to_blame = without_suspect;
502+
return ControlFlow::Break(());
503+
}
487504
} else {
488505
without_suspect.push(hunk);
489506
}
490507
}
491508
*hunks_to_blame = without_suspect;
492-
hunks_to_blame.is_empty()
509+
ControlFlow::Continue(hunks_to_blame.is_empty())
493510
}
494511

495-
/// This function merges adjacent blame entries. It merges entries that are adjacent both in the
496-
/// blamed file and in the source file that introduced them. This follows `git`’s
497-
/// behaviour. `libgit2`, as of 2024-09-19, only checks whether two entries are adjacent in the
498-
/// blamed file which can result in different blames in certain edge cases. See [the commit][1]
499-
/// that introduced the extra check into `git` for context. See [this commit][2] for a way to test
500-
/// for this behaviour in `git`.
501-
///
502-
/// [1]: https://github.com/git/git/commit/c2ebaa27d63bfb7c50cbbdaba90aee4efdd45d0a
503-
/// [2]: https://github.com/git/git/commit/6dbf0c7bebd1c71c44d786ebac0f2b3f226a0131
504-
fn coalesce_blame_entries(lines_blamed: Vec<BlameEntry>) -> Vec<BlameEntry> {
505-
let len = lines_blamed.len();
506-
lines_blamed
507-
.into_iter()
508-
.fold(Vec::with_capacity(len), |mut acc, entry| {
509-
let previous_entry = acc.last();
510-
511-
if let Some(previous_entry) = previous_entry {
512-
let previous_blamed_range = previous_entry.range_in_blamed_file();
513-
let current_blamed_range = entry.range_in_blamed_file();
514-
let previous_source_range = previous_entry.range_in_source_file();
515-
let current_source_range = entry.range_in_source_file();
516-
if previous_entry.commit_id == entry.commit_id
517-
&& previous_blamed_range.end == current_blamed_range.start
518-
// As of 2024-09-19, the check below only is in `git`, but not in `libgit2`.
519-
&& previous_source_range.end == current_source_range.start
520-
{
521-
let coalesced_entry = BlameEntry {
522-
start_in_blamed_file: previous_blamed_range.start as u32,
523-
start_in_source_file: previous_source_range.start as u32,
524-
len: NonZeroU32::new((current_source_range.end - previous_source_range.start) as u32)
525-
.expect("BUG: hunks are never zero-sized"),
526-
commit_id: previous_entry.commit_id,
527-
source_file_name: previous_entry.source_file_name.clone(),
528-
};
529-
530-
acc.pop();
531-
acc.push(coalesced_entry);
532-
} else {
533-
acc.push(entry);
512+
/// Convert hunks that still have `suspect` as their only candidate into [`BlameEntry`] values.
513+
fn emit_single_suspect_hunks(
514+
hunks_to_blame: &mut Vec<UnblamedHunk>,
515+
sink: &mut impl BlameSink,
516+
suspect: ObjectId,
517+
) -> ControlFlow<()> {
518+
let mut remaining_hunks = Vec::with_capacity(hunks_to_blame.len());
519+
let mut pending_hunks = std::mem::take(hunks_to_blame).into_iter();
520+
while let Some(mut unblamed_hunk) = pending_hunks.next() {
521+
if unblamed_hunk.suspects.len() == 1 {
522+
if let Some(entry) = BlameEntry::from_unblamed_hunk(&unblamed_hunk, suspect) {
523+
// At this point, we have copied blame for every hunk to a parent. Hunks that have
524+
// only `suspect` left in `suspects` have not passed blame to any parent, and so
525+
// they can be converted to a `BlameEntry` and moved to the sink.
526+
if let ControlFlow::Break(()) = sink.push(entry) {
527+
remaining_hunks.extend(pending_hunks);
528+
*hunks_to_blame = remaining_hunks;
529+
return ControlFlow::Break(());
534530
}
535-
536-
acc
537-
} else {
538-
acc.push(entry);
539-
540-
acc
531+
continue;
541532
}
542-
})
533+
}
534+
unblamed_hunk.remove_blame(suspect);
535+
remaining_hunks.push(unblamed_hunk);
536+
}
537+
538+
*hunks_to_blame = remaining_hunks;
539+
ControlFlow::Continue(())
543540
}
544541

545542
/// The union of [`gix_diff::tree::recorder::Change`] and [`gix_diff::tree_with_rewrites::Change`],
@@ -959,3 +956,79 @@ fn maybe_changed_path_in_bloom_filter(
959956
pub(crate) fn tokens_for_diffing(data: &[u8]) -> impl TokenSource<Token = &[u8]> {
960957
gix_diff::blob::sources::byte_lines_with_terminator(data)
961958
}
959+
960+
#[cfg(test)]
961+
mod tests {
962+
use gix_object::bstr::BString;
963+
use gix_testtools::Result;
964+
965+
use super::{file, incremental};
966+
use crate::{file::coalesce_blame_entries, BlameRanges, Options};
967+
968+
struct Fixture {
969+
repo: gix::Repository,
970+
resource_cache: gix_diff::blob::Platform,
971+
suspect: gix_hash::ObjectId,
972+
}
973+
974+
impl Fixture {
975+
fn new() -> Result<Self> {
976+
let repo_path = gix_testtools::scripted_fixture_read_only("make_blame_repo.sh")?;
977+
let repo = gix::open(&repo_path)?;
978+
let suspect = repo.rev_parse_single("HEAD")?.detach();
979+
let resource_cache = repo.diff_resource_cache_for_tree_diff()?;
980+
Ok(Self {
981+
repo,
982+
resource_cache,
983+
suspect,
984+
})
985+
}
986+
}
987+
988+
fn options() -> Options {
989+
Options {
990+
diff_algorithm: gix_diff::blob::Algorithm::Histogram,
991+
ranges: BlameRanges::default(),
992+
since: None,
993+
rewrites: Some(gix_diff::Rewrites::default()),
994+
debug_track_path: false,
995+
}
996+
}
997+
998+
#[test]
999+
fn vec_sink_can_reconstruct_file_entries() -> Result {
1000+
let Fixture {
1001+
repo,
1002+
mut resource_cache,
1003+
suspect,
1004+
} = Fixture::new()?;
1005+
let source_file_name: BString = "simple.txt".into();
1006+
1007+
let mut streamed_entries = Vec::new();
1008+
incremental(
1009+
&repo,
1010+
suspect,
1011+
None,
1012+
&mut resource_cache,
1013+
source_file_name.as_ref(),
1014+
&mut streamed_entries,
1015+
options(),
1016+
)?;
1017+
1018+
streamed_entries.sort_by(|a, b| a.start_in_blamed_file.cmp(&b.start_in_blamed_file));
1019+
let streamed_entries = coalesce_blame_entries(streamed_entries);
1020+
1021+
let file_entries = file(
1022+
&repo,
1023+
suspect,
1024+
None,
1025+
&mut resource_cache,
1026+
source_file_name.as_ref(),
1027+
options(),
1028+
)?
1029+
.entries;
1030+
1031+
pretty_assertions::assert_eq!(streamed_entries, file_entries);
1032+
Ok(())
1033+
}
1034+
}

0 commit comments

Comments
 (0)