1- use std:: num :: NonZeroU32 ;
1+ use std:: ops :: ControlFlow ;
22
33use gix_diff:: { blob:: intern:: TokenSource , tree:: Visit } ;
44use gix_hash:: ObjectId ;
@@ -9,7 +9,7 @@ use gix_object::{
99use gix_traverse:: commit:: find as find_commit;
1010use smallvec:: SmallVec ;
1111
12- use super :: { process_changes, Change , UnblamedHunk } ;
12+ use super :: { coalesce_blame_entries , process_changes, Change , UnblamedHunk } ;
1313use crate :: { types:: BlamePathEntry , BlameEntry , BlameSink , Error , IncrementalOutcome , Options , Outcome , Statistics } ;
1414
1515/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file
@@ -107,6 +107,7 @@ pub fn incremental(
107107 } else {
108108 None
109109 } ;
110+ let mut stopped_early = false ;
110111
111112 ' outer: while let Some ( suspect) = queue. pop_value ( ) {
112113 stats. commits_traversed += 1 ;
@@ -131,11 +132,14 @@ pub fn incremental(
131132
132133 if let Some ( since) = options. since {
133134 if commit_time < since. seconds {
134- if unblamed_to_out_is_done ( & mut hunks_to_blame, sink, suspect) {
135- break ' outer;
135+ match emit_unblamed_hunks ( & mut hunks_to_blame, sink, suspect) {
136+ ControlFlow :: Continue ( true ) => break ' outer,
137+ ControlFlow :: Continue ( false ) => continue ,
138+ ControlFlow :: Break ( ( ) ) => {
139+ stopped_early = true ;
140+ break ' outer;
141+ }
136142 }
137-
138- continue ;
139143 }
140144 }
141145
@@ -148,25 +152,32 @@ pub fn incremental(
148152 // the remaining lines to it, even though we don’t explicitly check whether that is
149153 // true here. We could perhaps use diff-tree-to-tree to compare `suspect` against
150154 // an empty tree to validate this assumption.
151- if unblamed_to_out_is_done ( & mut hunks_to_blame, sink, suspect) {
152- if let Some ( ref mut blame_path) = blame_path {
153- let entry = previous_entry
154- . take ( )
155- . filter ( |( id, _) | * id == suspect)
156- . map ( |( _, entry) | entry) ;
157-
158- let blame_path_entry = BlamePathEntry {
159- source_file_path : current_file_path. clone ( ) ,
160- previous_source_file_path : None ,
161- commit_id : suspect,
162- blob_id : entry. unwrap_or ( ObjectId :: null ( gix_hash:: Kind :: Sha1 ) ) ,
163- previous_blob_id : ObjectId :: null ( gix_hash:: Kind :: Sha1 ) ,
164- parent_index : 0 ,
165- } ;
166- blame_path. push ( blame_path_entry) ;
167- }
155+ match emit_unblamed_hunks ( & mut hunks_to_blame, sink, suspect) {
156+ ControlFlow :: Continue ( true ) => {
157+ if let Some ( ref mut blame_path) = blame_path {
158+ let entry = previous_entry
159+ . take ( )
160+ . filter ( |( id, _) | * id == suspect)
161+ . map ( |( _, entry) | entry) ;
168162
169- break ' outer;
163+ let blame_path_entry = BlamePathEntry {
164+ source_file_path : current_file_path. clone ( ) ,
165+ previous_source_file_path : None ,
166+ commit_id : suspect,
167+ blob_id : entry. unwrap_or ( ObjectId :: null ( gix_hash:: Kind :: Sha1 ) ) ,
168+ previous_blob_id : ObjectId :: null ( gix_hash:: Kind :: Sha1 ) ,
169+ parent_index : 0 ,
170+ } ;
171+ blame_path. push ( blame_path_entry) ;
172+ }
173+
174+ break ' outer;
175+ }
176+ ControlFlow :: Continue ( false ) => { }
177+ ControlFlow :: Break ( ( ) ) => {
178+ stopped_early = true ;
179+ break ' outer;
180+ }
170181 }
171182 }
172183 // There is more, keep looking.
@@ -304,20 +315,29 @@ pub fn incremental(
304315 // Do nothing under the assumption that this always (or almost always)
305316 // implies that the file comes from a different parent, compared to which
306317 // it was modified, not added.
307- } else if unblamed_to_out_is_done ( & mut hunks_to_blame, sink, suspect) {
308- if let Some ( ref mut blame_path) = blame_path {
309- let blame_path_entry = BlamePathEntry {
310- source_file_path : current_file_path. clone ( ) ,
311- previous_source_file_path : None ,
312- commit_id : suspect,
313- blob_id : id,
314- previous_blob_id : ObjectId :: null ( gix_hash:: Kind :: Sha1 ) ,
315- parent_index : index,
316- } ;
317- blame_path. push ( blame_path_entry) ;
318+ } else {
319+ match emit_unblamed_hunks ( & mut hunks_to_blame, sink, suspect) {
320+ ControlFlow :: Continue ( true ) => {
321+ if let Some ( ref mut blame_path) = blame_path {
322+ let blame_path_entry = BlamePathEntry {
323+ source_file_path : current_file_path. clone ( ) ,
324+ previous_source_file_path : None ,
325+ commit_id : suspect,
326+ blob_id : id,
327+ previous_blob_id : ObjectId :: null ( gix_hash:: Kind :: Sha1 ) ,
328+ parent_index : index,
329+ } ;
330+ blame_path. push ( blame_path_entry) ;
331+ }
332+
333+ break ' outer;
334+ }
335+ ControlFlow :: Continue ( false ) => { }
336+ ControlFlow :: Break ( ( ) ) => {
337+ stopped_early = true ;
338+ break ' outer;
339+ }
318340 }
319-
320- break ' outer;
321341 }
322342 }
323343 TreeDiffChange :: Deletion => {
@@ -395,27 +415,19 @@ pub fn incremental(
395415 }
396416 }
397417
398- hunks_to_blame. retain_mut ( |unblamed_hunk| {
399- if unblamed_hunk. suspects . len ( ) == 1 {
400- if let Some ( entry) = BlameEntry :: from_unblamed_hunk ( unblamed_hunk, suspect) {
401- // At this point, we have copied blame for every hunk to a parent. Hunks
402- // that have only `suspect` left in `suspects` have not passed blame to any
403- // parent, and so they can be converted to a `BlameEntry` and moved to
404- // the sink.
405- sink. push ( entry) ;
406- return false ;
407- }
408- }
409- unblamed_hunk. remove_blame ( suspect) ;
410- true
411- } ) ;
418+ if let ControlFlow :: Break ( ( ) ) = emit_single_suspect_hunks ( & mut hunks_to_blame, sink, suspect) {
419+ stopped_early = true ;
420+ break ' outer;
421+ }
412422 }
413423
414- debug_assert_eq ! (
415- hunks_to_blame,
416- vec![ ] ,
417- "only if there is no portion of the file left we have completed the blame"
418- ) ;
424+ if !stopped_early {
425+ debug_assert_eq ! (
426+ hunks_to_blame,
427+ vec![ ] ,
428+ "only if there is no portion of the file left we have completed the blame"
429+ ) ;
430+ }
419431
420432 Ok ( IncrementalOutcome {
421433 blob : blamed_file_blob,
@@ -471,75 +483,60 @@ fn pass_blame_from_to(from: ObjectId, to: ObjectId, hunks_to_blame: &mut Vec<Unb
471483 }
472484}
473485
474- /// Convert each of the unblamed hunk in `hunks_to_blame` into a [`BlameEntry`], consuming them in the process,
475- /// and emit each entry to `sink`.
486+ /// Convert each of the unblamed hunks in `hunks_to_blame` into [`BlameEntry`] values , consuming
487+ /// the ones emitted to `sink`.
476488///
477- /// Return `true` if we are done because `hunks_to_blame` is empty.
478- fn unblamed_to_out_is_done (
489+ /// Return [`ControlFlow::Continue`] with `true` if we are done because `hunks_to_blame` is empty.
490+ fn emit_unblamed_hunks (
479491 hunks_to_blame : & mut Vec < UnblamedHunk > ,
480492 sink : & mut impl BlameSink ,
481493 suspect : ObjectId ,
482- ) -> bool {
494+ ) -> ControlFlow < ( ) , bool > {
483495 let mut without_suspect = Vec :: new ( ) ;
484- for hunk in hunks_to_blame. drain ( ..) {
496+ let mut remaining_hunks = std:: mem:: take ( hunks_to_blame) . into_iter ( ) ;
497+ while let Some ( hunk) = remaining_hunks. next ( ) {
485498 if let Some ( entry) = BlameEntry :: from_unblamed_hunk ( & hunk, suspect) {
486- sink. push ( entry) ;
499+ if let ControlFlow :: Break ( ( ) ) = sink. push ( entry) {
500+ without_suspect. extend ( remaining_hunks) ;
501+ * hunks_to_blame = without_suspect;
502+ return ControlFlow :: Break ( ( ) ) ;
503+ }
487504 } else {
488505 without_suspect. push ( hunk) ;
489506 }
490507 }
491508 * hunks_to_blame = without_suspect;
492- hunks_to_blame. is_empty ( )
509+ ControlFlow :: Continue ( hunks_to_blame. is_empty ( ) )
493510}
494511
495- /// This function merges adjacent blame entries. It merges entries that are adjacent both in the
496- /// blamed file and in the source file that introduced them. This follows `git`’s
497- /// behaviour. `libgit2`, as of 2024-09-19, only checks whether two entries are adjacent in the
498- /// blamed file which can result in different blames in certain edge cases. See [the commit][1]
499- /// that introduced the extra check into `git` for context. See [this commit][2] for a way to test
500- /// for this behaviour in `git`.
501- ///
502- /// [1]: https://github.com/git/git/commit/c2ebaa27d63bfb7c50cbbdaba90aee4efdd45d0a
503- /// [2]: https://github.com/git/git/commit/6dbf0c7bebd1c71c44d786ebac0f2b3f226a0131
504- fn coalesce_blame_entries ( lines_blamed : Vec < BlameEntry > ) -> Vec < BlameEntry > {
505- let len = lines_blamed. len ( ) ;
506- lines_blamed
507- . into_iter ( )
508- . fold ( Vec :: with_capacity ( len) , |mut acc, entry| {
509- let previous_entry = acc. last ( ) ;
510-
511- if let Some ( previous_entry) = previous_entry {
512- let previous_blamed_range = previous_entry. range_in_blamed_file ( ) ;
513- let current_blamed_range = entry. range_in_blamed_file ( ) ;
514- let previous_source_range = previous_entry. range_in_source_file ( ) ;
515- let current_source_range = entry. range_in_source_file ( ) ;
516- if previous_entry. commit_id == entry. commit_id
517- && previous_blamed_range. end == current_blamed_range. start
518- // As of 2024-09-19, the check below only is in `git`, but not in `libgit2`.
519- && previous_source_range. end == current_source_range. start
520- {
521- let coalesced_entry = BlameEntry {
522- start_in_blamed_file : previous_blamed_range. start as u32 ,
523- start_in_source_file : previous_source_range. start as u32 ,
524- len : NonZeroU32 :: new ( ( current_source_range. end - previous_source_range. start ) as u32 )
525- . expect ( "BUG: hunks are never zero-sized" ) ,
526- commit_id : previous_entry. commit_id ,
527- source_file_name : previous_entry. source_file_name . clone ( ) ,
528- } ;
529-
530- acc. pop ( ) ;
531- acc. push ( coalesced_entry) ;
532- } else {
533- acc. push ( entry) ;
512+ /// Convert hunks that still have `suspect` as their only candidate into [`BlameEntry`] values.
513+ fn emit_single_suspect_hunks (
514+ hunks_to_blame : & mut Vec < UnblamedHunk > ,
515+ sink : & mut impl BlameSink ,
516+ suspect : ObjectId ,
517+ ) -> ControlFlow < ( ) > {
518+ let mut remaining_hunks = Vec :: with_capacity ( hunks_to_blame. len ( ) ) ;
519+ let mut pending_hunks = std:: mem:: take ( hunks_to_blame) . into_iter ( ) ;
520+ while let Some ( mut unblamed_hunk) = pending_hunks. next ( ) {
521+ if unblamed_hunk. suspects . len ( ) == 1 {
522+ if let Some ( entry) = BlameEntry :: from_unblamed_hunk ( & unblamed_hunk, suspect) {
523+ // At this point, we have copied blame for every hunk to a parent. Hunks that have
524+ // only `suspect` left in `suspects` have not passed blame to any parent, and so
525+ // they can be converted to a `BlameEntry` and moved to the sink.
526+ if let ControlFlow :: Break ( ( ) ) = sink. push ( entry) {
527+ remaining_hunks. extend ( pending_hunks) ;
528+ * hunks_to_blame = remaining_hunks;
529+ return ControlFlow :: Break ( ( ) ) ;
534530 }
535-
536- acc
537- } else {
538- acc. push ( entry) ;
539-
540- acc
531+ continue ;
541532 }
542- } )
533+ }
534+ unblamed_hunk. remove_blame ( suspect) ;
535+ remaining_hunks. push ( unblamed_hunk) ;
536+ }
537+
538+ * hunks_to_blame = remaining_hunks;
539+ ControlFlow :: Continue ( ( ) )
543540}
544541
545542/// The union of [`gix_diff::tree::recorder::Change`] and [`gix_diff::tree_with_rewrites::Change`],
@@ -959,3 +956,79 @@ fn maybe_changed_path_in_bloom_filter(
959956pub ( crate ) fn tokens_for_diffing ( data : & [ u8 ] ) -> impl TokenSource < Token = & [ u8 ] > {
960957 gix_diff:: blob:: sources:: byte_lines_with_terminator ( data)
961958}
959+
960+ #[ cfg( test) ]
961+ mod tests {
962+ use gix_object:: bstr:: BString ;
963+ use gix_testtools:: Result ;
964+
965+ use super :: { file, incremental} ;
966+ use crate :: { file:: coalesce_blame_entries, BlameRanges , Options } ;
967+
968+ struct Fixture {
969+ repo : gix:: Repository ,
970+ resource_cache : gix_diff:: blob:: Platform ,
971+ suspect : gix_hash:: ObjectId ,
972+ }
973+
974+ impl Fixture {
975+ fn new ( ) -> Result < Self > {
976+ let repo_path = gix_testtools:: scripted_fixture_read_only ( "make_blame_repo.sh" ) ?;
977+ let repo = gix:: open ( & repo_path) ?;
978+ let suspect = repo. rev_parse_single ( "HEAD" ) ?. detach ( ) ;
979+ let resource_cache = repo. diff_resource_cache_for_tree_diff ( ) ?;
980+ Ok ( Self {
981+ repo,
982+ resource_cache,
983+ suspect,
984+ } )
985+ }
986+ }
987+
988+ fn options ( ) -> Options {
989+ Options {
990+ diff_algorithm : gix_diff:: blob:: Algorithm :: Histogram ,
991+ ranges : BlameRanges :: default ( ) ,
992+ since : None ,
993+ rewrites : Some ( gix_diff:: Rewrites :: default ( ) ) ,
994+ debug_track_path : false ,
995+ }
996+ }
997+
998+ #[ test]
999+ fn vec_sink_can_reconstruct_file_entries ( ) -> Result {
1000+ let Fixture {
1001+ repo,
1002+ mut resource_cache,
1003+ suspect,
1004+ } = Fixture :: new ( ) ?;
1005+ let source_file_name: BString = "simple.txt" . into ( ) ;
1006+
1007+ let mut streamed_entries = Vec :: new ( ) ;
1008+ incremental (
1009+ & repo,
1010+ suspect,
1011+ None ,
1012+ & mut resource_cache,
1013+ source_file_name. as_ref ( ) ,
1014+ & mut streamed_entries,
1015+ options ( ) ,
1016+ ) ?;
1017+
1018+ streamed_entries. sort_by ( |a, b| a. start_in_blamed_file . cmp ( & b. start_in_blamed_file ) ) ;
1019+ let streamed_entries = coalesce_blame_entries ( streamed_entries) ;
1020+
1021+ let file_entries = file (
1022+ & repo,
1023+ suspect,
1024+ None ,
1025+ & mut resource_cache,
1026+ source_file_name. as_ref ( ) ,
1027+ options ( ) ,
1028+ ) ?
1029+ . entries ;
1030+
1031+ pretty_assertions:: assert_eq!( streamed_entries, file_entries) ;
1032+ Ok ( ( ) )
1033+ }
1034+ }
0 commit comments