Skip to content

Commit

Permalink
New crate for calculating hunk dependencies
Browse files Browse the repository at this point in the history
  • Loading branch information
mtsgrd committed Oct 18, 2024
1 parent 45467f3 commit 179bcb9
Show file tree
Hide file tree
Showing 12 changed files with 853 additions and 0 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ members = [
"crates/gitbutler-stack-api",
"crates/gitbutler-stack",
"crates/gitbutler-patch-reference",
"crates/gitbutler-hunk-dependency",
]
resolver = "2"

Expand Down Expand Up @@ -92,6 +93,7 @@ gitbutler-oxidize = { path = "crates/gitbutler-oxidize" }
gitbutler-stack-api = { path = "crates/gitbutler-stack-api" }
gitbutler-stack = { path = "crates/gitbutler-stack" }
gitbutler-patch-reference = { path = "crates/gitbutler-patch-reference" }
gitbutler-hunk-dependency = { path = "crates/gitbutler-hunk-dependency" }

[profile.release]
codegen-units = 1 # Compile crates one after another so the compiler can optimize better
Expand Down
23 changes: 23 additions & 0 deletions crates/gitbutler-hunk-dependency/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[package]
name = "gitbutler-hunk-dependency"
version = "0.0.0"
edition = "2021"
authors = ["GitButler <[email protected]>"]
publish = false

[dependencies]
anyhow = "1.0.86"
git2.workspace = true
gix = { workspace = true, features = [] }
gitbutler-reference.workspace = true
gitbutler-stack.workspace = true
gitbutler-id.workspace = true
itertools = "0.13"
serde = { workspace = true, features = ["std"] }
bstr.workspace = true
tokio.workspace = true
uuid = { workspace = true, features = ["v4", "fast-rng"] }

[[test]]
name = "blame"
path = "tests/mod.rs"
125 changes: 125 additions & 0 deletions crates/gitbutler-hunk-dependency/src/builder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
use std::{
collections::HashMap,
path::{Path, PathBuf},
};

use gitbutler_stack::StackId;

use crate::{diff::Diff, hunk::DependencyHunk, stack::DependencyStack};

/// Calculates dependencies between workspace changes and workspace commits.
///
/// What we ultimately want to understand is, given an uncommitted change
/// in some file, do the old line numbers intersect with any commit(s) in
/// the workspace?
///
/// The problem we have to overcome is that the workspace changes are
/// produced by diffing the working directory against the workspace commit.
/// It means changes from one stack can offset line numbers in changes from
/// a different stack. The most intuitive way of checking if they touch
/// the same lines is to use regular git blame, but it suffers from two
/// problems, 1) speed and 2) lack of --reverse flag in git2. The latter
/// means we can't detect intersections with deleted lines.
///
/// If we don't calculate these dependencies correctly it means a user
/// might be able to move a hunk into a stack where it cannot be committed.
///
/// So the solution here is that we build up the same information we would
/// get from blame by adding diffs together.
#[derive(Debug, Default, PartialEq, Clone)]
pub struct HunkDependencyBuilder {
/// Dependency data collected so far, one entry per stack id.
stacks: HashMap<StackId, DependencyStack>,
}

impl HunkDependencyBuilder {
    /// Records the `diffs` that `commit_id` applied to `path` within the stack `stack_id`.
    ///
    /// The diffs are forwarded to the [`DependencyStack`] registered for `stack_id`.
    /// When no such stack exists yet a fresh one is created, and it is only stored
    /// in the builder if adding the diffs to it succeeded.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the stack's own `add`.
    pub fn add(
        &mut self,
        stack_id: StackId,
        commit_id: git2::Oid,
        path: &PathBuf,
        diffs: Vec<Diff>,
    ) -> anyhow::Result<()> {
        if let Some(lane) = self.stacks.get_mut(&stack_id) {
            lane.add(stack_id, commit_id, path, diffs)?;
        } else {
            let mut lane_deps = DependencyStack::default();
            lane_deps.add(stack_id, commit_id, path, diffs)?;
            self.stacks.insert(stack_id, lane_deps);
        }
        Ok(())
    }

    /// Gets an object that can be used to lookup dependencies for a given path.
    ///
    /// The hunks every stack recorded for `path` are merged into a single list:
    /// at each step the not-yet-consumed hunk with the lowest start line (across
    /// all stacks) is taken, its start is offset by the line shift accumulated
    /// from the hunks taken before it, and its own `line_shift` is then added to
    /// that running total.
    ///
    /// The reasoning for combining the stacks/lanes here, rather than including
    /// it where diffs are combined within the branch, is/was to keep the logic
    /// simple. In iterating on the code, however, it feels like it might make
    /// more sense to go directly to "global" line numbers.
    ///
    /// The constraint we would need to introduce is that diffs from different
    /// stacks cannot intersect with each other. Doing so would mean the workspace
    /// is corrupt.
    ///
    /// NOTE(review): `stacks` is a `HashMap`, so the order in which stacks are
    /// visited is unspecified; hunks from different stacks that share a start
    /// line are emitted in that (arbitrary) order.
    ///
    /// TODO: Consider moving most of the code below to path.rs
    pub fn get_path(&mut self, path: &Path) -> anyhow::Result<PathDependencyLookup> {
        let paths = self
            .stacks
            .values()
            .filter(|s| s.contains_path(path))
            .filter_map(|value| value.get_path(path))
            .collect::<Vec<_>>();
        // Tracks the cumulative lines added/removed.
        let mut line_shift = 0;
        // Next hunk to consider for each stack containing path.
        let mut hunk_indexes: Vec<usize> = vec![0; paths.len()];
        let mut result = vec![];

        loop {
            // Among the next pending hunk of every stack, find the one with the
            // lowest start line. Stacks whose hunks are exhausted drop out here.
            // Start lines are compared as-is: no lossy cast and no doubled key.
            // `min_by_key` returns the first minimum, so ties keep stack order.
            let next_hunk = paths
                .iter()
                .zip(&hunk_indexes)
                .enumerate()
                .filter_map(|(path_index, (path_dep, &hunk_index))| {
                    path_dep
                        .hunks
                        .get(hunk_index)
                        .map(|hunk_dep| (path_index, hunk_index, hunk_dep.start))
                })
                .min_by_key(|&(_, _, start)| start);

            let Some((path_index, hunk_index, _)) = next_hunk else {
                break; // No more items to process.
            };
            hunk_indexes[path_index] += 1;

            let hunk_dep = &paths[path_index].hunks[hunk_index];
            result.push(DependencyHunk {
                start: hunk_dep.start + line_shift,
                ..hunk_dep.clone()
            });
            line_shift += hunk_dep.line_shift;
        }
        Ok(PathDependencyLookup { hunk_deps: result })
    }
}

/// Merged dependency hunks for a single path, as assembled by
/// `HunkDependencyBuilder::get_path`.
#[derive(Debug, Default, PartialEq, Clone)]
pub struct PathDependencyLookup {
/// Hunks in the order `get_path` emitted them, with their start lines
/// already offset by the accumulated line shift.
hunk_deps: Vec<DependencyHunk>,
}

impl PathDependencyLookup {
    /// Consumes the lookup and returns every stored hunk for which
    /// `DependencyHunk::intersects(start, lines)` is true, in stored order.
    pub fn find(self, start: i32, lines: i32) -> Vec<DependencyHunk> {
        let mut matching = self.hunk_deps;
        // `retain` keeps the relative order of the surviving hunks.
        matching.retain(|candidate| candidate.intersects(start, lines));
        matching
    }
}
76 changes: 76 additions & 0 deletions crates/gitbutler-hunk-dependency/src/diff.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use anyhow::{anyhow, Context};

/// Line ranges of a single diff hunk on the old and the new side.
///
/// When built through `parse_unidiff` the ranges come from the hunk header
/// with the leading and trailing context lines stripped.
#[derive(Debug, PartialEq, Clone)]
pub struct Diff {
    /// Start line of the range on the old side.
    pub old_start: i32,
    /// Number of lines in the range on the old side.
    pub old_lines: i32,
    /// Start line of the range on the new side.
    pub new_start: i32,
    /// Number of lines in the range on the new side.
    pub new_lines: i32,
}

impl Diff {
    /// Returns `new_lines - old_lines`: positive when the hunk grows the file,
    /// negative when it shrinks it.
    pub fn net_lines(&self) -> i32 {
        let Self {
            old_lines,
            new_lines,
            ..
        } = self;
        new_lines - old_lines
    }
}

/// Counts how many items at the front of `iter` are not added/removed lines.
///
/// Counting stops at the first line that begins with `-` or `+`; every line
/// before it (including empty ones) is counted.
fn count_context_lines<I, S>(iter: I) -> i32
where
    I: Iterator<Item = S>,
    S: AsRef<str>,
{
    let mut context = 0i32;
    for line in iter {
        // A leading '-' or '+' marks a changed line and ends the context run.
        if matches!(line.as_ref().chars().next(), Some('-' | '+')) {
            break;
        }
        context += 1;
    }
    context
}

/// Builds a [`Diff`] from the text of one unified-diff hunk held in an owned string.
impl TryFrom<String> for Diff {
    type Error = anyhow::Error;

    /// Delegates to `parse_unidiff`, borrowing the string's contents.
    fn try_from(value: String) -> Result<Self, Self::Error> {
        parse_unidiff(value.as_str())
    }
}

/// Builds a [`Diff`] from the text of one unified-diff hunk.
impl TryFrom<&str> for Diff {
    type Error = anyhow::Error;

    /// Delegates to `parse_unidiff`.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        let parsed = parse_unidiff(value);
        parsed
    }
}

fn parse_unidiff(value: impl AsRef<str>) -> Result<Diff, anyhow::Error> {
let value = value.as_ref();
let header = value.lines().next().context("No header found")?;
if !header.starts_with("@@") {
return Err(anyhow!("Malformed undiff"));
}
let parts: Vec<&str> = header.split_whitespace().collect();
let (old_start, old_lines) = parse_hunk_info(parts[1]);
let (new_start, new_lines) = parse_hunk_info(parts[2]);
let head_context_lines = count_context_lines(value.lines().skip(1).take(3));
let tail_context_lines = count_context_lines(value.rsplit_terminator('\n').take(3));
let context_lines = head_context_lines + tail_context_lines;

Ok(Diff {
old_start: old_start + head_context_lines,
old_lines: old_lines - context_lines,
new_start: new_start + head_context_lines,
new_lines: new_lines - context_lines,
})
}

fn parse_hunk_info(hunk_info: &str) -> (i32, i32) {
let hunk_info = hunk_info.trim_start_matches(&['-', '+'][..]); // Remove the leading '-' or '+'
let parts: Vec<&str> = hunk_info.split(',').collect();
let start = parts[0].parse().unwrap();
let lines = if parts.len() > 1 {
parts[1].parse().unwrap()
} else {
1
};
(start, lines)
}
24 changes: 24 additions & 0 deletions crates/gitbutler-hunk-dependency/src/hunk.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use gitbutler_stack::StackId;

/// A line range tagged with the stack and the commit it is attributed to.
#[derive(Debug, PartialEq, Clone)]
pub struct DependencyHunk {
/// Stack this hunk belongs to.
pub stack_id: StackId,
/// Commit this hunk is attributed to.
pub commit_id: git2::Oid,
/// First line of the range.
pub start: i32,
/// Number of lines in the range; the last line is `start + lines - 1`.
pub lines: i32,
/// Amount added to the running line offset after this hunk when hunks are
/// merged in `HunkDependencyBuilder::get_path`.
/// NOTE(review): presumably the net number of lines the hunk added or
/// removed; confirm where hunks are created.
pub line_shift: i32,
}

impl DependencyHunk {
    /// Last line covered by this hunk (inclusive).
    fn end(&self) -> i32 {
        let past_the_end = self.start + self.lines;
        past_the_end - 1
    }

    /// Returns `true` when the range of `lines` lines beginning at `start`
    /// shares at least one line with this hunk.
    pub fn intersects(&self, start: i32, lines: i32) -> bool {
        // The other range begins after this hunk has ended: no overlap.
        if self.end() < start {
            return false;
        }
        // Otherwise they overlap iff this hunk begins before the other range ends.
        self.start < start + lines
    }

    /// Returns `true` when the range of `lines` lines beginning at `start`
    /// lies strictly inside this hunk: it must begin after this hunk's first
    /// line and `start + lines` must not exceed this hunk's last line.
    pub fn contains(&self, start: i32, lines: i32) -> bool {
        if start <= self.start {
            return false;
        }
        start + lines <= self.end()
    }
}
5 changes: 5 additions & 0 deletions crates/gitbutler-hunk-dependency/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pub mod builder;
pub mod diff;
pub mod hunk;
pub mod path;
pub mod stack;
Loading

0 comments on commit 179bcb9

Please sign in to comment.