From 84105e10d0ce93052cdce677e8472bdcc3c388db Mon Sep 17 00:00:00 2001 From: Micha White Date: Sat, 4 Oct 2025 19:07:22 -0400 Subject: Basic diffing --- Cargo.lock | 26 +++++++++++++ Cargo.toml | 1 + src/calculation.rs | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++-- src/diff.rs | 26 +++++++++++++ src/lib.rs | 2 + src/object.rs | 2 +- 6 files changed, 158 insertions(+), 5 deletions(-) create mode 100644 src/diff.rs diff --git a/Cargo.lock b/Cargo.lock index 228f238..8a1e10e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,31 @@ dependencies = [ "arrayvec", ] +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "imara-diff" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f01d462f766df78ab820dd06f5eb700233c51f0f4c2e846520eaf4ba6aa5c5c" +dependencies = [ + "hashbrown", + "memchr", +] + [[package]] name = "itoa" version = "1.0.15" @@ -34,6 +59,7 @@ name = "pj" version = "0.1.0" dependencies = [ "fast-glob", + "imara-diff", "serde", "serde_json", "walkdir", diff --git a/Cargo.toml b/Cargo.toml index ab2500a..f959f9d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,3 +8,4 @@ walkdir = "2" fast-glob = "1" serde = { version = "1", features = ["derive"] } serde_json = "1" +imara-diff = "0.2" diff --git a/src/calculation.rs b/src/calculation.rs index e65e692..64b0f1e 100644 --- a/src/calculation.rs +++ b/src/calculation.rs @@ -1,4 +1,5 @@ use std::collections::{HashSet, VecDeque}; +use std::io::Write; use std::path::PathBuf; use crate::{FileInfo, FilenameOperation, Patch, PatchId, SpanNode, SpanNodeId}; @@ -160,19 +161,40 @@ pub fn conflicting_nodes( Ok(conflicting_nodes) } -pub struct FileContent(Vec); +pub struct FileContent(pub Vec); -pub struct FileContentSpan(Vec); +pub enum FileContentSpan { + Conflicted(ConflictedFileContentSpan), + Unconflicted(UnconflictedFileContentSpan), +} + +pub struct ConflictedFileContentSpan(pub Vec<(String, UnconflictedFileContentSpan)>); + +pub struct UnconflictedFileContentSpan(pub Vec); +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct FileContentNode { + id: SpanNodeId, patch: PatchId, content: Vec, } +fn mergable( + a: &[FileContentNode], + b: &ConflictedFileContentSpan, + get_patch_label: impl Fn(&PatchId) -> String, +) -> bool { + a.iter().all(|node| { + let label_a = get_patch_label(&node.patch); + b.0.iter().any(|(label_b, _)| &label_a == label_b) + }) +} + pub fn file_content( file: &FileInfo, active_patches: &HashSet, get_patch: &impl Fn(&PatchId) -> Result, + get_patch_label: impl Fn(&PatchId) -> String, ) -> Result { let mut output = Vec::new(); let mut completed_nodes = HashSet::new(); @@ -209,13 +231,15 @@ pub fn file_content( let span_contents = Vec::from(&patch.contents[content_start..content_end]); conflicting_nodes.push(FileContentNode { - patch: node.span.patch.clone(), + id: node_id.clone(), + patch: patch.id.clone(), content: span_contents, }); } if !relevant_patches.deletions.is_empty() { conflicting_nodes.push(FileContentNode { + id: node_id.clone(), patch: node.span.patch.clone(), content: Vec::new(), }); @@ -227,7 +251,43 @@ pub fn file_content( } } - output.push(FileContentSpan(conflicting_nodes)); + #[allow(clippy::collapsible_else_if)] + if conflicting_nodes.len() == 1 { + if let Some(FileContentSpan::Unconflicted(node)) = output.last_mut() { + node.0.push(conflicting_nodes[0].clone()); + } else { + output.push(FileContentSpan::Unconflicted(UnconflictedFileContentSpan( + vec![conflicting_nodes[0].clone()], + ))); + } + } else { + if let Some(FileContentSpan::Conflicted(span)) = output.last_mut() + && mergable(&conflicting_nodes, span, &get_patch_label) + { + 'outer: for node_a in conflicting_nodes { + for (label_b, span_b) in &mut span.0 { + let label_a = get_patch_label(&node_a.patch); + if &label_a == label_b { + span_b.0.push(node_a); + continue 'outer; + } + } + } + } else { + output.push(FileContentSpan::Conflicted(ConflictedFileContentSpan( + conflicting_nodes + .iter() + .map(|node| { + ( + get_patch_label(&node.patch), + UnconflictedFileContentSpan(vec![(node.clone())]), + ) + }) + .collect(), + ))); + } + } + conflicting_nodes = Vec::new(); (queue, queue_next) = (queue_next, queue); @@ -236,3 +296,41 @@ pub fn file_content( Ok(FileContent(output)) } + +pub struct FileContentMap { + nodes: Vec<(usize, SpanNodeId)>, +} + +pub fn write_file_content( + writer: &mut impl Write, + labelled_file_content: &FileContent, +) -> std::io::Result { + let mut nodes = Vec::new(); + let mut index = 0; + for span in &labelled_file_content.0 { + match span { + FileContentSpan::Unconflicted(span) => { + for node in &span.0 { + nodes.push((index, node.id.clone())); + index += node.content.len(); + writer.write_all(&node.content)?; + } + } + FileContentSpan::Conflicted(span) => { + for (label, span) in &span.0 { + let prefix = format!("======= {label}\n"); + writer.write_all(prefix.as_bytes())?; + index += prefix.len(); + + for node in &span.0 { + nodes.push((index, node.id.clone())); + writer.write_all(&node.content)?; + index += node.content.len(); + } + } + } + } + } + + Ok(FileContentMap { nodes }) +} diff --git a/src/diff.rs b/src/diff.rs new file mode 100644 index 0000000..0d11400 --- /dev/null +++ b/src/diff.rs @@ -0,0 +1,26 @@ +use std::ops::Range; + +use imara_diff::{Algorithm, Diff, Hunk, InternedInput}; + +pub enum DiffSpan { + Insertion(Range), + Deletion(Range), +} + +pub fn diff(before: &[u8], after: &[u8]) -> Vec { + let input = InternedInput::new(before, after); + let diff = Diff::compute(Algorithm::Histogram, &input); + + let hunks: Vec = diff.hunks().collect(); + let mut spans = Vec::with_capacity(hunks.len()); + for hunk in hunks { + if !hunk.before.is_empty() { + spans.push(DiffSpan::Deletion(hunk.before)); + } + if !hunk.after.is_empty() { + spans.push(DiffSpan::Insertion(hunk.after)); + } + } + + spans +} diff --git a/src/lib.rs b/src/lib.rs index e854a9d..50d43f8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ #![warn(clippy::pedantic)] #![warn(clippy::nursery)] +#![allow(clippy::redundant_else)] use std::collections::HashMap; use std::fs::{File, Metadata}; @@ -9,6 +10,7 @@ use std::time::Instant; use serde::{Deserialize, Serialize}; mod calculation; +mod diff; mod object; mod workarea; diff --git a/src/object.rs b/src/object.rs index e303900..68eec89 100644 --- a/src/object.rs +++ b/src/object.rs @@ -1,8 +1,8 @@ use std::io::{Read, Write}; use std::path::{Path, PathBuf}; -use serde::de::DeserializeOwned; use serde::Serialize; +use serde::de::DeserializeOwned; use crate::{FileInfo, Id, Patch}; -- cgit v1.2.3