diff --git a/Cargo.toml b/Cargo.toml index d76fcfc..fb0e98c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "json_diff_ng" -version = "0.3.1" +version = "0.4.0" authors = ["ksceriath", "ChrisRega"] edition = "2021" license = "Unlicense" @@ -25,7 +25,7 @@ path = "src/main.rs" [dependencies] thiserror = "1.0" vg_errortools = "0.1" -serde_json = "1.0" +serde_json = { version = "1.0", features = ["preserve_order"] } maplit = "1.0" clap = {version = "4.4", features = ["derive"]} diffs = "0.5" \ No newline at end of file diff --git a/README.md b/README.md index ec10cbf..3b895d5 100644 --- a/README.md +++ b/README.md @@ -23,5 +23,3 @@ Currently, json-diff is available through crates.io (apart from building this re `$ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh` * Install json-diff `$ cargo install json_diff` - - diff --git a/src/ds/key_node.rs b/src/ds/key_node.rs index dd07987..4c1d69f 100644 --- a/src/ds/key_node.rs +++ b/src/ds/key_node.rs @@ -6,7 +6,6 @@ use std::collections::HashMap; pub enum KeyNode { Nil, Value(Value, Value), - Array(Vec<(usize, KeyNode)>), Node(HashMap), } @@ -38,7 +37,7 @@ impl KeyNode { let max_display_length = max_display_length.unwrap_or(4000); let val_key = |key: Option| { key.map(|mut s| { - s.push_str(" ->"); + s.push_str("->"); s }) .unwrap_or_default() @@ -58,16 +57,7 @@ impl KeyNode { for (key, value) in map { value.absolute_keys( keys, - Some(format!("{} {}", val_key(key_from_root.clone()), key)), - Some(max_display_length), - ) - } - } - KeyNode::Array(vec) => { - for (idx, value) in vec { - value.absolute_keys( - keys, - Some(format!("[l: {}] {}", idx, val_key(key_from_root.clone()))), + Some(format!("{}{}", val_key(key_from_root.clone()), key)), Some(max_display_length), ) } diff --git a/src/ds/mismatch.rs b/src/ds/mismatch.rs index 2e71c5f..e3f3376 100644 --- a/src/ds/mismatch.rs +++ b/src/ds/mismatch.rs @@ -25,6 +25,12 @@ impl Mismatch { } } + pub fn is_empty(&self) -> bool { + self.left_only_keys == KeyNode::Nil + && self.keys_in_both == KeyNode::Nil + && self.right_only_keys == KeyNode::Nil + } + pub fn all_diffs(&self) -> Vec<(DiffType, ValueType)> { self.all_diffs_trunc(None) } diff --git a/src/enums.rs b/src/enums.rs index da15cc7..c9d261e 100644 --- a/src/enums.rs +++ b/src/enums.rs @@ -68,7 +68,7 @@ impl Display for ValueType { key, value_right, } => { - write!(f, "{key} {{ {value_left} != {value_right} }}") + write!(f, "{key}{{{value_left}!={value_right}}}") } } } diff --git a/src/main.rs b/src/main.rs index 56abcb5..36a1efb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,6 +20,10 @@ struct Args { #[command(subcommand)] cmd: Mode, + #[clap(short, long)] + /// deep-sort arrays before comparing + sort_arrays: bool, + #[clap(short, long, default_value_t = 20)] /// truncate keys with more chars then this parameter truncation_length: usize, @@ -36,7 +40,7 @@ fn main() -> Result<(), Error> { } }; - let mismatch = compare_jsons(&json_1, &json_2)?; + let mismatch = compare_jsons(&json_1, &json_2, args.sort_arrays)?; let comparison_result = check_diffs(mismatch)?; if !comparison_result { diff --git a/src/process.rs b/src/process.rs index 4730be6..2d54243 100644 --- a/src/process.rs +++ b/src/process.rs @@ -1,4 +1,5 @@ use diffs::{myers, Diff, Replace}; +use std::borrow::Cow; use std::collections::HashMap; use std::collections::HashSet; @@ -9,10 +10,10 @@ use serde_json::Value; use crate::ds::key_node::KeyNode; use crate::ds::mismatch::Mismatch; -pub fn compare_jsons(a: &str, b: &str) -> Result { +pub fn compare_jsons(a: &str, b: &str, sort_arrays: bool) -> Result { let value1 = serde_json::from_str(a)?; let value2 = serde_json::from_str(b)?; - Ok(match_json(&value1, &value2)) + Ok(match_json(&value1, &value2, sort_arrays)) } fn values_to_node(vec: Vec<(usize, &Value)>) -> KeyNode { if vec.is_empty() { @@ -20,7 +21,7 @@ fn values_to_node(vec: Vec<(usize, &Value)>) -> KeyNode { } else { KeyNode::Node( vec.into_iter() - .map(|(id, val)| (format!("[l: {id}] - {}", val.to_string()), KeyNode::Nil)) + .map(|(id, val)| (format!("[l: {id}]-{}", val), KeyNode::Nil)) .collect(), ) } @@ -60,7 +61,7 @@ impl<'a> Diff for ListDiffHandler<'a> { } } -pub fn match_json(value1: &Value, value2: &Value) -> Mismatch { +pub fn match_json(value1: &Value, value2: &Value, sort_arrays: bool) -> Mismatch { match (value1, value2) { (Value::Object(a), Value::Object(b)) => { let diff = intersect_maps(a, b); @@ -76,7 +77,7 @@ pub fn match_json(value1: &Value, value2: &Value) -> Mismatch { left_only_keys: l, right_only_keys: r, keys_in_both: u, - } = match_json(a.get(&key).unwrap(), b.get(&key).unwrap()); + } = match_json(a.get(&key).unwrap(), b.get(&key).unwrap(), sort_arrays); left_only_keys = insert_child_key_map(left_only_keys, l, &key); right_only_keys = insert_child_key_map(right_only_keys, r, &key); unequal_keys = insert_child_key_map(unequal_keys, u, &key); @@ -86,6 +87,9 @@ pub fn match_json(value1: &Value, value2: &Value) -> Mismatch { } // this clearly needs to be improved! myers algorithm or whatever? (Value::Array(a), Value::Array(b)) => { + let a = preprocess_array(sort_arrays, a); + let b = preprocess_array(sort_arrays, b); + let mut replaced = Vec::new(); let mut deleted = Vec::new(); let mut inserted = Vec::new(); @@ -95,43 +99,55 @@ pub fn match_json(value1: &Value, value2: &Value) -> Mismatch { &mut deleted, &mut inserted, )); - myers::diff(&mut diff, a, 0, a.len(), b, 0, b.len()).unwrap(); - - let mismatch: Vec<_> = replaced - .into_iter() - .flat_map(|(o, ol, n, nl)| { - let max_length = ol.max(nl); - (0..max_length).map(move |i| { - ( - o + i, - match_json( - a.get(o + i).unwrap_or(&Value::Null), - b.get(n + i).unwrap_or(&Value::Null), - ) - .keys_in_both, - ) - }) - }) - .collect(); - - let left_only_values: Vec<_> = deleted - .into_iter() - .flat_map(|(o, ol)| (o..o + ol).map(|i| (i, &a[i]))) - .collect(); - - let right_only_values: Vec<_> = inserted - .into_iter() - .flat_map(|(n, nl)| (n..n + nl).map(|i| (i, &b[i]))) - .collect(); - - let left_only_nodes = values_to_node(left_only_values); - let right_only_nodes = values_to_node(right_only_values); - let mismatch = if mismatch.is_empty() { - KeyNode::Nil - } else { - KeyNode::Array(mismatch) - }; - Mismatch::new(left_only_nodes, right_only_nodes, mismatch) + myers::diff( + &mut diff, + a.as_slice(), + 0, + a.len(), + b.as_slice(), + 0, + b.len(), + ) + .unwrap(); + + fn extract_one_sided_values( + v: Vec<(usize, usize)>, + vals: &[Value], + ) -> Vec<(usize, &Value)> { + v.into_iter() + .flat_map(|(o, ol)| (o..o + ol).map(|i| (i, &vals[i]))) + .collect::>() + } + + let left_only_values: Vec<_> = extract_one_sided_values(deleted, a.as_slice()); + let right_only_values: Vec<_> = extract_one_sided_values(inserted, b.as_slice()); + + let mut left_only_nodes = values_to_node(left_only_values); + let mut right_only_nodes = values_to_node(right_only_values); + let mut diff = KeyNode::Nil; + + for (o, ol, n, nl) in replaced { + let max_length = ol.max(nl); + for i in 0..max_length { + let inner_a = a.get(o + i).unwrap_or(&Value::Null); + let inner_b = b.get(n + i).unwrap_or(&Value::Null); + + let cdiff = match_json(inner_a, inner_b, sort_arrays); + let position = o + i; + let Mismatch { + left_only_keys: l, + right_only_keys: r, + keys_in_both: u, + } = cdiff; + left_only_nodes = + insert_child_key_map(left_only_nodes, l, &format!("[l: {position}]")); + right_only_nodes = + insert_child_key_map(right_only_nodes, r, &format!("[l: {position}]")); + diff = insert_child_key_map(diff, u, &format!("[l: {position}]")); + } + } + + Mismatch::new(left_only_nodes, right_only_nodes, diff) } (a, b) => { if a == b { @@ -147,6 +163,72 @@ pub fn match_json(value1: &Value, value2: &Value) -> Mismatch { } } +fn preprocess_array(sort_arrays: bool, a: &Vec) -> Cow> { + if sort_arrays { + let mut owned = a.to_owned(); + owned.sort_by(compare_values); + Cow::Owned(owned) + } else { + Cow::Borrowed(a) + } +} + +fn compare_values(a: &Value, b: &Value) -> std::cmp::Ordering { + match (a, b) { + (Value::Null, Value::Null) => std::cmp::Ordering::Equal, + (Value::Null, _) => std::cmp::Ordering::Less, + (_, Value::Null) => std::cmp::Ordering::Greater, + (Value::Bool(a), Value::Bool(b)) => a.cmp(b), + (Value::Number(a), Value::Number(b)) => { + if let (Some(a), Some(b)) = (a.as_i64(), b.as_i64()) { + return a.cmp(&b); + } + if let (Some(a), Some(b)) = (a.as_f64(), b.as_f64()) { + return a.partial_cmp(&b).unwrap_or(std::cmp::Ordering::Equal); + } + // Handle other number types if needed + std::cmp::Ordering::Equal + } + (Value::String(a), Value::String(b)) => a.cmp(b), + (Value::Array(a), Value::Array(b)) => { + for (a, b) in a.iter().zip(b.iter()) { + let cmp = compare_values(a, b); + if cmp != std::cmp::Ordering::Equal { + return cmp; + } + } + a.len().cmp(&b.len()) + } + (Value::Object(a), Value::Object(b)) => { + let mut keys_a: Vec<_> = a.keys().collect(); + let mut keys_b: Vec<_> = b.keys().collect(); + keys_a.sort(); + keys_b.sort(); + for (key_a, key_b) in keys_a.iter().zip(keys_b.iter()) { + let cmp = key_a.cmp(key_b); + if cmp != std::cmp::Ordering::Equal { + return cmp; + } + let value_a = &a[*key_a]; + let value_b = &b[*key_b]; + let cmp = compare_values(value_a, value_b); + if cmp != std::cmp::Ordering::Equal { + return cmp; + } + } + keys_a.len().cmp(&keys_b.len()) + } + (Value::Object(_), _) => std::cmp::Ordering::Less, + (_, Value::Object(_)) => std::cmp::Ordering::Greater, + (Value::Bool(_), _) => std::cmp::Ordering::Less, + (_, Value::Bool(_)) => std::cmp::Ordering::Greater, + (Value::Number(_), _) => std::cmp::Ordering::Less, + (_, Value::Number(_)) => std::cmp::Ordering::Greater, + (Value::String(_), _) => std::cmp::Ordering::Less, + (_, Value::String(_)) => std::cmp::Ordering::Greater, + } +} + fn get_map_of_keys(set: Option>) -> KeyNode { if let Some(set) = set { KeyNode::Node( @@ -227,26 +309,88 @@ mod tests { use maplit::hashmap; use serde_json::json; + #[test] + fn test_arrays_sorted_simple() { + let data1 = r#"["a","b","c"]"#; + let data2 = r#"["b","c","a"]"#; + let diff = compare_jsons(data1, data2, true).unwrap(); + assert!(diff.is_empty()); + } + + #[test] + fn test_arrays_sorted_objects() { + let data1 = r#"[{"c": {"d": "e"} },"b","c"]"#; + let data2 = r#"["b","c",{"c": {"d": "e"} }]"#; + let diff = compare_jsons(data1, data2, true).unwrap(); + assert!(diff.is_empty()); + } + + #[test] + fn test_arrays_deep_sorted_objects() { + let data1 = r#"[{"c": ["d","e"] },"b","c"]"#; + let data2 = r#"["b","c",{"c": ["e", "d"] }]"#; + let diff = compare_jsons(data1, data2, true).unwrap(); + assert!(diff.is_empty()); + } + + #[test] + fn test_arrays_deep_sorted_objects_with_outer_diff() { + let data1 = r#"[{"c": ["d","e"] },"b"]"#; + let data2 = r#"["b","c",{"c": ["e", "d"] }]"#; + let diff = compare_jsons(data1, data2, true).unwrap(); + assert!(!diff.is_empty()); + let insertions = diff.right_only_keys.absolute_keys_to_vec(None); + assert_eq!(insertions.len(), 1); + assert_eq!(insertions.first().unwrap().to_string(), r#"[l: 2]-"c""#); + } + + #[test] + fn test_arrays_deep_sorted_objects_with_inner_diff() { + let data1 = r#"["a",{"c": ["d","e", "f"] },"b"]"#; + let data2 = r#"["b",{"c": ["e","d"] },"a"]"#; + let diff = compare_jsons(data1, data2, true).unwrap(); + assert!(!diff.is_empty()); + let deletions = diff.left_only_keys.absolute_keys_to_vec(None); + + assert_eq!(deletions.len(), 1); + assert_eq!( + deletions.first().unwrap().to_string(), + r#"[l: 0]->c->[l: 2]-"f""# + ); + } + + #[test] + fn test_arrays_deep_sorted_objects_with_inner_diff_mutation() { + let data1 = r#"["a",{"c": ["d", "f"] },"b"]"#; + let data2 = r#"["b",{"c": ["e","d"] },"a"]"#; + let diff = compare_jsons(data1, data2, true).unwrap(); + assert!(!diff.is_empty()); + let diffs = diff.keys_in_both.absolute_keys_to_vec(None); + + assert_eq!(diffs.len(), 1); + assert_eq!( + diffs.first().unwrap().to_string(), + r#"[l: 0]->c->[l: 1]->{"f"!="e"}"# + ); + } + #[test] fn test_arrays_simple_diff() { let data1 = r#"["a","b","c"]"#; let data2 = r#"["a","b","d"]"#; - let diff = compare_jsons(data1, data2).unwrap(); + let diff = compare_jsons(data1, data2, false).unwrap(); assert_eq!(diff.left_only_keys, KeyNode::Nil); assert_eq!(diff.right_only_keys, KeyNode::Nil); let diff = diff.keys_in_both.absolute_keys_to_vec(None); assert_eq!(diff.len(), 1); - assert_eq!( - diff.first().unwrap().to_string(), - r#"[l: 2] -> { "c" != "d" }"# - ); + assert_eq!(diff.first().unwrap().to_string(), r#"[l: 2]->{"c"!="d"}"#); } #[test] fn test_arrays_more_complex_diff() { let data1 = r#"["a","b","c"]"#; let data2 = r#"["a","a","b","d"]"#; - let diff = compare_jsons(data1, data2).unwrap(); + let diff = compare_jsons(data1, data2, false).unwrap(); let changes_diff = diff.keys_in_both.absolute_keys_to_vec(None); assert_eq!(diff.left_only_keys, KeyNode::Nil); @@ -254,22 +398,22 @@ mod tests { assert_eq!(changes_diff.len(), 1); assert_eq!( changes_diff.first().unwrap().to_string(), - r#"[l: 2] -> { "c" != "d" }"# + r#"[l: 2]->{"c"!="d"}"# ); let insertions = diff.right_only_keys.absolute_keys_to_vec(None); assert_eq!(insertions.len(), 1); - assert_eq!(insertions.first().unwrap().to_string(), r#" [l: 0] - "a""#); + assert_eq!(insertions.first().unwrap().to_string(), r#"[l: 0]-"a""#); } #[test] fn test_arrays_extra_left() { let data1 = r#"["a","b","c"]"#; let data2 = r#"["a","b"]"#; - let diff = compare_jsons(data1, data2).unwrap(); + let diff = compare_jsons(data1, data2, false).unwrap(); let diffs = diff.left_only_keys.absolute_keys_to_vec(None); assert_eq!(diffs.len(), 1); - assert_eq!(diffs.first().unwrap().to_string(), r#" [l: 2] - "c""#); + assert_eq!(diffs.first().unwrap().to_string(), r#"[l: 2]-"c""#); assert_eq!(diff.keys_in_both, KeyNode::Nil); assert_eq!(diff.right_only_keys, KeyNode::Nil); } @@ -278,11 +422,11 @@ mod tests { fn test_arrays_extra_right() { let data1 = r#"["a","b"]"#; let data2 = r#"["a","b","c"]"#; - let diff = compare_jsons(data1, data2).unwrap(); + let diff = compare_jsons(data1, data2, false).unwrap(); let diffs = diff.right_only_keys.absolute_keys_to_vec(None); assert_eq!(diffs.len(), 1); - assert_eq!(diffs.first().unwrap().to_string(), r#" [l: 2] - "c""#); + assert_eq!(diffs.first().unwrap().to_string(), r#"[l: 2]-"c""#); assert_eq!(diff.keys_in_both, KeyNode::Nil); assert_eq!(diff.left_only_keys, KeyNode::Nil); } @@ -291,14 +435,14 @@ mod tests { fn long_insertion_modification() { let data1 = r#"["a","b","a"]"#; let data2 = r#"["a","c","c","c","a"]"#; - let diff = compare_jsons(data1, data2).unwrap(); + let diff = compare_jsons(data1, data2, false).unwrap(); let diffs = diff.keys_in_both.absolute_keys_to_vec(None); - // 1. is b!=c, second is a!=c, third is missing in data1 but c in data2 + assert_eq!(diffs.len(), 3); - assert_eq!( - diffs.last().unwrap().to_string(), - r#"[l: 3] -> { null != "c" }"# - ); + let diffs: Vec<_> = diffs.into_iter().map(|d| d.to_string()).collect(); + assert!(diffs.contains(&r#"[l: 3]->{null!="c"}"#.to_string())); + assert!(diffs.contains(&r#"[l: 1]->{"b"!="c"}"#.to_string())); + assert!(diffs.contains(&r#"[l: 2]->{"a"!="c"}"#.to_string())); assert_eq!(diff.right_only_keys, KeyNode::Nil); assert_eq!(diff.left_only_keys, KeyNode::Nil); } @@ -307,13 +451,13 @@ mod tests { fn test_arrays_object_extra() { let data1 = r#"["a","b"]"#; let data2 = r#"["a","b", {"c": {"d": "e"} }]"#; - let diff = compare_jsons(data1, data2).unwrap(); + let diff = compare_jsons(data1, data2, false).unwrap(); let diffs = diff.right_only_keys.absolute_keys_to_vec(None); assert_eq!(diffs.len(), 1); assert_eq!( diffs.first().unwrap().to_string(), - r#" [l: 2] - {"c":{"d":"e"}}"# + r#"[l: 2]-{"c":{"d":"e"}}"# ); assert_eq!(diff.keys_in_both, KeyNode::Nil); assert_eq!(diff.left_only_keys, KeyNode::Nil); @@ -390,7 +534,7 @@ mod tests { }); let expected = Mismatch::new(expected_left, expected_right, expected_uneq); - let mismatch = compare_jsons(data1, data2).unwrap(); + let mismatch = compare_jsons(data1, data2, false).unwrap(); assert_eq!(mismatch, expected, "Diff was incorrect."); } @@ -426,7 +570,7 @@ mod tests { }"#; assert_eq!( - compare_jsons(data1, data2).unwrap(), + compare_jsons(data1, data2, false).unwrap(), Mismatch::new(KeyNode::Nil, KeyNode::Nil, KeyNode::Nil) ); } @@ -437,7 +581,7 @@ mod tests { let data2 = r#"{}"#; assert_eq!( - compare_jsons(data1, data2).unwrap(), + compare_jsons(data1, data2, false).unwrap(), Mismatch::new(KeyNode::Nil, KeyNode::Nil, KeyNode::Nil) ); } @@ -446,7 +590,7 @@ mod tests { fn parse_err_source_one() { let invalid_json1 = r#"{invalid: json}"#; let valid_json2 = r#"{"a":"b"}"#; - match compare_jsons(invalid_json1, valid_json2) { + match compare_jsons(invalid_json1, valid_json2, false) { Ok(_) => panic!("This shouldn't be an Ok"), Err(err) => { matches!(err, Error::JSON(_)); @@ -458,7 +602,7 @@ mod tests { fn parse_err_source_two() { let valid_json1 = r#"{"a":"b"}"#; let invalid_json2 = r#"{invalid: json}"#; - match compare_jsons(valid_json1, invalid_json2) { + match compare_jsons(valid_json1, invalid_json2, false) { Ok(_) => panic!("This shouldn't be an Ok"), Err(err) => { matches!(err, Error::JSON(_));