diff --git a/examples/strings/suf_ary_cmp_aizu.rs b/examples/strings/suf_ary_cmp_aizu.rs index 196d2bd1..94975235 100644 --- a/examples/strings/suf_ary_cmp_aizu.rs +++ b/examples/strings/suf_ary_cmp_aizu.rs @@ -7,26 +7,23 @@ use programming_team_code_rust::strings::suf_ary::SufAry; fn main() { input! { n: usize, - mut a: [i32; n], + mut a: [usize; n], m: usize, - b: [i32; m] + b: [usize; m] } let are_equal = a == b; - a.push(-1); a.extend(b); let (cmps, max_val) = compress(&a); let suf_ary = SufAry::new(&cmps, max_val); - let res = suf_ary.cmp_substrs(0..n, n + 1..n + 1 + m); + let res = suf_ary.cmp_substrs(0..n, n..n + m); if are_equal { assert_eq!(res, std::cmp::Ordering::Equal); - assert_eq!(suf_ary.cmp_sufs(0, n + 1), std::cmp::Ordering::Greater); println!("0"); } else { - assert_eq!(res, suf_ary.cmp_sufs(0, n + 1)); println!("{}", (res == std::cmp::Ordering::Less) as usize); } } diff --git a/examples/strings/suf_ary_find_many_strs_aizu.rs b/examples/strings/suf_ary_find_many_strs_aizu.rs index 466a47dc..d07358d3 100644 --- a/examples/strings/suf_ary_find_many_strs_aizu.rs +++ b/examples/strings/suf_ary_find_many_strs_aizu.rs @@ -3,6 +3,9 @@ use proconio::input; use programming_team_code_rust::strings::suf_ary::SufAry; +mod suf_ary_push_pop_char_asserts; +use suf_ary_push_pop_char_asserts::suf_ary_push_pop_char_asserts; + fn main() { input! { s: String, @@ -16,8 +19,9 @@ fn main() { input! { t: String } - let t_vec = t.chars().map(|x| x as usize).collect::>(); - let range = suf_ary.find_str(&t_vec); + let t = t.chars().map(|x| x as usize).collect::>(); + let range = suf_ary.find_str(&t); + suf_ary_push_pop_char_asserts(s.len(), &suf_ary, &range, &t); println!("{}", !range.is_empty() as usize); } } diff --git a/examples/strings/suf_ary_find_str_aizu.rs b/examples/strings/suf_ary_find_str_aizu.rs index 9cc8846c..a11aee9f 100644 --- a/examples/strings/suf_ary_find_str_aizu.rs +++ b/examples/strings/suf_ary_find_str_aizu.rs @@ -3,17 +3,23 @@ use proconio::input; use programming_team_code_rust::strings::suf_ary::SufAry; +mod suf_ary_push_pop_char_asserts; +use suf_ary_push_pop_char_asserts::suf_ary_push_pop_char_asserts; + fn main() { input! { s: String, t: String } - let s_vec = s.chars().map(|x| x as usize).collect::>(); - let t_vec = t.chars().map(|x| x as usize).collect::>(); + let s = s.chars().map(|x| x as usize).collect::>(); + let t = t.chars().map(|x| x as usize).collect::>(); + + let suf_ary = SufAry::new(&s, 255); + let range = suf_ary.find_str(&t); + + suf_ary_push_pop_char_asserts(s.len(), &suf_ary, &range, &t); - let suf_ary = SufAry::new(&s_vec, 255); - let range = suf_ary.find_str(&t_vec); let mut res: Vec = Vec::new(); res.extend_from_slice(&suf_ary.sa[range]); res.sort(); diff --git a/examples/strings/suf_ary_find_substr_aizu.rs b/examples/strings/suf_ary_find_substr_aizu.rs index 9f41f620..790267af 100644 --- a/examples/strings/suf_ary_find_substr_aizu.rs +++ b/examples/strings/suf_ary_find_substr_aizu.rs @@ -3,6 +3,9 @@ use proconio::input; use programming_team_code_rust::strings::suf_ary::SufAry; +mod suf_ary_push_pop_substr_asserts; +use suf_ary_push_pop_substr_asserts::suf_ary_push_pop_substr_asserts; + fn main() { input! { s: String, @@ -14,7 +17,11 @@ fn main() { both.extend(s.chars().map(|x| x as usize).collect::>()); let suf_ary = SufAry::new(&both, 255); - let mut res = suf_ary.sa[suf_ary.find_substr(0..m)] + let range = suf_ary.find_substr(0..m); + + suf_ary_push_pop_substr_asserts(both.len(), &suf_ary, &range, &(0..m)); + + let mut res = suf_ary.sa[range] .iter() .copied() .filter(|&i| i >= m) diff --git a/examples/strings/suf_ary_find_substr_many_aizu.rs b/examples/strings/suf_ary_find_substr_many_aizu.rs index dd811ebc..5ae34f33 100644 --- a/examples/strings/suf_ary_find_substr_many_aizu.rs +++ b/examples/strings/suf_ary_find_substr_many_aizu.rs @@ -4,13 +4,16 @@ use proconio::input; use programming_team_code_rust::data_structures::rmq::RMQ; use programming_team_code_rust::strings::suf_ary::SufAry; +mod suf_ary_push_pop_substr_asserts; +use suf_ary_push_pop_substr_asserts::suf_ary_push_pop_substr_asserts; + fn main() { input! { s: String, q: usize } - let num_queries_find_substr = q.min(100); + let num_queries_find_substr = q.min(3); let mut s = s.chars().map(|x| x as usize).collect::>(); @@ -29,7 +32,11 @@ fn main() { let rmq = RMQ::new(&suf_ary.sa, std::cmp::min); for i in 0..num_queries_find_substr { - let idx = rmq.query(suf_ary.find_substr(length[i]..length[i + 1])); + let s_to_look_for_range = length[i]..length[i + 1]; + let range = suf_ary.find_substr(s_to_look_for_range.clone()); + suf_ary_push_pop_substr_asserts(s.len(), &suf_ary, &range, &s_to_look_for_range); + + let idx = rmq.query(range); println!( "{}", (idx + length[i + 1] - length[i] <= length[0]) as usize diff --git a/examples/strings/suf_ary_push_pop_char_asserts.rs b/examples/strings/suf_ary_push_pop_char_asserts.rs new file mode 100644 index 00000000..d13984a5 --- /dev/null +++ b/examples/strings/suf_ary_push_pop_char_asserts.rs @@ -0,0 +1,41 @@ +use programming_team_code_rust::strings::suf_ary::SufAry; +use std::ops::Range; +pub fn suf_ary_push_pop_char_asserts( + n: usize, + suf_ary: &SufAry, + range: &Range, + t: &[usize], +) { + let mut push_pop_sa_range = 0..n; + let mut push_pop_t_range = t.len() / 2..t.len() / 2; + loop { + let mut found = false; + + if push_pop_t_range.start > 0 { + found = true; + let lcp_len = push_pop_t_range.len(); + push_pop_t_range.start -= 1; + push_pop_sa_range = + suf_ary.push_front_char(t[push_pop_t_range.start], push_pop_sa_range, lcp_len); + } + + if push_pop_t_range.end < t.len() { + found = true; + push_pop_sa_range = suf_ary.push_back_char( + t[push_pop_t_range.end], + push_pop_sa_range, + push_pop_t_range.len(), + ); + push_pop_t_range.end += 1; + } + + if !found { + break; + } + } + + assert_eq!(range.is_empty(), push_pop_sa_range.is_empty()); + if !range.is_empty() { + assert_eq!(*range, push_pop_sa_range); + } +} diff --git a/examples/strings/suf_ary_push_pop_substr_asserts.rs b/examples/strings/suf_ary_push_pop_substr_asserts.rs new file mode 100644 index 00000000..97c3b655 --- /dev/null +++ b/examples/strings/suf_ary_push_pop_substr_asserts.rs @@ -0,0 +1,52 @@ +use programming_team_code_rust::strings::suf_ary::SufAry; +use std::ops::Range; +pub fn suf_ary_push_pop_substr_asserts( + tot_len: usize, + suf_ary: &SufAry, + range: &Range, + s_to_look_for_range: &Range, +) { + assert_eq!( + suf_ary.push_front_substr(0..tot_len, tot_len..tot_len, 0), + tot_len..tot_len + ); + let mut splits = (0..6) + .map(|_| { + s_to_look_for_range.start + (rand::random::() % (s_to_look_for_range.len() + 1)) + }) + .collect::>(); + splits.push(s_to_look_for_range.start); + splits.push(s_to_look_for_range.end); + splits.sort(); + let subarrays = (1..splits.len()) + .map(|i| splits[i - 1]..splits[i]) + .collect::>(); + let mut push_pop_range = 0..tot_len; + let mut push_pop_lcp_len = 0; + let mut range_subarray = subarrays.len() / 2..subarrays.len() / 2; + loop { + let mut found = false; + if range_subarray.start > 0 { + found = true; + range_subarray.start -= 1; + let curr_substr = &subarrays[range_subarray.start]; + push_pop_range = + suf_ary.push_front_substr(curr_substr.clone(), push_pop_range, push_pop_lcp_len); + push_pop_lcp_len += curr_substr.len(); + } + + if range_subarray.end < subarrays.len() { + found = true; + let curr_substr = &subarrays[range_subarray.end]; + push_pop_range = + suf_ary.push_back_substr(curr_substr.clone(), push_pop_range, push_pop_lcp_len); + push_pop_lcp_len += curr_substr.len(); + range_subarray.end += 1; + } + + if !found { + break; + } + } + assert_eq!(*range, push_pop_range); +} diff --git a/src/strings/suf_ary.rs b/src/strings/suf_ary.rs index af6a217b..1d9a126d 100644 --- a/src/strings/suf_ary.rs +++ b/src/strings/suf_ary.rs @@ -33,7 +33,7 @@ use ac_library::string::{lcp_array_arbitrary, suffix_array_manual}; /// // || /// // 2 nana 5 /// -/// let suf_ary1 = SufAry::new(&s.chars().map(|c| c as usize).collect::>(), 255); +/// let suf_ary1 = SufAry::new(&s.chars().map(|c| c as usize).collect::>(), 255); /// let n = suf_ary1.sa.len(); /// assert_eq!(suf_ary1.sa, [5, 3, 1, 0, 4, 2]); /// assert_eq!(suf_ary1.sa_inv, [3, 2, 5, 1, 4, 0]); @@ -44,20 +44,30 @@ use ac_library::string::{lcp_array_arbitrary, suffix_array_manual}; /// let suf_ary2 = SufAry::new(&a_comp, max_val); /// /// assert_eq!(suf_ary1.len_lcp(1, 3), 3); -/// assert!(std::panic::catch_unwind(|| suf_ary1.len_lcp(1, n)).is_err()); /// /// assert_eq!(suf_ary1.cmp_sufs(1, 3), Ordering::Greater); /// assert!(std::panic::catch_unwind(|| suf_ary1.cmp_sufs(n, 2)).is_err()); /// /// assert_eq!(suf_ary1.cmp_substrs(1..4, 3..6), Ordering::Equal); -/// assert!(std::panic::catch_unwind(|| suf_ary1.cmp_substrs(3..4, n..n)).is_err()); /// /// assert_eq!(suf_ary1.find_str(&"ana".chars().map(|c| c as usize).collect::>()), 1..3); /// assert_eq!(suf_ary1.find_str(&[]), 0..n); /// /// assert_eq!(suf_ary1.find_substr(1..4), 1..3); -/// assert_eq!(suf_ary1.find_substr(1..1), 0..n); +/// assert_eq!(suf_ary1.find_substr(2..2), 0..n); /// assert!(std::panic::catch_unwind(|| suf_ary1.find_substr(n..n)).is_err()); +/// +/// assert_eq!(suf_ary1.push_back_char('n' as usize, 0..3, 1), 1..3); +/// assert_eq!(suf_ary1.push_back_char('n' as usize, n..n, 0), n..n); +/// +/// assert_eq!(suf_ary1.push_front_char('a' as usize, 4..6, 2), 1..3); +/// assert_eq!(suf_ary1.push_front_char('a' as usize, n..n, 0), n..n); +/// +/// assert_eq!(suf_ary1.push_back_substr(4..6, 0..3, 1), 1..3); +/// assert_eq!(suf_ary1.push_back_substr(4..6, n..n, 0), n..n); +/// +/// assert_eq!(suf_ary1.push_front_substr(3..5, 0..3, 1), 1..3); +/// assert_eq!(suf_ary1.push_front_substr(3..5, n..n, 0), n..n); /// ``` pub struct SufAry { n: usize, @@ -69,6 +79,7 @@ pub struct SufAry { /// longest common prefix array pub lcp: Vec, rmq: RMQ usize>, + cnt: Vec, } impl SufAry { @@ -87,6 +98,13 @@ impl SufAry { for (i, &elem) in sa.iter().enumerate() { sa_inv[elem] = i; } + let mut cnt = vec![0; max_val + 1]; + for &c in s { + cnt[c + 1] += 1; + } + for i in 1..cnt.len() { + cnt[i] += cnt[i - 1]; + } Self { n: sa.len(), s: s.to_vec(), @@ -94,6 +112,7 @@ impl SufAry { rmq: RMQ::new(&lcp, std::cmp::min), lcp, sa, + cnt, } } @@ -103,8 +122,9 @@ impl SufAry { /// - Time: O(1) /// - Space: O(1) pub fn len_lcp(&self, i1: usize, i2: usize) -> usize { - if i1 == i2 { - return self.n - i1; + let mx = std::cmp::max(i1, i2); + if i1 == i2 || mx == self.n { + return self.n - mx; } let (mut le, mut ri) = (self.sa_inv[i1], self.sa_inv[i2]); if le > ri { @@ -150,19 +170,123 @@ impl SufAry { } /// Gets range r such that: - /// - for all i in sa\[r\] s\[i..i + substr.len()\] == s\[substr\] - /// - r.len() is the number of matches of s\[substr\] in s + /// - for all i in sa\[r\] s\[i..i + s_substr.len()\] == s\[s_substr\] + /// - r.len() is the number of matches of s\[s_substr\] in s /// /// # Complexity /// - Time: O(log(|s|)) /// - Space: O(1) - pub fn find_substr(&self, substr: Range) -> Range { + pub fn find_substr(&self, s_substr: Range) -> Range { let cmp = |i: usize, flip: bool| -> bool { - flip ^ (self.len_lcp(i, substr.start) < substr.len()) + flip ^ (self.len_lcp(i, s_substr.start) < s_substr.len()) }; - let idx = self.sa_inv[substr.start]; + let idx = self.sa_inv[s_substr.start]; let le = self.sa[..idx].partition_point(|&i| cmp(i, false)); let ri = self.sa[idx + 1..].partition_point(|&i| cmp(i, true)) + idx + 1; le..ri } + + /// let t = s\[sa\[sa_range.start\]..sa\[sa_range.start\] + lcp_len\] + c + /// + /// Gets range r such that: + /// - for all i in sa\[r\] s\[i..i + t.len()\] == t + /// - r.len() is the number of matches of t in s + /// + /// # Complexity + /// - Time: O(log(|s|)) + /// - Space: O(1) + pub fn push_back_char(&self, c: usize, sa_range: Range, lcp_len: usize) -> Range { + if !sa_range.is_empty() { + assert!(lcp_len <= self.len_lcp(self.sa[sa_range.start], self.sa[sa_range.end - 1])); + } + let le = self.sa[sa_range.clone()] + .partition_point(|&i| i + lcp_len == self.n || self.s[i + lcp_len] < c) + + sa_range.start; + let ri = self.sa[le..sa_range.end].partition_point(|&i| self.s[i + lcp_len] == c) + le; + le..ri + } + + /// let t = c + s\[sa\[sa_range.start\]..sa\[sa_range.start\] + lcp_len\] + /// + /// Gets range r such that: + /// - for all i in sa\[r\] s\[i..i + t.len()\] == t + /// - r.len() is the number of matches of t in s + /// + /// # Complexity + /// - Time: O(log(|s|)) + /// - Space: O(1) + pub fn push_front_char( + &self, + c: usize, + sa_range: Range, + lcp_len: usize, + ) -> Range { + if !sa_range.is_empty() { + assert!(lcp_len <= self.len_lcp(self.sa[sa_range.start], self.sa[sa_range.end - 1])); + } + if sa_range.is_empty() { + sa_range + } else { + let i = self.sa[sa_range.start]; + self.push_back_substr(i..i + lcp_len, self.cnt[c]..self.cnt[c + 1], 1) + } + } + + /// let t = s\[sa\[sa_range.start\]..sa\[sa_range.start\] + lcp_len\] + s\[s_substr\] + /// + /// Gets range r such that: + /// - for all i in sa\[r\] s\[i..i + t.len()\] == t + /// - r.len() is the number of matches of t in s + /// + /// # Complexity + /// - Time: O(log(|s|)) + /// - Space: O(1) + pub fn push_back_substr( + &self, + s_substr: Range, + sa_range: Range, + lcp_len: usize, + ) -> Range { + if !sa_range.is_empty() { + assert!(lcp_len <= self.len_lcp(self.sa[sa_range.start], self.sa[sa_range.end - 1])); + } + let cmp = |mut i: usize| -> Ordering { + i += lcp_len; + self.cmp_substrs(i..(i + s_substr.len()).min(self.n), s_substr.clone()) + }; + let le = self.sa[sa_range.clone()].partition_point(|&i| cmp(i) == Ordering::Less) + + sa_range.start; + let ri = self.sa[le..sa_range.end].partition_point(|&i| cmp(i) == Ordering::Equal) + le; + le..ri + } + + /// let t = s\[s_substr\] + s\[sa\[sa_range.start\]..sa\[sa_range.start\] + lcp_len\] + /// + /// Gets range r such that: + /// - for all i in sa\[r\] s\[i..i + t.len()\] == t + /// - r.len() is the number of matches of t in s + /// + /// # Complexity + /// - Time: O(log(|s|)) + /// - Space: O(1) + pub fn push_front_substr( + &self, + s_substr: Range, + sa_range: Range, + lcp_len: usize, + ) -> Range { + if !sa_range.is_empty() { + assert!(lcp_len <= self.len_lcp(self.sa[sa_range.start], self.sa[sa_range.end - 1])); + } + if sa_range.is_empty() { + sa_range + } else { + let i = self.sa[sa_range.start]; + self.push_back_substr( + i..i + lcp_len, + self.find_substr(s_substr.clone()), + s_substr.len(), + ) + } + } }