You can do a hybrid approach.
You can initialize a fixed-size array via first constructing a [(); N] and then calling .map() on it. Combined with iterators over the input arrays, this should be somewhat performant.
Although I still recommend benchmarking it vs the naive solution.
fn add_2d<'a, 'b, T, const W: usize, const H: usize>(
d1: &'a [[T; W]; H],
d2: &'b [[T; W]; H],
) -> [[T; W]; H]
where
T: 'static,
&'a T: std::ops::Add<&'b T, Output = T>,
{
let mut iter_row = d1.iter().zip(d2.iter());
[(); H].map(move |()| {
let (row1, row2) = iter_row.next().unwrap();
let mut elem_iter = row1.iter().zip(row2.iter());
[(); W].map(move |()| {
let (elem1, elem2) = elem_iter.next().unwrap();
elem1 + elem2
})
})
}
fn main() {
let d1: [[u32; 2]; 3] = [[1, 2], [4, 5], [7, 8]];
let d2: [[u32; 2]; 3] = [[10, 20], [40, 50], [70, 80]];
let sums = add_2d(&d1, &d2);
println!("{:?}", sums);
}
[[11, 22], [44, 55], [77, 88]]
Alternatively, if you don't mind unsafe, this is what I would probably go for to achieve maximum performance:
use std::mem::MaybeUninit;
fn add_2d<'a, 'b, T, const W: usize, const H: usize>(
d1: &'a [[T; W]; H],
d2: &'b [[T; W]; H],
) -> [[T; W]; H]
where
T: 'static,
&'a T: std::ops::Add<&'b T, Output = T>,
{
let mut result: MaybeUninit<[[T; W]; H]> = MaybeUninit::uninit();
let arr_ptr = result.as_mut_ptr() as *mut [T; W];
unsafe {
for y in 0..H {
let row_ptr = arr_ptr.add(y) as *mut T;
let row1 = d1.get_unchecked(y);
let row2 = d2.get_unchecked(y);
for x in 0..W {
row_ptr
.add(x)
.write(row1.get_unchecked(x) + row2.get_unchecked(x));
}
}
result.assume_init()
}
}
fn main() {
let d1: [[u32; 2]; 3] = [[1, 2], [4, 5], [7, 8]];
let d2: [[u32; 2]; 3] = [[10, 20], [40, 50], [70, 80]];
let sums = add_2d(&d1, &d2);
println!("{:?}", sums);
}
[[11, 22], [44, 55], [77, 88]]
Be sure to use .write() instead of a simple assignment operation, because the assignment operation would run the Drop function of the previous, uninitialized element. Which would cause undefined behaviour.
Benchmarks
I added hkBst's .map().collect().try_into().unwrap() based method to the benchmarks, as well:
#![feature(bench_black_box)]
use std::mem::MaybeUninit;
fn add_2d_iter<'a, 'b, T, const W: usize, const H: usize>(
d1: &'a [[T; W]; H],
d2: &'b [[T; W]; H],
) -> [[T; W]; H]
where
T: 'static,
&'a T: std::ops::Add<&'b T, Output = T>,
{
let mut iter_row = d1.iter().zip(d2.iter());
[(); H].map(move |()| {
let (row1, row2) = iter_row.next().unwrap();
let mut elem_iter = row1.iter().zip(row2.iter());
[(); W].map(move |()| {
let (elem1, elem2) = elem_iter.next().unwrap();
elem1 + elem2
})
})
}
fn add_2d_uninit<'a, 'b, T, const W: usize, const H: usize>(
d1: &'a [[T; W]; H],
d2: &'b [[T; W]; H],
) -> [[T; W]; H]
where
T: 'static,
&'a T: std::ops::Add<&'b T, Output = T>,
{
let mut result: MaybeUninit<[[T; W]; H]> = MaybeUninit::uninit();
let arr_ptr = result.as_mut_ptr() as *mut [T; W];
unsafe {
for y in 0..H {
let row_ptr = arr_ptr.add(y) as *mut T;
let row1 = d1.get_unchecked(y);
let row2 = d2.get_unchecked(y);
for x in 0..W {
row_ptr
.add(x)
.write(row1.get_unchecked(x) + row2.get_unchecked(x));
}
}
result.assume_init()
}
}
fn add_2d_unwrap<'a, 'b, T, const W: usize, const H: usize>(
a: &'a [[T; W]; H],
b: &'b [[T; W]; H],
) -> [[T; W]; H]
where
T: std::fmt::Debug + 'static,
&'a T: std::ops::Add<&'b T, Output = T>,
{
a.iter()
.zip(b.iter())
.map(|(ai, bi)| {
ai.iter()
.zip(bi.iter())
.map(|(aij, bij)| aij + bij)
.collect::<Vec<_>>()
.try_into()
.unwrap()
})
.collect::<Vec<_>>()
.try_into()
.unwrap()
}
const W: usize = 30;
const H: usize = 30;
const ITERATIONS: usize = 1000000;
fn check_result(d1: &[[i32; W]; H], d2: &[[i32; W]; H], result: &[[i32; W]; H]) -> bool {
for y in 0..H {
for x in 0..W {
if d1[y][x] + d2[y][x] != result[y][x] {
return false;
}
}
}
true
}
fn bench<'a, 'b>(
a: &'a [[i32; W]; H],
b: &'b [[i32; W]; H],
function: fn(&'a [[i32; W]; H], &'b [[i32; W]; H]) -> [[i32; W]; H],
) -> Vec<u128> {
// Assert correctness and cache warmup
{
let sums = function(a, b);
assert!(check_result(a, b, &sums));
std::hint::black_box(sums);
}
// Bench runs
(0..5)
.into_iter()
.map(|_| {
let t0 = std::time::Instant::now();
for _ in 0..ITERATIONS {
std::hint::black_box(function(a, b));
}
t0.elapsed().as_micros()
})
.collect::<Vec<_>>()
}
fn main() {
let mut num_iter = (1..).into_iter();
let mut init_arr = move || [(); H].map(|()| [(); W].map(|()| num_iter.next().unwrap()));
let d1 = init_arr();
let d2 = init_arr();
let times_iter = bench(&d1, &d2, add_2d_iter);
let times_uninit = bench(&d1, &d2, add_2d_uninit);
let times_unwrap = bench(&d1, &d2, add_2d_unwrap);
println!("iter: {:?}", times_iter);
println!("uninit: {:?}", times_uninit);
println!("unwrap: {:?}", times_unwrap);
}
> cargo +nightly run --release
iter: [223519, 207772, 208043, 208497, 208807]
uninit: [202719, 202888, 201159, 202685, 201941]
unwrap: [864150, 866233, 863023, 859486, 852098]
Seems like unwrap is a bit slower, while the other two don't really make a difference.
MaybeUninitto avoid the initialization overhead.MaybeUninit, haven't heard of it yet. Thank you!