Skip to content

Commit

Permalink
Merge pull request #409 from robertknight/faster-5d-plus-copy
Browse files (browse the repository at this point in the history)
Optimize copying of non-contiguous tensors with 5+ dimensions
  • Loading branch information
robertknight authored Nov 15, 2024
2 parents 408ebe0 + bf73c6f commit 26eeef0
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions rten-tensor/src/copy.rs
Original file line number | Diff line number | Diff line change
Expand Up
@@ -182,9 +182,14 @@ pub fn copy_into_slice<'a, T: Clone>(
     src.merge_axes();

     if src.ndim() > 4 {
-        for (dst, src) in dest.iter_mut().zip(src.iter()) {
-            dst.write(src.clone());
+        let chunk_size = src.shape()[src.ndim() - 4..].iter().product();
+        let mut n_init = 0;
+        for (src, dest) in src.inner_iter::<4>().zip(dest.chunks_mut(chunk_size)) {
+            copy_into_slice(src.as_dyn(), dest);
+            n_init += chunk_size;
         }
+        assert!(n_init == dest.len());
+
         // Safety: Loop above initialized all elements of `dest`.
         return unsafe { transmute::<&mut [MaybeUninit<T>], &[T]>(dest) };
     }
Expand Down

0 comments on commit 26eeef0

Please sign in to comment.