Skip to content

Commit

Permalink
Auto merge of rust-lang#117576 - the8472:fix-io-copy-vec, r=Mark-Simu…
Browse files Browse the repository at this point in the history
…lacrum

Fix excessive initialization and reads beyond EOF in `io::copy(_, Vec<u8>)` specialization

fixes rust-lang#117545 and bczhc/bzip3-rs#8
  • Loading branch information
bors committed Nov 6, 2023
2 parents fee5518 + 78aa5e5 commit 7a892ab
Showing 1 changed file with 54 additions and 22 deletions.
76 changes: 54 additions & 22 deletions library/std/src/io/copy.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use super::{BorrowedBuf, BufReader, BufWriter, Read, Result, Write, DEFAULT_BUF_SIZE};
use crate::alloc::Allocator;
use crate::cmp;
use crate::cmp::min;
use crate::collections::VecDeque;
use crate::io::IoSlice;
use crate::mem::MaybeUninit;
Expand Down Expand Up @@ -263,36 +264,67 @@ impl<A: Allocator> BufferedWriterSpec for Vec<u8, A> {
fn copy_from<R: Read + ?Sized>(&mut self, reader: &mut R) -> Result<u64> {
let mut bytes = 0;

// avoid allocating before we have determined that there's anything to read
if self.capacity() == 0 {
bytes = stack_buffer_copy(&mut reader.take(DEFAULT_BUF_SIZE as u64), self)?;
if bytes == 0 {
return Ok(0);
// avoid inflating empty/small vecs before we have determined that there's anything to read
if self.capacity() < DEFAULT_BUF_SIZE {
let stack_read_limit = DEFAULT_BUF_SIZE as u64;
bytes = stack_buffer_copy(&mut reader.take(stack_read_limit), self)?;
// fewer bytes than requested -> EOF reached
if bytes < stack_read_limit {
return Ok(bytes);
}
}

// don't immediately offer the vec's whole spare capacity, otherwise
// we might have to fully initialize it if the reader doesn't have a custom read_buf() impl
let mut max_read_size = DEFAULT_BUF_SIZE;

loop {
self.reserve(DEFAULT_BUF_SIZE);
let mut buf: BorrowedBuf<'_> = self.spare_capacity_mut().into();
match reader.read_buf(buf.unfilled()) {
Ok(()) => {}
Err(e) if e.is_interrupted() => continue,
Err(e) => return Err(e),
};
let mut initialized_spare_capacity = 0;

let read = buf.filled().len();
if read == 0 {
break;
}
loop {
let buf = self.spare_capacity_mut();
let read_size = min(max_read_size, buf.len());
let mut buf = BorrowedBuf::from(&mut buf[..read_size]);
// SAFETY: init is either 0 or the init_len from the previous iteration.
unsafe {
buf.set_init(initialized_spare_capacity);
}
match reader.read_buf(buf.unfilled()) {
Ok(()) => {
let bytes_read = buf.len();

// SAFETY: BorrowedBuf guarantees all of its filled bytes are init
// and the number of read bytes can't exceed the spare capacity since
// that's what the buffer is borrowing from.
unsafe { self.set_len(self.len() + read) };
bytes += read as u64;
}
// EOF
if bytes_read == 0 {
return Ok(bytes);
}

Ok(bytes)
// the reader is returning short reads but it doesn't call ensure_init()
if buf.init_len() < buf.capacity() {
max_read_size = usize::MAX;
}
// the reader hasn't returned short reads so far
if bytes_read == buf.capacity() {
max_read_size *= 2;
}

initialized_spare_capacity = buf.init_len() - bytes_read;
bytes += bytes_read as u64;
// SAFETY: BorrowedBuf guarantees all of its filled bytes are init
// and the number of read bytes can't exceed the spare capacity since
// that's what the buffer is borrowing from.
unsafe { self.set_len(self.len() + bytes_read) };

// spare capacity full, reserve more
if self.len() == self.capacity() {
break;
}
}
Err(e) if e.is_interrupted() => continue,
Err(e) => return Err(e),
}
}
}
}
}

Expand Down

0 comments on commit 7a892ab

Please sign in to comment.