From 5d2568bcf9710b6170fc9c489202734ebcd108c5 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 4 Jan 2019 20:14:40 +0100 Subject: [PATCH 1/9] Memory safe DMA API --- ci/dma/.gitignore | 3 + ci/dma/Cargo.toml | 9 + ci/dma/examples/eight.rs | 151 +++++++++++++++++ ci/dma/examples/five.rs | 144 ++++++++++++++++ ci/dma/examples/four.rs | 110 +++++++++++++ ci/dma/examples/one.rs | 114 +++++++++++++ ci/dma/examples/seven.rs | 99 +++++++++++ ci/dma/examples/six.rs | 158 ++++++++++++++++++ ci/dma/examples/three.rs | 114 +++++++++++++ ci/dma/examples/two.rs | 90 ++++++++++ ci/dma/src/lib.rs | 85 ++++++++++ ci/script.sh | 8 + src/SUMMARY.md | 1 + src/dma.md | 348 +++++++++++++++++++++++++++++++++++++++ 14 files changed, 1434 insertions(+) create mode 100644 ci/dma/.gitignore create mode 100644 ci/dma/Cargo.toml create mode 100644 ci/dma/examples/eight.rs create mode 100644 ci/dma/examples/five.rs create mode 100644 ci/dma/examples/four.rs create mode 100644 ci/dma/examples/one.rs create mode 100644 ci/dma/examples/seven.rs create mode 100644 ci/dma/examples/six.rs create mode 100644 ci/dma/examples/three.rs create mode 100644 ci/dma/examples/two.rs create mode 100644 ci/dma/src/lib.rs create mode 100644 src/dma.md diff --git a/ci/dma/.gitignore b/ci/dma/.gitignore new file mode 100644 index 0000000..82f2ad7 --- /dev/null +++ b/ci/dma/.gitignore @@ -0,0 +1,3 @@ +/target +.#* +**/*.rs.bk diff --git a/ci/dma/Cargo.toml b/ci/dma/Cargo.toml new file mode 100644 index 0000000..d661cd0 --- /dev/null +++ b/ci/dma/Cargo.toml @@ -0,0 +1,9 @@ +[package] +authors = ["Jorge Aparicio "] +edition = "2018" +name = "shared" +version = "0.1.0" + +[dependencies] +as-slice = "0.1.0" +pin-utils = "0.1.0-alpha.4" diff --git a/ci/dma/examples/eight.rs b/ci/dma/examples/eight.rs new file mode 100644 index 0000000..ae8a01d --- /dev/null +++ b/ci/dma/examples/eight.rs @@ -0,0 +1,151 @@ +//! Destructors + +#![deny(missing_docs, warnings)] + +use core::{ + hint, + marker::Unpin, + mem, + ops::{Deref, DerefMut}, + pin::Pin, + ptr, + sync::atomic::{self, Ordering}, +}; + +use as_slice::{AsMutSlice, AsSlice}; +use shared::{Dma1Channel1, USART1_RX, USART1_TX}; + +/// A DMA transfer +pub struct Transfer { + // NOTE: always `Some` variant + inner: Option>, +} + +// NOTE: previously named `Transfer` +struct Inner { + buffer: Pin, + serial: Serial1, +} + +impl Transfer { + /// Blocks until the transfer is done and returns the buffer + pub fn wait(mut self) -> (Pin, Serial1) { + while self.is_done() {} + + atomic::compiler_fence(Ordering::Acquire); + + let inner = self + .inner + .take() + .unwrap_or_else(|| unsafe { hint::unreachable_unchecked() }); + (inner.buffer, inner.serial) + } +} + +impl Drop for Transfer { + fn drop(&mut self) { + if let Some(inner) = self.inner.as_mut() { + // NOTE: this is a volatile write + inner.serial.dma.stop(); + + // we need a read here to make the Acquire fence effective + // we do *not* need this if `dma.stop` does a RMW operation + unsafe { + ptr::read_volatile(&0); + } + + // we need a fence here for the same reason we need one in `Transfer.wait` + atomic::compiler_fence(Ordering::Acquire); + } + } +} + +impl Serial1 { + /// Receives data into the given `buffer` until it's filled + /// + /// Returns a value that represents the in-progress DMA transfer + pub fn read_exact(mut self, mut buffer: Pin) -> Transfer + where + B: DerefMut + 'static, + B::Target: AsMutSlice + Unpin, + { + // .. same as before .. + let slice = buffer.as_mut_slice(); + let (ptr, len) = (slice.as_mut_ptr(), slice.len()); + + self.dma.set_source_address(USART1_RX, false); + self.dma.set_destination_address(ptr as usize, true); + self.dma.set_transfer_length(len); + + atomic::compiler_fence(Ordering::Release); + self.dma.start(); + + Transfer { + inner: Some(Inner { + buffer, + serial: self, + }), + } + } + + /// Sends out the given `buffer` + /// + /// Returns a value that represents the in-progress DMA transfer + pub fn write_all(mut self, buffer: Pin) -> Transfer + where + B: Deref + 'static, + B::Target: AsSlice, + { + // .. same as before .. + let slice = buffer.as_slice(); + let (ptr, len) = (slice.as_ptr(), slice.len()); + + self.dma.set_destination_address(USART1_TX, false); + self.dma.set_source_address(ptr as usize, true); + self.dma.set_transfer_length(len); + + atomic::compiler_fence(Ordering::Release); + self.dma.start(); + + Transfer { + inner: Some(Inner { + buffer, + serial: self, + }), + } + } +} + +#[allow(dead_code, unused_mut, unused_variables)] +fn reuse(serial: Serial1) { + let buf = Pin::new(Box::new([0; 16])); + + let t = serial.read_exact(buf); // compiler_fence(Ordering::Release) ▲ + + // .. + + // this stops the DMA transfer and frees memory + mem::drop(t); // compiler_fence(Ordering::Acquire) ▼ + + // this likely reuses the previous memory allocation + let mut buf = Box::new([0; 16]); + + // .. do stuff with `buf` .. +} + +// UNCHANGED + +fn main() {} + +/// A singleton that represents serial port #1 +pub struct Serial1 { + dma: Dma1Channel1, + // .. +} + +impl Transfer { + /// Returns `true` if the DMA transfer has finished + pub fn is_done(&self) -> bool { + !Dma1Channel1::in_progress() + } +} diff --git a/ci/dma/examples/five.rs b/ci/dma/examples/five.rs new file mode 100644 index 0000000..c580256 --- /dev/null +++ b/ci/dma/examples/five.rs @@ -0,0 +1,144 @@ +//! Generic buffer + +#![deny(missing_docs, warnings)] + +use core::sync::atomic::{self, Ordering}; + +use shared::{Dma1Channel1, USART1_RX, USART1_TX}; + +// as-slice = "0.1.0" +use as_slice::{AsMutSlice, AsSlice}; + +impl Serial1 { + /// Receives data into the given `buffer` until it's filled + /// + /// Returns a value that represents the in-progress DMA transfer + pub fn read_exact(mut self, mut buffer: B) -> Transfer + where + B: AsMutSlice, + { + // NOTE: added + let slice = buffer.as_mut_slice(); + let (ptr, len) = (slice.as_mut_ptr(), slice.len()); + + self.dma.set_source_address(USART1_RX, false); + + // NOTE: tweaked + self.dma.set_destination_address(ptr as usize, true); + self.dma.set_transfer_length(len); + + atomic::compiler_fence(Ordering::Release); + self.dma.start(); + + Transfer { + buffer, + serial: self, + } + } + + /// Sends out the given `buffer` + /// + /// Returns a value that represents the in-progress DMA transfer + fn write_all(mut self, buffer: B) -> Transfer + where + B: AsSlice, + { + // NOTE: added + let slice = buffer.as_slice(); + let (ptr, len) = (slice.as_ptr(), slice.len()); + + self.dma.set_destination_address(USART1_TX, false); + + // NOTE: tweaked + self.dma.set_source_address(ptr as usize, true); + self.dma.set_transfer_length(len); + + atomic::compiler_fence(Ordering::Release); + self.dma.start(); + + Transfer { + buffer, + serial: self, + } + } +} + +#[allow(dead_code, unused_variables)] +fn reuse(serial: Serial1, msg: &'static mut [u8]) { + // send a message + let t1 = serial.write_all(msg); + + // .. + + let (msg, serial) = t1.wait(); // `msg` is now `&'static [u8]` + + msg.reverse(); + + // now send it in reverse + let t2 = serial.write_all(msg); + + // .. + + let (buf, serial) = t2.wait(); + + // .. +} + +#[allow(dead_code, unused_variables)] +fn invalidate(serial: Serial1) { + let t = start(serial); + + bar(); + + let (buf, serial) = t.wait(); +} + +#[inline(never)] +fn start(serial: Serial1) -> Transfer<[u8; 16]> { + // array allocated in this frame + let buffer = [0; 16]; + + serial.read_exact(buffer) +} + +#[allow(unused_mut, unused_variables)] +#[inline(never)] +fn bar() { + // stack variables + let mut x = 0; + let mut y = 0; + + // use `x` and `y` +} + +// UNCHANGED + +fn main() {} + +/// A DMA transfer +pub struct Transfer { + buffer: B, + serial: Serial1, +} + +/// A singleton that represents serial port #1 +pub struct Serial1 { + dma: Dma1Channel1, + // .. +} + +impl Transfer { + /// Returns `true` if the DMA transfer has finished + pub fn is_done(&self) -> bool { + !Dma1Channel1::in_progress() + } + + /// Blocks until the transfer is done and returns the buffer + pub fn wait(self) -> (B, Serial1) { + while self.is_done() {} + + atomic::compiler_fence(Ordering::Acquire); + + (self.buffer, self.serial) + } +} diff --git a/ci/dma/examples/four.rs b/ci/dma/examples/four.rs new file mode 100644 index 0000000..81f7b65 --- /dev/null +++ b/ci/dma/examples/four.rs @@ -0,0 +1,110 @@ +//! Compiler (mis)optimizations + +#![deny(missing_docs, warnings)] + +use core::sync::atomic::{self, Ordering}; + +use shared::{Dma1Channel1, USART1_RX, USART1_TX}; + +impl Serial1 { + /// Receives data into the given `buffer` until it's filled + /// + /// Returns a value that represents the in-progress DMA transfer + pub fn read_exact(mut self, buffer: &'static mut [u8]) -> Transfer<&'static mut [u8]> { + self.dma.set_source_address(USART1_RX, false); + self.dma + .set_destination_address(buffer.as_mut_ptr() as usize, true); + self.dma.set_transfer_length(buffer.len()); + + // NOTE: added + atomic::compiler_fence(Ordering::Release); + + // NOTE: this is a volatile *write* + self.dma.start(); + + Transfer { + buffer, + serial: self, + } + } + + /// Sends out the given `buffer` + /// + /// Returns a value that represents the in-progress DMA transfer + pub fn write_all(mut self, buffer: &'static [u8]) -> Transfer<&'static [u8]> { + self.dma.set_destination_address(USART1_TX, false); + self.dma.set_source_address(buffer.as_ptr() as usize, true); + self.dma.set_transfer_length(buffer.len()); + + // NOTE: added + atomic::compiler_fence(Ordering::Release); + + // NOTE: this is a volatile *write* + self.dma.start(); + + Transfer { + buffer, + serial: self, + } + } +} + +impl Transfer { + /// Blocks until the transfer is done and returns the buffer + pub fn wait(self) -> (B, Serial1) { + // NOTE: this is a volatile *read* + while self.is_done() {} + + // NOTE: added + atomic::compiler_fence(Ordering::Acquire); + + (self.buffer, self.serial) + } + + // .. +} + +#[allow(dead_code, unused_variables)] +fn reorder(serial: Serial1, buf: &'static mut [u8], x: &mut u32) { + // zero the buffer (for no particular reason) + buf.iter_mut().for_each(|byte| *byte = 0); + + *x += 1; + + let t = serial.read_exact(buf); // compiler_fence(Ordering::Release) ▲ + + // NOTE: the processor can't access `buf` between the fences + // ... do other stuff .. + *x += 2; + + let (buf, serial) = t.wait(); // compiler_fence(Ordering::Acquire) ▼ + + *x += 3; + + buf.reverse(); + + // .. do stuff with `buf` .. +} + +// UNCHANGED + +fn main() {} + +/// A DMA transfer +pub struct Transfer { + buffer: B, + serial: Serial1, +} + +/// A singleton that represents serial port #1 +pub struct Serial1 { + dma: Dma1Channel1, + // .. +} + +impl Transfer { + /// Returns `true` if the DMA transfer has finished + pub fn is_done(&self) -> bool { + !Dma1Channel1::in_progress() + } +} diff --git a/ci/dma/examples/one.rs b/ci/dma/examples/one.rs new file mode 100644 index 0000000..600b5e8 --- /dev/null +++ b/ci/dma/examples/one.rs @@ -0,0 +1,114 @@ +//! A first stab + +#![deny(missing_docs, warnings)] + +use shared::{Dma1Channel1, USART1_RX, USART1_TX}; + +/// A singleton that represents serial port #1 +pub struct Serial1 { + // NOTE: we extend this struct by adding the DMA channel singleton + dma: Dma1Channel1, + // .. +} + +impl Serial1 { + /// Sends out the given `buffer` + /// + /// Returns a value that represents the in-progress DMA transfer + pub fn write_all<'a>(mut self, buffer: &'a [u8]) -> Transfer<&'a [u8]> { + self.dma.set_destination_address(USART1_TX, false); + self.dma.set_source_address(buffer.as_ptr() as usize, true); + self.dma.set_transfer_length(buffer.len()); + + self.dma.start(); + + Transfer { buffer } + } +} + +/// A DMA transfer +pub struct Transfer { + buffer: B, +} + +impl Transfer { + /// Returns `true` if the DMA transfer has finished + pub fn is_done(&self) -> bool { + !Dma1Channel1::in_progress() + } + + /// Blocks until the transfer is done and returns the buffer + pub fn wait(self) -> B { + // Busy wait until the transfer is done + while !self.is_done() {} + + self.buffer + } +} + +impl Serial1 { + /// Receives data into the given `buffer` until it's filled + /// + /// Returns a value that represents the in-progress DMA transfer + pub fn read_exact<'a>(&mut self, buffer: &'a mut [u8]) -> Transfer<&'a mut [u8]> { + self.dma.set_source_address(USART1_RX, false); + self.dma + .set_destination_address(buffer.as_mut_ptr() as usize, true); + self.dma.set_transfer_length(buffer.len()); + + self.dma.start(); + + Transfer { buffer } + } +} + +#[allow(dead_code)] +fn write(serial: Serial1) { + // fire and forget + serial.write_all(b"Hello, world!\n"); + + // do other stuff +} + +#[allow(dead_code)] +fn read(mut serial: Serial1) { + let mut buf = [0; 16]; + let t = serial.read_exact(&mut buf); + + // do other stuff + + t.wait(); + + match buf.split(|b| *b == b'\n').next() { + Some(b"some-command") => { /* do something */ } + _ => { /* do something else */ } + } +} + +use core::mem; + +#[allow(dead_code)] +fn unsound(mut serial: Serial1) { + start(&mut serial); + bar(); +} + +#[inline(never)] +fn start(serial: &mut Serial1) { + let mut buf = [0; 16]; + + // start a DMA transfer and forget the returned `Transfer` value + mem::forget(serial.read_exact(&mut buf)); +} + +#[allow(unused_variables, unused_mut)] +#[inline(never)] +fn bar() { + // stack variables + let mut x = 0; + let mut y = 0; + + // use `x` and `y` +} + +fn main() {} diff --git a/ci/dma/examples/seven.rs b/ci/dma/examples/seven.rs new file mode 100644 index 0000000..86b61ac --- /dev/null +++ b/ci/dma/examples/seven.rs @@ -0,0 +1,99 @@ +//! `'static` bound + +#![deny(missing_docs, warnings)] + +use core::{ + marker::Unpin, + ops::{Deref, DerefMut}, + pin::Pin, + sync::atomic::{self, Ordering}, +}; + +use as_slice::{AsMutSlice, AsSlice}; +use shared::{Dma1Channel1, USART1_RX, USART1_TX}; + +impl Serial1 { + /// Receives data into the given `buffer` until it's filled + /// + /// Returns a value that represents the in-progress DMA transfer + pub fn read_exact(mut self, mut buffer: Pin) -> Transfer + where + // NOTE: added 'static bound + B: DerefMut + 'static, + B::Target: AsMutSlice + Unpin, + { + // .. same as before .. + let slice = buffer.as_mut_slice(); + let (ptr, len) = (slice.as_mut_ptr(), slice.len()); + + self.dma.set_source_address(USART1_RX, false); + self.dma.set_destination_address(ptr as usize, true); + self.dma.set_transfer_length(len); + + atomic::compiler_fence(Ordering::Release); + self.dma.start(); + + Transfer { + buffer, + serial: self, + } + } + + /// Sends out the given `buffer` + /// + /// Returns a value that represents the in-progress DMA transfer + pub fn write_all(mut self, buffer: Pin) -> Transfer + where + // NOTE: added 'static bound + B: Deref + 'static, + B::Target: AsSlice, + { + // .. same as before .. + let slice = buffer.as_slice(); + let (ptr, len) = (slice.as_ptr(), slice.len()); + + self.dma.set_destination_address(USART1_TX, false); + self.dma.set_source_address(ptr as usize, true); + self.dma.set_transfer_length(len); + + atomic::compiler_fence(Ordering::Release); + self.dma.start(); + + Transfer { + buffer, + serial: self, + } + } +} + +// UNCHANGED + +fn main() {} + +/// A singleton that represents serial port #1 +pub struct Serial1 { + dma: Dma1Channel1, + // .. +} + +/// A DMA transfer +pub struct Transfer { + buffer: Pin, + serial: Serial1, +} + +impl Transfer { + /// Returns `true` if the DMA transfer has finished + pub fn is_done(&self) -> bool { + !Dma1Channel1::in_progress() + } + + /// Blocks until the transfer is done and returns the buffer + pub fn wait(self) -> (Pin, Serial1) { + while self.is_done() {} + + atomic::compiler_fence(Ordering::Acquire); + + (self.buffer, self.serial) + } +} diff --git a/ci/dma/examples/six.rs b/ci/dma/examples/six.rs new file mode 100644 index 0000000..14e5702 --- /dev/null +++ b/ci/dma/examples/six.rs @@ -0,0 +1,158 @@ +//! Immovable buffers + +#![deny(missing_docs, warnings)] + +use core::{ + marker::Unpin, + mem, + ops::{Deref, DerefMut}, + pin::Pin, + sync::atomic::{self, Ordering}, +}; + +use as_slice::{AsMutSlice, AsSlice}; +use shared::{Dma1Channel1, USART1_RX, USART1_TX}; + +/// A DMA transfer +pub struct Transfer { + // NOTE: changed + buffer: Pin, + serial: Serial1, +} + +impl Serial1 { + /// Receives data into the given `buffer` until it's filled + /// + /// Returns a value that represents the in-progress DMA transfer + pub fn read_exact(mut self, mut buffer: Pin) -> Transfer + where + // NOTE: bounds changed + B: DerefMut, + B::Target: AsMutSlice + Unpin, + { + // .. same as before .. + let slice = buffer.as_mut_slice(); + let (ptr, len) = (slice.as_mut_ptr(), slice.len()); + + self.dma.set_source_address(USART1_RX, false); + self.dma.set_destination_address(ptr as usize, true); + self.dma.set_transfer_length(len); + + atomic::compiler_fence(Ordering::Release); + self.dma.start(); + + Transfer { + buffer, + serial: self, + } + } + + /// Sends out the given `buffer` + /// + /// Returns a value that represents the in-progress DMA transfer + pub fn write_all(mut self, buffer: Pin) -> Transfer + where + // NOTE: bounds changed + B: Deref, + B::Target: AsSlice, + { + // .. same as before .. + let slice = buffer.as_slice(); + let (ptr, len) = (slice.as_ptr(), slice.len()); + + self.dma.set_destination_address(USART1_TX, false); + self.dma.set_source_address(ptr as usize, true); + self.dma.set_transfer_length(len); + + atomic::compiler_fence(Ordering::Release); + self.dma.start(); + + Transfer { + buffer, + serial: self, + } + } +} + +#[allow(dead_code, unused_variables)] +fn static_mut(serial: Serial1, buf: &'static mut [u8]) { + let buf = Pin::new(buf); + + let t = serial.read_exact(buf); + + // .. + + let (buf, serial) = t.wait(); + + // .. +} + +#[allow(dead_code, unused_variables)] +fn boxed(serial: Serial1, buf: Box<[u8]>) { + let buf = Pin::new(buf); + + let t = serial.read_exact(buf); + + // .. + + let (buf, serial) = t.wait(); + + // .. +} + +#[allow(dead_code)] +fn unsound(serial: Serial1) { + start(serial); + + bar(); +} + +// pin-utils = "0.1.0-alpha.4" +use pin_utils::pin_mut; + +#[inline(never)] +fn start(serial: Serial1) { + let buffer = [0; 16]; + + // pin the `buffer` to this stack frame + // `buffer` now has type `Pin<&mut [u8; 16]>` + pin_mut!(buffer); + + mem::forget(serial.read_exact(buffer)); +} + +#[allow(unused_mut, unused_variables)] +#[inline(never)] +fn bar() { + // stack variables + let mut x = 0; + let mut y = 0; + + // use `x` and `y` +} + +// UNCHANGED + +fn main() {} + +/// A singleton that represents serial port #1 +pub struct Serial1 { + dma: Dma1Channel1, + // .. +} + +impl Transfer { + /// Returns `true` if the DMA transfer has finished + pub fn is_done(&self) -> bool { + !Dma1Channel1::in_progress() + } + + /// Blocks until the transfer is done and returns the buffer + pub fn wait(self) -> (Pin, Serial1) { + while self.is_done() {} + + atomic::compiler_fence(Ordering::Acquire); + + (self.buffer, self.serial) + } +} diff --git a/ci/dma/examples/three.rs b/ci/dma/examples/three.rs new file mode 100644 index 0000000..518bc46 --- /dev/null +++ b/ci/dma/examples/three.rs @@ -0,0 +1,114 @@ +//! Overlapping use + +#![deny(missing_docs, warnings)] + +use shared::{Dma1Channel1, USART1_RX, USART1_TX}; + +/// A DMA transfer +pub struct Transfer { + buffer: B, + // NOTE: added + serial: Serial1, +} + +impl Transfer { + /// Blocks until the transfer is done and returns the buffer + // NOTE: the return value has changed + pub fn wait(self) -> (B, Serial1) { + // Busy wait until the transfer is done + while !self.is_done() {} + + (self.buffer, self.serial) + } + + // .. +} + +impl Serial1 { + /// Receives data into the given `buffer` until it's filled + /// + /// Returns a value that represents the in-progress DMA transfer + // NOTE we now take `self` by value + pub fn read_exact(mut self, buffer: &'static mut [u8]) -> Transfer<&'static mut [u8]> { + self.dma.set_source_address(USART1_RX, false); + self.dma + .set_destination_address(buffer.as_mut_ptr() as usize, true); + self.dma.set_transfer_length(buffer.len()); + + self.dma.start(); + + // .. same as before .. + + Transfer { + buffer, + // NOTE: added + serial: self, + } + } + + /// Sends out the given `buffer` + /// + /// Returns a value that represents the in-progress DMA transfer + // NOTE we now take `self` by value + pub fn write_all(mut self, buffer: &'static [u8]) -> Transfer<&'static [u8]> { + self.dma.set_destination_address(USART1_TX, false); + self.dma.set_source_address(buffer.as_ptr() as usize, true); + self.dma.set_transfer_length(buffer.len()); + + self.dma.start(); + + // .. same as before .. + + Transfer { + buffer, + // NOTE: added + serial: self, + } + } +} + +#[allow(dead_code, unused_variables)] +fn read(serial: Serial1, buf: &'static mut [u8; 16]) { + let t = serial.read_exact(buf); + + // let byte = serial.read(); //~ ERROR: `serial` has been moved + + // .. do stuff .. + + let (serial, buf) = t.wait(); + + // .. do more stuff .. +} + +#[allow(dead_code, unused_variables)] +fn reorder(serial: Serial1, buf: &'static mut [u8]) { + // zero the buffer (for no particular reason) + buf.iter_mut().for_each(|byte| *byte = 0); + + let t = serial.read_exact(buf); + + // ... do other stuff .. + + let (buf, serial) = t.wait(); + + buf.reverse(); + + // .. do stuff with `buf` .. +} + +// UNCHANGED + +fn main() {} + +/// A singleton that represents serial port #1 +pub struct Serial1 { + dma: Dma1Channel1, + // .. +} + +impl Transfer { + /// Returns `true` if the DMA transfer has finished + pub fn is_done(&self) -> bool { + !Dma1Channel1::in_progress() + } +} diff --git a/ci/dma/examples/two.rs b/ci/dma/examples/two.rs new file mode 100644 index 0000000..deff61d --- /dev/null +++ b/ci/dma/examples/two.rs @@ -0,0 +1,90 @@ +//! `mem::forget` + +#![deny(missing_docs, warnings)] + +use shared::{Dma1Channel1, USART1_RX, USART1_TX}; + +impl Serial1 { + /// Receives data into the given `buffer` until it's filled + /// + /// Returns a value that represents the in-progress DMA transfer + pub fn read_exact(&mut self, buffer: &'static mut [u8]) -> Transfer<&'static mut [u8]> { + // .. same as before .. + self.dma.set_source_address(USART1_RX, false); + self.dma + .set_destination_address(buffer.as_mut_ptr() as usize, true); + self.dma.set_transfer_length(buffer.len()); + + self.dma.start(); + + Transfer { buffer } + } + + /// Sends out the given `buffer` + /// + /// Returns a value that represents the in-progress DMA transfer + pub fn write_all(mut self, buffer: &'static [u8]) -> Transfer<&'static [u8]> { + // .. same as before .. + self.dma.set_destination_address(USART1_TX, false); + self.dma.set_source_address(buffer.as_ptr() as usize, true); + self.dma.set_transfer_length(buffer.len()); + + self.dma.start(); + + Transfer { buffer } + } +} + +use core::mem; + +#[allow(dead_code)] +fn sound(mut serial: Serial1, buf: &'static mut [u8; 16]) { + // NOTE `buf` is moved into `foo` + foo(&mut serial, buf); + bar(); +} + +#[inline(never)] +fn foo(serial: &mut Serial1, buf: &'static mut [u8]) { + // start a DMA transfer and forget the returned `Transfer` value + mem::forget(serial.read_exact(buf)); +} + +#[allow(unused_mut, unused_variables)] +#[inline(never)] +fn bar() { + // stack variables + let mut x = 0; + let mut y = 0; + + // use `x` and `y` +} + +// UNCHANGED +fn main() {} + +/// A singleton that represents serial port #1 +pub struct Serial1 { + dma: Dma1Channel1, + // .. +} + +/// A DMA transfer +pub struct Transfer { + buffer: B, +} + +impl Transfer { + /// Returns `true` if the DMA transfer has finished + pub fn is_done(&self) -> bool { + !Dma1Channel1::in_progress() + } + + /// Blocks until the transfer is done and returns the buffer + pub fn wait(self) -> B { + // Busy wait until the transfer is done + while !self.is_done() {} + + self.buffer + } +} diff --git a/ci/dma/src/lib.rs b/ci/dma/src/lib.rs new file mode 100644 index 0000000..8ea59c6 --- /dev/null +++ b/ci/dma/src/lib.rs @@ -0,0 +1,85 @@ +#![allow(unused_variables)] + +pub const USART1_TX: usize = 0x4000_0000; +pub const USART1_RX: usize = 0x4000_0004; + +/// A singleton that represents a single DMA channel (channel 1 in this case) +/// +/// This singleton has exclusive access to the registers of the DMA channel 1 +pub struct Dma1Channel1 { + // .. +} + +impl Dma1Channel1 { + /// Data will be written to this `address` + /// + /// `inc` indicates whether the address will be incremented after every byte transfer + /// + /// NOTE this performs a volatile write + pub fn set_destination_address(&mut self, address: usize, inc: bool) { + // .. + } + + /// Data will be read from this `address` + /// + /// `inc` indicates whether the address will be incremented after every byte transfer + /// + /// NOTE this performs a volatile write + pub fn set_source_address(&mut self, address: usize, inc: bool) { + // .. + } + + /// Number of bytes to transfer + /// + /// NOTE this performs a volatile write + pub fn set_transfer_length(&mut self, len: usize) { + // .. + } + + /// Starts the DMA transfer + /// + /// NOTE this performs a volatile write + pub fn start(&mut self) { + // .. + } + + /// Stops the DMA transfer + /// + /// NOTE this performs a volatile write + pub fn stop(&mut self) { + // .. + } + + /// Returns `true` if there's a transfer in progress + /// + /// NOTE this performs a volatile read + pub fn in_progress() -> bool { + // .. + false + } +} + +/// A singleton that represents serial port #1 +pub struct Serial1 { + // .. +} + +impl Serial1 { + /// Reads out a single byte + /// + /// NOTE: blocks if no byte is available to be read + pub fn read(&mut self) -> Result { + // .. + Ok(0) + } + + /// Sends out a single byte + /// + /// NOTE: blocks if the output FIFO buffer is full + pub fn write(&mut self, byte: u8) -> Result<(), Error> { + // .. + Ok(()) + } +} + +pub enum Error {} diff --git a/ci/script.sh b/ci/script.sh index 400b560..b5bc27f 100644 --- a/ci/script.sh +++ b/ci/script.sh @@ -219,6 +219,14 @@ main() { popd popd + + # # DMA + # NOTE(nightly) this will require nightly until core::pin is stabilized (1.33) + if [ $TRAVIS_RUST_VERSION = nightly ]; then + pushd dma + cargo build --examples + popd + fi } # checks that 2018 idioms are being used diff --git a/src/SUMMARY.md b/src/SUMMARY.md index a5145f3..7a25c7e 100644 --- a/src/SUMMARY.md +++ b/src/SUMMARY.md @@ -8,5 +8,6 @@ - [Assembly on stable](./asm.md) - [Logging with symbols](./logging.md) - [Global singletons](./singleton.md) +- [DMA](./dma.md) --- [A note on compiler support](./compiler-support.md) diff --git a/src/dma.md b/src/dma.md new file mode 100644 index 0000000..7df2311 --- /dev/null +++ b/src/dma.md @@ -0,0 +1,348 @@ +# Direct Memory Access (DMA) + +This section covers the core requirements for building a memory safe API around +DMA transfers. + +The DMA peripheral is used to perform memory transfers in parallel to the work +of the processor (the execution of the main program). A DMA transfer is more or +less equivalent to spawning a thread (see [`thread::spawn`]) to do a `memcpy`. +We'll use the fork-join model to illustrate the requirements of a memory safe +API. + +[`thread::spawn`]: https://doc.rust-lang.org/std/thread/fn.spawn.html + +Consider the following DMA primitives: + +``` rust +{{#include ../ci/dma/src/lib.rs:6:57}} +{{#include ../ci/dma/src/lib.rs:59:60}} +``` + +Assume that the `Dma1Channel1` is statically configured to work with serial port +(AKA UART or USART) #1: `Serial1`. `Serial1` provides the following *blocking* +API: + +``` rust +{{#include ../ci/dma/src/lib.rs:62:72}} +{{#include ../ci/dma/src/lib.rs:74:80}} +{{#include ../ci/dma/src/lib.rs:82:83}} +``` + +Let's say we want to extend `Serial1` API to (a) asynchronously send out a +buffer and (b) asynchronously fill a buffer. + +We'll start with a memory unsafe API and we'll iterate on it until it's +completely memory safe. On each step we'll show you how the API can be broken to +make you aware of the issues that need to be addressed when dealing with +asynchronous memory operations. + +## A first stab + +For starters, let's try to use the [`Write::write_all`] API as a reference. To +keep things simple let's ignore all error handling. + +[`Write::write_all`]: https://doc.rust-lang.org/std/io/trait.Write.html#method.write_all + +``` rust +{{#include ../ci/dma/examples/one.rs:7:47}} +``` + +> **NOTE:** `Transfer` could expose a futures or generator based API instead of +> the API shown above. That's an API design question that has little bearing on +> the memory safety of the overall API so we won't delve into it in this text. + +We can also implement an asynchronous version of [`Read::read_exact`]. + +[`Read::read_exact`]: https://doc.rust-lang.org/std/io/trait.Read.html#method.read_exact + +``` rust +{{#include ../ci/dma/examples/one.rs:49:63}} +``` + +Here's how to use the `write_all` API: + +``` rust +{{#include ../ci/dma/examples/one.rs:66:71}} +``` + +And here's an example of using the `read_exact` API: + +``` rust +{{#include ../ci/dma/examples/one.rs:74:86}} +``` + +## `mem::forget` + +[`mem::forget`] is a safe API. If our API is truly safe then we should be able +to use both together without running into undefined behavior. However, that's +not the case; consider the following example: + +[`mem::forget`]: https://doc.rust-lang.org/std/mem/fn.forget.html + +``` rust +{{#include ../ci/dma/examples/one.rs:91:103}} +{{#include ../ci/dma/examples/one.rs:105:112}} +``` + +Here we start a DMA transfer, in `foo`, to fill an array allocated on the stack +and then `mem::forget` the returned `Transfer` value. Then we proceed to return +from `foo` and execute the function `bar`. + +This series of operations results in undefined behavior. The DMA transfer writes +to stack memory but that memory is released when `foo` returns and then reused +by `bar` to allocate variables like `x` and `y`. At runtime this could result in +variables `x` and `y` changing their value at random times. The DMA transfer +could also overwrite the state (e.g. link register) pushed onto the stack by the +prologue of function `bar`. + +Note that if we had not use `mem::forget`, but `mem::drop`, it would have been +possible to make `Transfer`'s destructor stop the DMA transfer and then the +program would have been safe. But one can *not* rely on destructors running to +enforce memory safety because `mem::forget` and memory leaks (see RC cycles) are +safe in Rust. + +We can fix this particular problem by changing the lifetime of the buffer from +`'a` to `'static` in both APIs. + +``` rust +{{#include ../ci/dma/examples/two.rs:7:12}} +{{#include ../ci/dma/examples/two.rs:21:27}} +{{#include ../ci/dma/examples/two.rs:35:36}} +``` + +If we try to replicate the previous problem we note that `mem::forget` no longer +causes problems. + +``` rust +{{#include ../ci/dma/examples/two.rs:40:52}} +{{#include ../ci/dma/examples/two.rs:54:61}} +``` + +As before, the DMA transfer continues after `mem::forget`-ing the `Transfer` +value. This time that's not an issue because `buf` is statically allocated +(e.g. `static mut` variable) and not on the stack. + +## Overlapping use + +Our API doesn't prevent the user from using the `Serial` interface while the DMA +transfer is in progress. This could lead the transfer to fail or data to be +lost. + +There are several ways to prevent overlapping use. One way is to have `Transfer` +take ownership of `Serial1` and return it back when `wait` is called. + +``` rust +{{#include ../ci/dma/examples/three.rs:7:32}} +{{#include ../ci/dma/examples/three.rs:40:53}} +{{#include ../ci/dma/examples/three.rs:60:68}} +``` +The move semantics statically prevent access to `Serial1` while the transfer is +in progress. + +``` rust +{{#include ../ci/dma/examples/three.rs:71:81}} +``` + +There are other ways to prevent overlapping use. For example, a (`Cell`) flag +that indicates whether a DMA transfer is in progress could be added to +`Serial1`. When the flag is set `read`, `write`, `read_exact` and `write_all` +would all return an error (e.g. `Error::InUse`) at runtime. The flag would be +set when `write_all` / `read_exact` is used and cleared in `Transfer.wait`. + +## Compiler (mis)optimizations + +The compiler is free to re-order and merge non-volatile memory operations to +better optimize a program. With our current API, this freedom can lead to +undefined behavior. Consider the following example: + +``` rust +{{#include ../ci/dma/examples/three.rs:84:97}} +``` + +Here the compiler is free to move `buf.reverse()` before `t.wait()`, which would +result in a data race: both the processor and the DMA would end up modifying +`buf` at the same time. Similarly the compiler can move the zeroing operation to +after `read_exact`, which would also result in a data race. + +To prevent these problematic reorderings we can use a [`compiler_fence`] + +[`compiler_fence`]: https://doc.rust-lang.org/core/sync/atomic/fn.compiler_fence.html + +``` rust +{{#include ../ci/dma/examples/four.rs:9:65}} +``` + +We use `Ordering::Release` in `read_exact` and `write_all` to prevent all +preceding memory operations from being moved *after* `self.dma.start()`, which +performs a volatile write. + +Likewise, we use `Ordering::Acquire` in `Transfer.wait` to prevent all +subsequent memory operations from being moved *before* `self.is_done()`, which +performs a volatile read. + +To better visualize the effect of the fences here's a slightly tweaked version +of the example from the previous section. We have added the fences and their +orderings in the comments. + +``` rust +{{#include ../ci/dma/examples/four.rs:68:87}} +``` + +The zeroing operation can *not* be moved *after* `read_exact` due to the +`Release` fence. Similarly, the `reverse` operation can *not* be moved *before* +`wait` due to the `Acquire` fence. The memory operations *between* both fences +*can* be freely reordered across the fences but none of those operations +involves `buf` so such reorderings do *not* result in undefined behavior. + +Note that `compiler_fence` is a bit stronger than what's required. For example, +the fences will prevent the operations on `x` from being merged even though we +know that `buf` doesn't overlap with `x` (due to Rust aliasing rules). However, +there exist no intrinsic that's more fine grained than `compiler_fence`. + +> **NOTE:** You *may* need to use a [`atomic::fence`][] (a memory barrier) +> instead of a `compiler_fence` if you expect your API to be used with buffers +> shared between different cores. But this depends on how you design your API. + +[`atomic::fence`]: https://doc.rust-lang.org/core/sync/atomic/fn.fence.html + +## Generic buffer + +Our API is more restrictive that it needs to be. For example, the following +program won't be accepted even though it's valid. + +``` rust +{{#include ../ci/dma/examples/five.rs:67:85}} +``` + +To accept such program we can make the buffer argument generic. + +``` rust +{{#include ../ci/dma/examples/five.rs:9:65}} +``` + +> **NOTE:** `AsRef<[u8]>` (`AsMut<[u8]>`) could have been used instead of +> `AsSlice` (`AsMutSlice **NOTE:** To compile all the programs below this point you'll need Rust +> `>=1.33.0`. As of time of writing (2019-01-04) that means using the nightly +> channel. + +``` rust +{{#include ../ci/dma/examples/six.rs:16:33}} +{{#include ../ci/dma/examples/six.rs:48:59}} +{{#include ../ci/dma/examples/six.rs:74:75}} +``` + +> **NOTE:** We could have used the [`StableDeref`] trait instead of the `Pin` +> newtype but opted for `Pin` since it's provided in the standard library. + +[`StableDeref`]: https://crates.io/crates/stable_deref_trait + +With this new API we can use `&'static mut` references, `Box`-ed slices, `Rc`-ed +slices, etc. + +``` rust +{{#include ../ci/dma/examples/six.rs:78:89}} +{{#include ../ci/dma/examples/six.rs:91:101}} +``` + +## `'static` bound + +Does pinning let us safely use stack allocated arrays? The answer is *no*. +Consider the following example. + +``` rust +{{#include ../ci/dma/examples/six.rs:104:123}} +{{#include ../ci/dma/examples/six.rs:125:132}} +``` + +As seen many times before, the above program runs into undefined behavior due to +stack frame corruption. + +The API is unsound for buffers of type `Pin<&'a mut [u8]>` where `'a` is *not* +`'static`. To prevent the problem we have to add a `'static` bound in some +places. + +``` rust +{{#include ../ci/dma/examples/seven.rs:15:25}} +{{#include ../ci/dma/examples/seven.rs:40:51}} +{{#include ../ci/dma/examples/seven.rs:66:67}} +``` + +Now the problematic program will be rejected. + +## Destructors + +Now that the API accepts `Box`-es and other types that have destructors we need +to decide what to do when `Transfer` is early-dropped. + +Normally, `Transfer` values are consumed using the `wait` method but it's also +possible to, implicitly or explicitly, `drop` the value before the transfer is +over. For example, dropping a `Transfer>` value will cause the buffer +to be deallocated. This can result in undefined behavior if the transfer is +still in progress as the DMA would end up writing to deallocated memory. + +In such scenario one option is to make `Transfer.drop` stop the DMA transfer. +The other option is to make `Transfer.drop` wait for the transfer to finish. +We'll pick the former option as it's cheaper. + +``` rust +{{#include ../ci/dma/examples/eight.rs:18:72}} +{{#include ../ci/dma/examples/eight.rs:82:99}} +{{#include ../ci/dma/examples/eight.rs:109:117}} +``` + +Now the DMA transfer will be stopped before the buffer is deallocated. + +``` rust +{{#include ../ci/dma/examples/eight.rs:120:134}} +``` + +## Summary + +To sum it up, we need to consider all the following points to achieve memory +safe DMA transfers: + +- Use immovable buffers plus indirection: `Pin`. Alternatively, you can use + the `StableDeref` trait. + +- The ownership of the buffer must be passed to the DMA : `B: 'static`. + +- Do *not* rely on destructors running for memory safety. Consider what happens + if `mem::forget` is used with your API. + +- *Do* add a custom destructor that stops the DMA transfer, or waits for it to + finish. Consider what happens if `mem::drop` is used with your API. + +--- + +This text leaves out up several details required to build a production grade +DMA abstraction, like configuring the DMA channels (e.g. streams, circular vs +one-shot mode, etc.), alignment of buffers, error handling, how to make the +abstraction device-agnostic, etc. All those aspects are left as an exercise for +the reader / community (`:P`). From f5410d191b305d76fa29eb7307c78d64d24cf8c3 Mon Sep 17 00:00:00 2001 From: Emil Fresk Date: Mon, 14 Jan 2019 15:38:38 +0100 Subject: [PATCH 2/9] Update ci/dma/examples/seven.rs Co-Authored-By: japaric --- ci/dma/examples/seven.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/dma/examples/seven.rs b/ci/dma/examples/seven.rs index 86b61ac..3243b6e 100644 --- a/ci/dma/examples/seven.rs +++ b/ci/dma/examples/seven.rs @@ -90,7 +90,7 @@ impl Transfer { /// Blocks until the transfer is done and returns the buffer pub fn wait(self) -> (Pin, Serial1) { - while self.is_done() {} + while !self.is_done() {} atomic::compiler_fence(Ordering::Acquire); From 6f414fdb56cefcf9070d15789eb29e1e6fdef70c Mon Sep 17 00:00:00 2001 From: Emil Fresk Date: Mon, 14 Jan 2019 15:38:49 +0100 Subject: [PATCH 3/9] Update ci/dma/examples/four.rs Co-Authored-By: japaric --- ci/dma/examples/four.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/dma/examples/four.rs b/ci/dma/examples/four.rs index 81f7b65..1b628ca 100644 --- a/ci/dma/examples/four.rs +++ b/ci/dma/examples/four.rs @@ -53,7 +53,7 @@ impl Transfer { /// Blocks until the transfer is done and returns the buffer pub fn wait(self) -> (B, Serial1) { // NOTE: this is a volatile *read* - while self.is_done() {} + while !self.is_done() {} // NOTE: added atomic::compiler_fence(Ordering::Acquire); From b26fe00c1284ec7f9ceec0a75696550607f955fc Mon Sep 17 00:00:00 2001 From: Emil Fresk Date: Mon, 14 Jan 2019 15:38:57 +0100 Subject: [PATCH 4/9] Update ci/dma/examples/five.rs Co-Authored-By: japaric --- ci/dma/examples/five.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/dma/examples/five.rs b/ci/dma/examples/five.rs index c580256..d0f8a0a 100644 --- a/ci/dma/examples/five.rs +++ b/ci/dma/examples/five.rs @@ -135,7 +135,7 @@ impl Transfer { /// Blocks until the transfer is done and returns the buffer pub fn wait(self) -> (B, Serial1) { - while self.is_done() {} + while !self.is_done() {} atomic::compiler_fence(Ordering::Acquire); From 9f087674ff91d23451a5a521d07916940afae7be Mon Sep 17 00:00:00 2001 From: Emil Fresk Date: Mon, 14 Jan 2019 15:39:06 +0100 Subject: [PATCH 5/9] Update ci/dma/examples/eight.rs Co-Authored-By: japaric --- ci/dma/examples/eight.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/dma/examples/eight.rs b/ci/dma/examples/eight.rs index ae8a01d..b7a9ab1 100644 --- a/ci/dma/examples/eight.rs +++ b/ci/dma/examples/eight.rs @@ -30,7 +30,7 @@ struct Inner { impl Transfer { /// Blocks until the transfer is done and returns the buffer pub fn wait(mut self) -> (Pin, Serial1) { - while self.is_done() {} + while !self.is_done() {} atomic::compiler_fence(Ordering::Acquire); From 128b6c9e1b4145dff208ccdb24817e6c8c63fdbd Mon Sep 17 00:00:00 2001 From: Emil Fresk Date: Mon, 14 Jan 2019 15:39:17 +0100 Subject: [PATCH 6/9] Update ci/dma/examples/six.rs Co-Authored-By: japaric --- ci/dma/examples/six.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/dma/examples/six.rs b/ci/dma/examples/six.rs index 14e5702..5faf4be 100644 --- a/ci/dma/examples/six.rs +++ b/ci/dma/examples/six.rs @@ -149,7 +149,7 @@ impl Transfer { /// Blocks until the transfer is done and returns the buffer pub fn wait(self) -> (Pin, Serial1) { - while self.is_done() {} + while !self.is_done() {} atomic::compiler_fence(Ordering::Acquire); From 6835ec6f0bb7aaa4904c1895ba0f77057b555b26 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Mon, 14 Jan 2019 15:49:14 +0100 Subject: [PATCH 7/9] channel is configured in one-shot mode --- src/dma.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dma.md b/src/dma.md index 7df2311..aab0e11 100644 --- a/src/dma.md +++ b/src/dma.md @@ -19,8 +19,8 @@ Consider the following DMA primitives: ``` Assume that the `Dma1Channel1` is statically configured to work with serial port -(AKA UART or USART) #1: `Serial1`. `Serial1` provides the following *blocking* -API: +(AKA UART or USART) #1, `Serial1`, in one-shot mode (i.e. not circular mode). +`Serial1` provides the following *blocking* API: ``` rust {{#include ../ci/dma/src/lib.rs:62:72}} From ebc3110d1c38788bd3dfe53e07b07b6407fe2ec1 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Mon, 14 Jan 2019 15:59:29 +0100 Subject: [PATCH 8/9] elaborate on when a memory barrier is needed --- src/dma.md | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/dma.md b/src/dma.md index aab0e11..e82a55c 100644 --- a/src/dma.md +++ b/src/dma.md @@ -199,9 +199,27 @@ the fences will prevent the operations on `x` from being merged even though we know that `buf` doesn't overlap with `x` (due to Rust aliasing rules). However, there exist no intrinsic that's more fine grained than `compiler_fence`. -> **NOTE:** You *may* need to use a [`atomic::fence`][] (a memory barrier) -> instead of a `compiler_fence` if you expect your API to be used with buffers -> shared between different cores. But this depends on how you design your API. +### Don't we need a memory barrier? + +That depends on the target architecture. In the case of Cortex-M cores, [AN321] +says: + +[AN321]: https://static.docs.arm.com/dai0321/a/DAI0321A_programming_guide_memory_barriers_for_m_profile.pdf + +> The use of DMB is rarely needed in Cortex-M processors because they do not +> reorder memory transactions. However, it is needed if the software is to be +> reused on other ARM processors, especially multi-master systems. For example: +> +> - DMA controller configuration. A barrier is required between a CPU memory +> access and a DMA operation. +> +> (..) + +If your target is a multi-core system then it's very likely that you'll need +memory barriers. + +If you do need the memory barrier then you need to use [`atomic::fence`] instead +of `compiler_fence`. [`atomic::fence`]: https://doc.rust-lang.org/core/sync/atomic/fn.fence.html From 828228bed8ce0c5eff046c8baa903e2881c77f87 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Mon, 21 Jan 2019 15:10:58 +0100 Subject: [PATCH 9/9] note that memory barriers are needed when dealing with caches --- src/dma.md | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/src/dma.md b/src/dma.md index e82a55c..4c65b4d 100644 --- a/src/dma.md +++ b/src/dma.md @@ -201,11 +201,15 @@ there exist no intrinsic that's more fine grained than `compiler_fence`. ### Don't we need a memory barrier? -That depends on the target architecture. In the case of Cortex-M cores, [AN321] -says: +That depends on the target architecture. In the case of Cortex M0 to M4F cores, +[AN321] says: [AN321]: https://static.docs.arm.com/dai0321/a/DAI0321A_programming_guide_memory_barriers_for_m_profile.pdf +> 3.2 Typical usages +> +> (..) +> > The use of DMB is rarely needed in Cortex-M processors because they do not > reorder memory transactions. However, it is needed if the software is to be > reused on other ARM processors, especially multi-master systems. For example: @@ -214,12 +218,29 @@ says: > access and a DMA operation. > > (..) +> +> 4.18 Multi-master systems +> +> (..) +> +> Omitting the DMB or DSB instruction in the examples in Figure 41 on page 47 +> and Figure 42 would not cause any error because the Cortex-M processors: +> +> - do not re-order memory transfers +> - do not permit two write transfers to be overlapped. + +Where Figure 41 shows a DMB (memory barrier) instruction being used before +starting a DMA transaction. + +In the case of Cortex-M7 cores you'll need memory barriers (DMB/DSB) if you are +using the data cache (DCache), unless you manually invalidate the buffer used by +the DMA. If your target is a multi-core system then it's very likely that you'll need memory barriers. If you do need the memory barrier then you need to use [`atomic::fence`] instead -of `compiler_fence`. +of `compiler_fence`. That should generate a DMB instruction on Cortex-M devices. [`atomic::fence`]: https://doc.rust-lang.org/core/sync/atomic/fn.fence.html