diff --git a/.github/workflows/sim-apps-job/test_apps.py b/.github/workflows/sim-apps-job/test_apps.py index d22b876bd..2804c25cb 100755 --- a/.github/workflows/sim-apps-job/test_apps.py +++ b/.github/workflows/sim-apps-job/test_apps.py @@ -33,6 +33,7 @@ class BColors: "example_spi_read", "example_spidma_powergate", "example_spi_write", + "example_dma_subaddressing", ] app_list = [app for app in os.listdir("sw/applications")] diff --git a/core-v-mini-mcu.core b/core-v-mini-mcu.core index aef1f20e2..e07824be3 100644 --- a/core-v-mini-mcu.core +++ b/core-v-mini-mcu.core @@ -339,6 +339,7 @@ targets: vsim_options: - -sv_lib ../../../hw/vendor/lowrisc_opentitan/hw/dv/dpi/uartdpi/uartdpi - -sv_lib ../../../hw/vendor/pulp_platform_pulpissimo/rtl/tb/remote_bitbang/librbs + - -voptargs=+acc=npr vcs: vcs_options: - -override_timescale=1ns/1ps diff --git a/docs/source/Peripherals/DMA.md b/docs/source/Peripherals/DMA.md index 22bbc0694..88844c468 100644 --- a/docs/source/Peripherals/DMA.md +++ b/docs/source/Peripherals/DMA.md @@ -318,6 +318,8 @@ The previous parameters, including the register offsets, can be found at `sw/dev - 0: _linear mode_ - 1: _circular mode_ - 2: _address mode_ + - 3: _subaddress mode_ + - 4: _hardware fifo mode_
@@ -560,7 +562,7 @@ If senseless configurations are input to functions, assertions may halt the whol #### Transaction modes -There are three different transaction modes: +There are five different transaction modes: **Single Mode:** The default mode, where the DMA channel will perform the copy from the source target to the destination, and trigger an interrupt once done. @@ -569,6 +571,10 @@ There are three different transaction modes: **Address Mode:** Instead of using the destination pointer and increment to decide where to copy information, an _address list_ must be provided, containing addresses for each data unit being copied. It is only carried out in _single_ mode. In this mode it's possible to perform only 1D transactions. +**Subaddress Mode:** In this mode, the DMA can be configured to transfer words, half words or bytes from a slot (e.g. SPI) or any other fixed location to a destination target. It is mostly useful when the source target is fixed and the source data type is a half word or a byte: the DMA sequentially reads the half words or bytes composing each word retrieved from the slot (or fixed location) and forwards them to the destination target. + +**Hardware Fifo Mode:** The DMA fetches data from the source target and forwards it directly to an external accelerator tightly coupled with the DMA itself. The accelerator must have two internal fifos. The first one, referred to as the hardware read fifo, is filled with source target data directly by the DMA. The accelerator is then in charge of popping from the hardware read fifo and processing the data. Finally, the results must be pushed into the second fifo, referred to as the hardware write fifo. The DMA reads data from the hardware write fifo and stores it into the destination target. 
+ #### Windows @@ -747,8 +753,9 @@ Here is a brief overview of the examples: 6) Matrix zero padding 7) Multichannel mem2mem transaction, focusing on the IRQ handler 8) Multichannel flash2mem transaction using the SPI FLASH +9) Single-channel flash2mem transactions with different data widths (bytes, half-words and words) using the SPI FLASH -The complete code for these examples can be found in `sw/applications/example_dma`, `sw/applications/example_dma_2d`, `sw/applications/example_dma_multichannel` and `sw/applications/example_dma_sdk`. These applications offer both verification and performance estimation modes, enabling users to verify the DMA and measure the application's execution time. +The complete code for these examples can be found in `sw/applications/example_dma`, `sw/applications/example_dma_2d`, `sw/applications/example_dma_multichannel`, `sw/applications/example_dma_sdk` and `sw/applications/example_dma_subaddressing`. These applications offer both verification and performance estimation modes, enabling users to verify the DMA and measure the application's execution time. The user is strongly incouraged to look at these applications, as well as any other application that employs the DMA, to gain insight in practical examples of the use of this peripheral. Some aspects or specific usecases might in fact not be present in this guide and could be found in the applications. 
diff --git a/hw/core-v-mini-mcu/ao_peripheral_subsystem.sv b/hw/core-v-mini-mcu/ao_peripheral_subsystem.sv index f054c37b4..d9932b530 100644 --- a/hw/core-v-mini-mcu/ao_peripheral_subsystem.sv +++ b/hw/core-v-mini-mcu/ao_peripheral_subsystem.sv @@ -6,6 +6,7 @@ module ao_peripheral_subsystem import obi_pkg::*; import reg_pkg::*; import power_manager_pkg::*; + import hw_fifo_pkg::*; #( parameter AO_SPC_NUM = 0, //do not touch these parameters @@ -76,6 +77,9 @@ module ao_peripheral_subsystem output logic dma_done_intr_o, output logic dma_window_intr_o, + output hw_fifo_req_t [core_v_mini_mcu_pkg::DMA_CH_NUM-1:0] hw_fifo_req_o, + input hw_fifo_resp_t [core_v_mini_mcu_pkg::DMA_CH_NUM-1:0] hw_fifo_resp_i, + // External PADs output reg_req_t pad_req_o, input reg_rsp_t pad_resp_i, @@ -395,6 +399,8 @@ module ao_peripheral_subsystem .reg_rsp_t(reg_pkg::reg_rsp_t), .obi_req_t(obi_pkg::obi_req_t), .obi_resp_t(obi_pkg::obi_resp_t), + .hw_fifo_req_t(hw_fifo_pkg::hw_fifo_req_t), + .hw_fifo_resp_t(hw_fifo_pkg::hw_fifo_resp_t), .GLOBAL_SLOT_NUM(DMA_GLOBAL_TRIGGER_SLOT_NUM), .EXT_SLOT_NUM(DMA_EXT_TRIGGER_SLOT_NUM) ) dma_subsystem_i ( @@ -409,6 +415,8 @@ module ao_peripheral_subsystem .dma_write_resp_i, .dma_addr_req_o, .dma_addr_resp_i, + .hw_fifo_req_o, + .hw_fifo_resp_i, .global_trigger_slot_i(dma_global_trigger_slots), .ext_trigger_slot_i(dma_ext_trigger_slots), .ext_dma_stop_i(ext_dma_stop_i), diff --git a/hw/core-v-mini-mcu/core_v_mini_mcu.sv b/hw/core-v-mini-mcu/core_v_mini_mcu.sv index 3ebc1e77f..6d8dbdcc1 100644 --- a/hw/core-v-mini-mcu/core_v_mini_mcu.sv +++ b/hw/core-v-mini-mcu/core_v_mini_mcu.sv @@ -5,6 +5,7 @@ module core_v_mini_mcu import obi_pkg::*; import reg_pkg::*; + import hw_fifo_pkg::*; #( parameter COREV_PULP = 0, parameter FPU = 0, @@ -304,6 +305,9 @@ module core_v_mini_mcu output obi_req_t [core_v_mini_mcu_pkg::DMA_NUM_MASTER_PORTS-1:0] ext_dma_addr_req_o, input obi_resp_t [core_v_mini_mcu_pkg::DMA_NUM_MASTER_PORTS-1:0] ext_dma_addr_resp_i, + output 
hw_fifo_req_t [core_v_mini_mcu_pkg::DMA_CH_NUM-1:0] hw_fifo_req_o, + input hw_fifo_resp_t [core_v_mini_mcu_pkg::DMA_CH_NUM-1:0] hw_fifo_resp_i, + input logic [core_v_mini_mcu_pkg::DMA_CH_NUM-1:0] ext_dma_stop_i, output reg_req_t ext_peripheral_slave_req_o, @@ -665,6 +669,8 @@ module core_v_mini_mcu .dma_addr_resp_i(dma_addr_resp), .dma_done_intr_o(dma_done_intr), .dma_window_intr_o(dma_window_intr), + .hw_fifo_req_o, + .hw_fifo_resp_i, .spi_flash_intr_event_o(spi_flash_intr), .pad_req_o, .pad_resp_i, diff --git a/hw/core-v-mini-mcu/core_v_mini_mcu.sv.tpl b/hw/core-v-mini-mcu/core_v_mini_mcu.sv.tpl index aedf2e2c3..e63035160 100644 --- a/hw/core-v-mini-mcu/core_v_mini_mcu.sv.tpl +++ b/hw/core-v-mini-mcu/core_v_mini_mcu.sv.tpl @@ -5,6 +5,7 @@ module core_v_mini_mcu import obi_pkg::*; import reg_pkg::*; + import hw_fifo_pkg::*; #( parameter COREV_PULP = 0, parameter FPU = 0, @@ -58,6 +59,9 @@ ${pad.core_v_mini_mcu_interface} output obi_req_t [core_v_mini_mcu_pkg::DMA_NUM_MASTER_PORTS-1:0] ext_dma_addr_req_o, input obi_resp_t [core_v_mini_mcu_pkg::DMA_NUM_MASTER_PORTS-1:0] ext_dma_addr_resp_i, + output hw_fifo_req_t [core_v_mini_mcu_pkg::DMA_CH_NUM-1:0] hw_fifo_req_o, + input hw_fifo_resp_t [core_v_mini_mcu_pkg::DMA_CH_NUM-1:0] hw_fifo_resp_i, + input logic [core_v_mini_mcu_pkg::DMA_CH_NUM-1:0] ext_dma_stop_i, output reg_req_t ext_peripheral_slave_req_o, @@ -413,6 +417,8 @@ ${pad.core_v_mini_mcu_interface} .dma_addr_resp_i(dma_addr_resp), .dma_done_intr_o(dma_done_intr), .dma_window_intr_o(dma_window_intr), + .hw_fifo_req_o, + .hw_fifo_resp_i, .spi_flash_intr_event_o(spi_flash_intr), .pad_req_o, .pad_resp_i, diff --git a/hw/core-v-mini-mcu/include/hw_fifo_pkg.sv b/hw/core-v-mini-mcu/include/hw_fifo_pkg.sv new file mode 100644 index 000000000..9733ea4cc --- /dev/null +++ b/hw/core-v-mini-mcu/include/hw_fifo_pkg.sv @@ -0,0 +1,20 @@ +// Copyright 2022 OpenHW Group +// Solderpad Hardware License, Version 2.1, see LICENSE.md for details. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +package hw_fifo_pkg; + + typedef struct packed { + logic pop; + logic push; + logic [31:0] data; + } hw_fifo_req_t; + + typedef struct packed { + logic empty; + logic full; + logic push; + logic [31:0] data; + } hw_fifo_resp_t; + +endpackage diff --git a/hw/core-v-mini-mcu/include/x-heep_packages.core b/hw/core-v-mini-mcu/include/x-heep_packages.core index 6cbf61b01..da77211b4 100644 --- a/hw/core-v-mini-mcu/include/x-heep_packages.core +++ b/hw/core-v-mini-mcu/include/x-heep_packages.core @@ -14,6 +14,7 @@ filesets: - obi_pkg.sv - reg_pkg.sv - power_manager_pkg.sv + - hw_fifo_pkg.sv - core_v_mini_mcu_pkg.sv file_type: systemVerilogSource diff --git a/hw/ip/dma/data/dma.hjson b/hw/ip/dma/data/dma.hjson index 4f144c612..f6656add9 100644 --- a/hw/ip/dma/data/dma.hjson +++ b/hw/ip/dma/data/dma.hjson @@ -186,12 +186,14 @@ hwaccess: "hro", resval: 0, fields: [ - { bits: "1:0", name: "MODE", + { bits: "2:0", name: "MODE", desc: "DMA operation mode", enum: [ { value: "0", name: "LINEAR_MODE", desc: "Transfers data linearly"}, { value: "1", name: "CIRCULAR_MODE", desc: "Transfers data in circular mode"}, - { value: "2", name: "ADDRESS_MODE" , desc: "Transfers data using as destination address the data from ADD_PTR"}, + { value: "2", name: "ADDRESS_MODE", desc: "Transfers data using as destination address the data from ADD_PTR"}, + { value: "3", name: "SUBADDRESS_MODE", desc: "Implements transferring of data when SRC_PTR is fixed and related to a peripheral"}, + { value: "4", name: "HW_FIFO_MODE", desc: "Mode for exploting external stream accelerators"} ] } ] @@ -302,6 +304,23 @@ fields: [ { bits: "0", name: "FLAG", desc: "Set for window done interrupt" } ] - } + }, + { name: "HW_FIFO_MODE_SIGN_EXT", + desc: '''In HW_FIFO_MODE, is the input data to be sign extended before sending it to the hw read fifo? 
+ (The input of the hw read fifo is on 32 bits, which could be wider than the src data type)''', + swaccess: "rw", + hwaccess: "hro", + resval: 0, + + fields: [ + { bits: "0", name: "HW_FIFO_SIGNED", + desc: "Extend the sign to 32 bits", + enum: [ + { value: "0", name: "NO_EXTEND", desc: "Does not extend the sign"}, + { value: "1", name: "EXTEND", desc: "Extends the sign"}, + ] + } + ] + }, ] } diff --git a/hw/ip/dma/dma.core b/hw/ip/dma/dma.core index 0bf343e74..d0e3978a7 100644 --- a/hw/ip/dma/dma.core +++ b/hw/ip/dma/dma.core @@ -18,6 +18,8 @@ filesets: - rtl/dma_obiread_fsm.sv - rtl/dma_obiread_addr_fsm.sv - rtl/dma_obiwrite_fsm.sv + - rtl/hw_r_fifo_ctrl.sv + - rtl/hw_w_fifo_ctrl.sv - rtl/dma.sv file_type: systemVerilogSource diff --git a/hw/ip/dma/rtl/dma.sv b/hw/ip/dma/rtl/dma.sv index ef3ce5434..0f3190e6d 100644 --- a/hw/ip/dma/rtl/dma.sv +++ b/hw/ip/dma/rtl/dma.sv @@ -12,6 +12,8 @@ module dma #( parameter type reg_rsp_t = logic, parameter type obi_req_t = logic, parameter type obi_resp_t = logic, + parameter type hw_fifo_req_t = logic, + parameter type hw_fifo_resp_t = logic, parameter int unsigned SLOT_NUM = 0 ) ( input logic clk_i, @@ -32,6 +34,9 @@ module dma #( output obi_req_t dma_addr_req_o, input obi_resp_t dma_addr_resp_i, + input hw_fifo_resp_t hw_fifo_resp_i, + output hw_fifo_req_t hw_fifo_req_o, + input logic [SLOT_NUM-1:0] trigger_slot_i, output dma_done_intr_o, @@ -77,6 +82,7 @@ module dma #( logic [31:0] data_in_addr; logic data_in_gnt; logic data_in_rvalid; + logic data_in_rvalid_fifo; logic [31:0] data_in_rdata; logic data_addr_in_req; @@ -105,15 +111,17 @@ module dma #( logic dma_window_intr_n; /* FIFO signals */ - logic [Addr_Fifo_Depth-1:0] read_fifo_usage; + logic [3:0][Addr_Fifo_Depth-1:0] read_fifo_usage; logic [Addr_Fifo_Depth-1:0] read_addr_fifo_usage; logic [Addr_Fifo_Depth-1:0] write_fifo_usage; logic fifo_flush; - logic read_fifo_full; - logic read_fifo_empty; + logic [3:0] read_fifo_full; + logic [3:0] read_fifo_empty; logic 
read_fifo_alm_full; - logic read_fifo_pop; + logic pad_read_fifo_pop; + logic [3:0] subaddr_read_fifo_pop; + logic [3:0] read_fifo_pop; logic [31:0] read_fifo_input; logic [31:0] read_fifo_output; @@ -125,10 +133,12 @@ module dma #( logic write_fifo_full; logic write_fifo_empty; logic write_fifo_alm_full; + logic pad_write_fifo_push; logic write_fifo_push; logic write_fifo_pop; - logic [31:0] write_fifo_input; + logic [31:0] pad_write_fifo_input; logic [31:0] write_fifo_output; + logic [31:0] write_fifo_input; /* Trigger signals */ logic wait_for_rx; @@ -151,8 +161,12 @@ module dma #( } dma_state_q, dma_state_d; + dma_data_type_t src_data_type; + logic circular_mode; logic address_mode; + logic subaddressing_mode; + logic hw_fifo_mode; logic dma_start_pending; @@ -181,27 +195,88 @@ module dma #( assign clk_cg = clk_i & clk_gate_en_ni; `endif + fifo_v3 #( + .DEPTH(FIFO_DEPTH), + .FALL_THROUGH(1'b0), + .DATA_WIDTH(8) + ) dma_read_fifo_0_i ( + .clk_i(clk_cg), + .rst_ni, + .flush_i(fifo_flush), + .testmode_i(1'b0), + // status flags + .full_o(read_fifo_full[0]), + .empty_o(read_fifo_empty[0]), + .usage_o(read_fifo_usage[0]), + // as long as the queue is not full we can push new data + .data_i(read_fifo_input[7:0]), + .push_i(data_in_rvalid_fifo), + // as long as the queue is not empty we can pop new elements + .data_o(read_fifo_output[7:0]), + .pop_i(read_fifo_pop[0]) + ); + fifo_v3 #( + .DEPTH(FIFO_DEPTH), + .FALL_THROUGH(1'b0), + .DATA_WIDTH(8) + ) dma_read_fifo_1_i ( + .clk_i(clk_cg), + .rst_ni, + .flush_i(fifo_flush), + .testmode_i(1'b0), + // status flags + .full_o(read_fifo_full[1]), + .empty_o(read_fifo_empty[1]), + .usage_o(read_fifo_usage[1]), + // as long as the queue is not full we can push new data + .data_i(read_fifo_input[15:8]), + .push_i(data_in_rvalid_fifo), + // as long as the queue is not empty we can pop new elements + .data_o(read_fifo_output[15:8]), + .pop_i(read_fifo_pop[1]) + ); - /* Read FIFO */ fifo_v3 #( .DEPTH(FIFO_DEPTH), - 
.FALL_THROUGH(1'b0) - ) dma_read_fifo_i ( + .FALL_THROUGH(1'b0), + .DATA_WIDTH(8) + ) dma_read_fifo_2_i ( .clk_i(clk_cg), .rst_ni, .flush_i(fifo_flush), .testmode_i(1'b0), // status flags - .full_o(read_fifo_full), - .empty_o(read_fifo_empty), - .usage_o(read_fifo_usage), + .full_o(read_fifo_full[2]), + .empty_o(read_fifo_empty[2]), + .usage_o(read_fifo_usage[2]), // as long as the queue is not full we can push new data - .data_i(read_fifo_input), - .push_i(data_in_rvalid), + .data_i(read_fifo_input[23:16]), + .push_i(data_in_rvalid_fifo), // as long as the queue is not empty we can pop new elements - .data_o(read_fifo_output), - .pop_i(read_fifo_pop) + .data_o(read_fifo_output[23:16]), + .pop_i(read_fifo_pop[2]) + ); + + fifo_v3 #( + .DEPTH(FIFO_DEPTH), + .FALL_THROUGH(1'b0), + .DATA_WIDTH(8) + ) dma_read_fifo_3_i ( + .clk_i(clk_cg), + .rst_ni, + .flush_i(fifo_flush), + .testmode_i(1'b0), + // status flags + .full_o(read_fifo_full[3]), + .empty_o(read_fifo_empty[3]), + .usage_o(read_fifo_usage[3]), + // as long as the queue is not full we can push new data + .data_i(read_fifo_input[31:24]), + .push_i(data_in_rvalid_fifo), + // as long as the queue is not empty we can pop new elements + .data_o(read_fifo_output[31:24]), + .pop_i(read_fifo_pop[3]) ); /* Read address mode FIFO */ @@ -267,9 +342,13 @@ module dma #( .dma_start_i(dma_start), .dma_done_i(dma_done), .ext_dma_stop_i, - .read_fifo_full_i(read_fifo_full), + .read_fifo_full_i(|read_fifo_full), .read_fifo_alm_full_i(read_fifo_alm_full), .wait_for_rx_i(wait_for_rx), + + .hw_fifo_mode_i (hw_fifo_mode), + .hw_r_fifo_full_i(hw_fifo_resp_i.full), + .data_in_gnt_i(data_in_gnt), .data_in_rvalid_i(data_in_rvalid), .data_in_rdata_i(data_in_rdata), @@ -298,6 +377,8 @@ module dma #( .data_addr_in_addr_o(data_addr_in_addr) ); + logic hw_r_fifo_push_padding; + /* DMA padding FSM */ dma_padding_fsm dma_padding_fsm_i ( .clk_i(clk_cg), @@ -305,26 +386,38 @@ module dma #( .reg2hw_i(reg2hw), 
.dma_padding_fsm_on_i(dma_padding_fsm_on), .dma_start_i(dma_start), - .read_fifo_empty_i(read_fifo_empty), + .read_fifo_empty_i(&(read_fifo_empty)), .write_fifo_full_i(write_fifo_full), .write_fifo_alm_full_i(write_fifo_alm_full), .data_read_i(read_fifo_output), + + .hw_fifo_mode_i(hw_fifo_mode), + .hw_r_fifo_push_padding_o(hw_r_fifo_push_padding), + .hw_w_fifo_push_i(hw_fifo_resp_i.push), + .padding_fsm_done_o(padding_fsm_done), - .write_fifo_push_o(write_fifo_push), - .read_fifo_pop_o(read_fifo_pop), - .data_write_o(write_fifo_input) + .write_fifo_push_o(pad_write_fifo_push), + .read_fifo_pop_o(pad_read_fifo_pop), + .data_write_o(pad_write_fifo_input) ); + logic [31:0] hw_w_fifo_data; /* Write FSM */ dma_obiwrite_fsm dma_obiwrite_fsm_i ( .clk_i(clk_cg), .rst_ni, .reg2hw_i(reg2hw), + // .reg_req_i, .dma_start_i(dma_start), .write_fifo_empty_i(write_fifo_empty), .read_addr_fifo_empty_i(read_addr_fifo_empty), .fifo_output_i(write_fifo_output), .wait_for_tx_i(wait_for_tx), + + .hw_fifo_mode_i(hw_fifo_mode), + .hw_w_fifo_data_i(hw_w_fifo_data), + .hw_w_fifo_empty_i(hw_fifo_resp_i.empty), + .address_mode_i(address_mode), .padding_fsm_done_i(padding_fsm_done), .fifo_addr_output_i(read_addr_fifo_output), @@ -337,6 +430,26 @@ module dma #( .dma_done_o(dma_done) ); + /* Hardware Read Fifo Interface Controller */ + hw_r_fifo_ctrl hw_r_fifo_ctrl_i ( + .hw_fifo_mode_i(hw_fifo_mode), + .data_i(read_fifo_input), + .data_valid_i(data_in_rvalid), + .hw_r_fifo_push_padding_i(hw_r_fifo_push_padding), + .push_o(hw_fifo_req_o.push), + .data_o(hw_fifo_req_o.data) + ); + + + /* Hardware Write Fifo Interface Controller */ + hw_w_fifo_ctrl hw_w_fifo_ctrl_i ( + .hw_fifo_mode_i(hw_fifo_mode), + .data_o(hw_w_fifo_data), + .data_out_gnt_i(write_fifo_pop), + .data_i(hw_fifo_resp_i.data), + .pop_o(hw_fifo_req_o.pop) + ); + /*_________________________________________________________________________________________________________________________________ */ /* FSMs instantiation */ @@ -495,6 
+608,129 @@ module dma #( end end + // Subaddressing mode controlling logic + + always_ff @(posedge clk_cg, negedge rst_ni) begin + if (~rst_ni) begin + subaddr_read_fifo_pop <= 4'b0000; + end else begin + if (subaddressing_mode == 1'b1) begin + case (src_data_type) + DMA_DATA_TYPE_HALF_WORD: begin + + if (dma_start == 1'b1) begin + subaddr_read_fifo_pop <= 4'b0011; + end else if (pad_read_fifo_pop == 1'b1) begin + if (subaddr_read_fifo_pop == 4'b1100) begin + subaddr_read_fifo_pop <= 4'b0011; + end else begin + subaddr_read_fifo_pop <= 4'b1100; + end + end + end + + DMA_DATA_TYPE_BYTE: begin + + if (dma_start == 1'b1) begin + subaddr_read_fifo_pop <= 4'b0001; + end else if (pad_read_fifo_pop == 1'b1) begin + if (subaddr_read_fifo_pop == 4'b1000) begin + subaddr_read_fifo_pop <= 4'b0001; + end else begin + subaddr_read_fifo_pop <= subaddr_read_fifo_pop << 1; + end + end + end + + default: subaddr_read_fifo_pop <= {4{pad_read_fifo_pop}}; + + endcase + end else if (hw_fifo_mode == 1'b1) begin + // hw fifo mode: no pop is needed from the internal read fifo + subaddr_read_fifo_pop <= 4'b0000; + end else begin + // other modes: normal popping from the internal read fifo + subaddr_read_fifo_pop <= {4{pad_read_fifo_pop}}; + end + end + end + + always_comb begin + if (subaddressing_mode == 1'b1) begin + if (pad_read_fifo_pop == 1'b1) begin + case (src_data_type) + DMA_DATA_TYPE_HALF_WORD: begin + + read_fifo_pop = subaddr_read_fifo_pop; + + if (subaddr_read_fifo_pop == 4'b0000) begin + write_fifo_input = '0; + write_fifo_push = 1'b0; + end else if (subaddr_read_fifo_pop == 4'b1100) begin + write_fifo_input = {{16{1'b0}}, pad_write_fifo_input[31:16]}; + write_fifo_push = 1'b1; + end else if (subaddr_read_fifo_pop == 4'b0011) begin + write_fifo_input = {{16{1'b0}}, pad_write_fifo_input[15:0]}; + write_fifo_push = 1'b1; + end else begin + write_fifo_input = pad_write_fifo_input; + write_fifo_push = pad_write_fifo_push; + end + + end + + DMA_DATA_TYPE_BYTE: begin + + 
read_fifo_pop = subaddr_read_fifo_pop; + + if (subaddr_read_fifo_pop == 4'b0000) begin + write_fifo_input = '0; + write_fifo_push = 1'b0; + end else if (subaddr_read_fifo_pop == 4'b1000) begin + write_fifo_input = {{24{1'b0}}, pad_write_fifo_input[31:24]}; + write_fifo_push = 1'b1; + end else if (subaddr_read_fifo_pop == 4'b0100) begin + write_fifo_input = {{24{1'b0}}, pad_write_fifo_input[23:16]}; + write_fifo_push = 1'b1; + end else if (subaddr_read_fifo_pop == 4'b0010) begin + write_fifo_input = {{24{1'b0}}, pad_write_fifo_input[15:8]}; + write_fifo_push = 1'b1; + end else if (subaddr_read_fifo_pop == 4'b0001) begin + write_fifo_input = {{24{1'b0}}, pad_write_fifo_input[7:0]}; + write_fifo_push = 1'b1; + end else begin + write_fifo_input = pad_write_fifo_input; + write_fifo_push = pad_write_fifo_push; + end + + end + + default: begin + write_fifo_input = pad_write_fifo_input; + write_fifo_push = pad_write_fifo_push; + read_fifo_pop = {4{pad_read_fifo_pop}}; + end + + endcase + end else begin + // no pop from read fifo issued by padding fsm + write_fifo_input = '0; + write_fifo_push = 1'b0; + read_fifo_pop = {4{pad_read_fifo_pop}}; + end + end else if (hw_fifo_mode == 1'b1) begin + // hw fifo mode: no pop and no push are needed from the internal read fifo + // and to the internal write fifo respectively + write_fifo_input = '0; + write_fifo_push = 1'b0; + read_fifo_pop = {4{1'b0}}; + end else begin + // other modes + write_fifo_input = pad_write_fifo_input; + write_fifo_push = pad_write_fifo_push; + read_fifo_pop = {4{pad_read_fifo_pop}}; + end + end /*_________________________________________________________________________________________________________________________________ */ @@ -512,6 +748,7 @@ module dma #( assign data_in_gnt = dma_read_resp_i.gnt; assign data_in_rvalid = dma_read_resp_i.rvalid; + assign data_in_rvalid_fifo = dma_read_resp_i.rvalid && ~hw_fifo_mode; assign data_in_rdata = dma_read_resp_i.rdata; assign dma_addr_req_o.req = 
data_addr_in_req; @@ -550,11 +787,16 @@ module dma #( assign circular_mode = reg2hw.mode.q == 1; assign address_mode = reg2hw.mode.q == 2; + assign subaddressing_mode = reg2hw.mode.q == 3; + assign hw_fifo_mode = reg2hw.mode.q == 4; assign wait_for_rx = |(reg2hw.slot.rx_trigger_slot.q[SLOT_NUM-1:0] & (~trigger_slot_i)); assign wait_for_tx = |(reg2hw.slot.tx_trigger_slot.q[SLOT_NUM-1:0] & (~trigger_slot_i)); - assign read_fifo_alm_full = (read_fifo_usage == LastFifoUsage[Addr_Fifo_Depth-1:0]); + assign read_fifo_alm_full = (read_fifo_usage[0] == LastFifoUsage[Addr_Fifo_Depth-1:0]) & + (read_fifo_usage[1] == LastFifoUsage[Addr_Fifo_Depth-1:0]) & + (read_fifo_usage[2] == LastFifoUsage[Addr_Fifo_Depth-1:0]) & + (read_fifo_usage[3] == LastFifoUsage[Addr_Fifo_Depth-1:0]); assign read_addr_fifo_alm_full = (read_addr_fifo_usage == LastFifoUsage[Addr_Fifo_Depth-1:0]); assign write_fifo_alm_full = (write_fifo_usage == LastFifoUsage[Addr_Fifo_Depth-1:0]); @@ -563,4 +805,7 @@ module dma #( // Count gnt write transaction and generate event pulse if WINDOW_SIZE is reached assign dma_window_event = |reg2hw.window_size.q & data_out_gnt & (window_counter + 'h1 >= {19'h0, reg2hw.window_size.q}); + + assign src_data_type = dma_data_type_t'(reg2hw.src_data_type.q); + endmodule : dma diff --git a/hw/ip/dma/rtl/dma_obiread_fsm.sv b/hw/ip/dma/rtl/dma_obiread_fsm.sv index 15103796b..64a8663f2 100644 --- a/hw/ip/dma/rtl/dma_obiread_fsm.sv +++ b/hw/ip/dma/rtl/dma_obiread_fsm.sv @@ -22,10 +22,13 @@ module dma_obiread_fsm input logic read_fifo_full_i, input logic read_fifo_alm_full_i, input logic wait_for_rx_i, + + input logic hw_r_fifo_full_i, + input logic hw_fifo_mode_i, + input logic data_in_gnt_i, input logic data_in_rvalid_i, input logic [31:0] data_in_rdata_i, - output logic [31:0] fifo_input_o, output logic data_in_req_o, output logic data_in_we_o, @@ -47,6 +50,13 @@ module dma_obiread_fsm /* Registers */ dma_reg2hw_t reg2hw; + typedef enum logic [1:0] { + DMA_DATA_TYPE_WORD, + 
DMA_DATA_TYPE_HALF_WORD, + DMA_DATA_TYPE_BYTE, + DMA_DATA_TYPE_BYTE_ + } dma_data_type_t; + enum logic { DMA_READ_FSM_IDLE, DMA_READ_FSM_ON @@ -85,6 +95,9 @@ module dma_obiread_fsm /* FIFO signals */ logic [31:0] fifo_input; + dma_data_type_t src_data_type; + logic hw_fifo_sign_extend; + /*_________________________________________________________________________________________________________________________________ */ /* FSMs instantiation */ @@ -219,7 +232,7 @@ module dma_obiread_fsm end else begin dma_read_fsm_n_state = DMA_READ_FSM_ON; // Wait if fifo is full, almost full (last data), or if the SPI RX does not have valid data (only in SPI mode 1). - if (fifo_full == 1'b0 && fifo_alm_full == 1'b0 && wait_for_rx == 1'b0) begin + if (fifo_full == 1'b0 && fifo_alm_full == 1'b0 && wait_for_rx == 1'b0 && (~(hw_r_fifo_full_i && hw_fifo_mode_i))) begin data_in_req = 1'b1; data_in_we = 1'b0; data_in_be = 4'b1111; // always read all bytes @@ -234,7 +247,7 @@ module dma_obiread_fsm // The read operation is the same in both cases dma_read_fsm_n_state = DMA_READ_FSM_ON; // Wait if fifo is full, almost full (last data), or if the SPI RX does not have valid data (only in SPI mode 1). - if (fifo_full == 1'b0 && fifo_alm_full == 1'b0 && wait_for_rx == 1'b0) begin + if (fifo_full == 1'b0 && fifo_alm_full == 1'b0 && wait_for_rx == 1'b0 && (~(hw_r_fifo_full_i && hw_fifo_mode_i))) begin data_in_req = 1'b1; data_in_we = 1'b0; data_in_be = 4'b1111; // always read all bytes @@ -272,22 +285,65 @@ module dma_obiread_fsm fifo_input[23:16] = data_in_rdata[23:16]; fifo_input[31:24] = data_in_rdata[31:24]; - case (read_ptr_valid_reg[1:0]) - 2'b00: ; - 2'b01: fifo_input[7:0] = data_in_rdata[15:8]; + /* + In case of hw fifo mode, depending on the source data type, the input data to the hw read fifo + could be also a half word or a byte, which could also be signed extended before being written into the hw read fifo. 
+ */ + + if (hw_fifo_mode_i) begin + + case (read_ptr_valid_reg[1:0]) + 2'b00: begin + case (src_data_type) + DMA_DATA_TYPE_BYTE: + fifo_input[31:8] = (hw_fifo_sign_extend) ? {24{data_in_rdata[7]}} : {24{1'b0}}; + DMA_DATA_TYPE_HALF_WORD: + fifo_input[31:16] = (hw_fifo_sign_extend) ? {16{data_in_rdata[15]}} : {16{1'b0}}; + default: ; + endcase + end + 2'b01: begin + fifo_input[31:8] = (hw_fifo_sign_extend) ? {24{data_in_rdata[15]}} : {24{1'b0}}; + fifo_input[7:0] = data_in_rdata[15:8]; + end + 2'b10: begin + case (src_data_type) + DMA_DATA_TYPE_BYTE: begin + fifo_input[31:8] = (hw_fifo_sign_extend) ? {24{data_in_rdata[23]}} : {24{1'b0}}; + fifo_input[7:0] = data_in_rdata[23:16]; + end + DMA_DATA_TYPE_HALF_WORD: begin + fifo_input[31:16] = (hw_fifo_sign_extend) ? {16{data_in_rdata[31]}} : {16{1'b0}}; + fifo_input[15:0] = data_in_rdata[31:16]; + end + default: ; + endcase + end + 2'b11: begin + fifo_input[31:8] = (hw_fifo_sign_extend) ? {24{data_in_rdata[31]}} : {24{1'b0}}; + fifo_input[7:0] = data_in_rdata[31:24]; + end + endcase + + end else begin + case (read_ptr_valid_reg[1:0]) + 2'b00: ; + 2'b01: fifo_input[7:0] = data_in_rdata[15:8]; - 2'b10: begin - fifo_input[7:0] = data_in_rdata[23:16]; - fifo_input[15:8] = data_in_rdata[31:24]; - end + 2'b10: begin + fifo_input[7:0] = data_in_rdata[23:16]; + fifo_input[15:8] = data_in_rdata[31:24]; + end - 2'b11: fifo_input[7:0] = data_in_rdata[31:24]; - endcase + 2'b11: fifo_input[7:0] = data_in_rdata[31:24]; + endcase + end end /*_________________________________________________________________________________________________________________________________ */ /* Signal assignments */ + assign hw_fifo_sign_extend = reg2hw.hw_fifo_mode_sign_ext.q; /* Renaming */ assign reg2hw = reg2hw_i; @@ -308,6 +364,6 @@ module dma_obiread_fsm assign data_in_rvalid = data_in_rvalid_i; assign data_in_rdata = data_in_rdata_i; assign fifo_input_o = fifo_input; - + assign src_data_type = dma_data_type_t'(reg2hw.src_data_type.q); endmodule 
diff --git a/hw/ip/dma/rtl/dma_obiwrite_fsm.sv b/hw/ip/dma/rtl/dma_obiwrite_fsm.sv index f07c1ab8f..0cd1f6944 100644 --- a/hw/ip/dma/rtl/dma_obiwrite_fsm.sv +++ b/hw/ip/dma/rtl/dma_obiwrite_fsm.sv @@ -21,11 +21,15 @@ module dma_obiwrite_fsm input logic read_addr_fifo_empty_i, input logic [31:0] fifo_output_i, input logic wait_for_tx_i, + + input logic hw_w_fifo_empty_i, + input logic hw_fifo_mode_i, + input logic [31:0] hw_w_fifo_data_i, + input logic address_mode_i, input logic padding_fsm_done_i, input logic data_out_gnt_i, input logic [31:0] fifo_addr_output_i, - output logic data_out_req_o, output logic data_out_we_o, output logic [3:0] data_out_be_o, @@ -211,14 +215,18 @@ module dma_obiwrite_fsm // Read one word DMA_WRITE_FSM_ON: begin // If all input data read exit - if (padding_fsm_done_i == 1'b1 && write_fifo_empty == 1'b1) begin + if (hw_fifo_mode_i == 1'b1 && padding_fsm_done_i == 1'b1 && hw_w_fifo_empty_i == 1'b1) begin + dma_done = 1'b1; + dma_write_fsm_n_state = DMA_WRITE_FSM_IDLE; + end else if (hw_fifo_mode_i == 1'b0 && padding_fsm_done_i == 1'b1 && write_fifo_empty == 1'b1) begin dma_done = (write_fifo_empty == 1'b1); // If all input data has been processed and written, exit, otherwise finish storing the data dma_write_fsm_n_state = dma_done ? DMA_WRITE_FSM_IDLE : DMA_WRITE_FSM_ON; end else begin dma_write_fsm_n_state = DMA_WRITE_FSM_ON; // Wait if write fifo is empty or if the SPI TX is not ready for new data (only in SPI mode 2). 
- if (write_fifo_empty == 1'b0 && wait_for_tx == 1'b0 && (read_addr_fifo_empty && address_mode) == 1'b0) begin + if ((write_fifo_empty == 1'b0 && wait_for_tx == 1'b0 + && (read_addr_fifo_empty && address_mode) == 1'b0) || (hw_w_fifo_empty_i == 1'b0 && hw_fifo_mode_i == 1'b1)) begin data_out_req = 1'b1; data_out_we = 1'b1; data_out_be = byte_enable_out; @@ -229,15 +237,24 @@ module dma_obiwrite_fsm endcase end + logic [31:0] data_to_write; + always_comb begin + if (hw_fifo_mode_i) begin + data_to_write = hw_w_fifo_data_i; + end else begin + data_to_write = fifo_output; + end + end + /* Perform the data shift */ always_comb begin : proc_output_data - data_out_wdata[7:0] = fifo_output[7:0]; - data_out_wdata[15:8] = fifo_output[15:8]; - data_out_wdata[23:16] = fifo_output[23:16]; - data_out_wdata[31:24] = fifo_output[31:24]; + data_out_wdata[7:0] = data_to_write[7:0]; + data_out_wdata[15:8] = data_to_write[15:8]; + data_out_wdata[23:16] = data_to_write[23:16]; + data_out_wdata[31:24] = data_to_write[31:24]; - if (address_mode == 1'b0) begin + if (address_mode == 1'b0 && hw_fifo_mode_i == 1'b0) begin case (write_ptr_reg[1:0]) 2'b00: begin if (sign_extend) begin @@ -248,20 +265,20 @@ module dma_obiwrite_fsm { DMA_DATA_TYPE_HALF_WORD, DMA_DATA_TYPE_WORD } : - data_out_wdata[31:16] = {16{fifo_output[15]}}; + data_out_wdata[31:16] = {16{data_to_write[15]}}; { DMA_DATA_TYPE_BYTE, DMA_DATA_TYPE_WORD }, { DMA_DATA_TYPE_BYTE_, DMA_DATA_TYPE_WORD } : - data_out_wdata[31:8] = {24{fifo_output[7]}}; + data_out_wdata[31:8] = {24{data_to_write[7]}}; {DMA_DATA_TYPE_HALF_WORD, DMA_DATA_TYPE_HALF_WORD} : ; { DMA_DATA_TYPE_BYTE, DMA_DATA_TYPE_HALF_WORD }, { DMA_DATA_TYPE_BYTE_, DMA_DATA_TYPE_HALF_WORD } : - data_out_wdata[15:8] = {8{fifo_output[7]}}; + data_out_wdata[15:8] = {8{data_to_write[7]}}; default: ; endcase end else begin @@ -288,10 +305,10 @@ module dma_obiwrite_fsm end end 2'b01: - data_out_wdata[15:8] = fifo_output[7:0]; // Writing a byte, no need for sign extension + 
data_out_wdata[15:8] = data_to_write[7:0]; // Writing a byte, no need for sign extension 2'b10: begin // Writing a half-word or a byte - data_out_wdata[23:16] = fifo_output[7:0]; - data_out_wdata[31:24] = fifo_output[15:8]; + data_out_wdata[23:16] = data_to_write[7:0]; + data_out_wdata[31:24] = data_to_write[15:8]; if (sign_extend) begin case ({ @@ -303,7 +320,7 @@ module dma_obiwrite_fsm }, { DMA_DATA_TYPE_BYTE_, DMA_DATA_TYPE_HALF_WORD } : - data_out_wdata[31:24] = {8{fifo_output[7]}}; + data_out_wdata[31:24] = {8{data_to_write[7]}}; default: ; endcase end else begin @@ -322,7 +339,7 @@ module dma_obiwrite_fsm end end 2'b11: - data_out_wdata[31:24] = fifo_output[7:0]; // Writing a byte, no need for sign extension + data_out_wdata[31:24] = data_to_write[7:0]; // Writing a byte, no need for sign extension endcase end end @@ -357,5 +374,4 @@ module dma_obiwrite_fsm /* Sign extension */ assign sign_extend = reg2hw.sign_ext.q & ( (src_data_type[1] & ~dst_data_type[1]) | ((src_data_type[1] == dst_data_type[1]) & (src_data_type[0] & ~dst_data_type[0]))); - endmodule diff --git a/hw/ip/dma/rtl/dma_padding_fsm.sv b/hw/ip/dma/rtl/dma_padding_fsm.sv index d16c0bf31..78f3f7a01 100644 --- a/hw/ip/dma/rtl/dma_padding_fsm.sv +++ b/hw/ip/dma/rtl/dma_padding_fsm.sv @@ -23,6 +23,10 @@ module dma_padding_fsm input logic write_fifo_alm_full_i, input logic [31:0] data_read_i, + input logic hw_fifo_mode_i, + input logic hw_w_fifo_push_i, + output logic hw_r_fifo_push_padding_o, + output logic padding_fsm_done_o, output logic write_fifo_push_o, output logic read_fifo_pop_o, @@ -106,6 +110,7 @@ module dma_padding_fsm pad_state_q <= PAD_IDLE; end else begin /* Advance in the FSM only if the write FIFO is available */ + // ----------------- add hw_w_fifo_full_i == 1'b0 if (write_fifo_en == 1'b1 && dma_padding_fsm_on_i == 1'b1 && padding_fsm_done_o == 1'b0) begin pad_state_q <= pad_state_d; end @@ -197,6 +202,7 @@ module dma_padding_fsm data_write_o = '0; write_fifo_push_o = 1'b0; 
read_fifo_pop_o = 1'b0; + hw_r_fifo_push_padding_o = 1'b0; if (dma_padding_fsm_on_i == 1'b1 && padding_fsm_done_o == 1'b0) begin /* @@ -204,7 +210,9 @@ module dma_padding_fsm * If we don't have to pad, we need to wait for the read fifo to be not empty. * In both cases, we need to wait for the write fifo to have some space. */ - if (pad_on == 1'b1 & write_fifo_en == 1'b1) begin + if (hw_fifo_mode_i == 1'b1 & pad_on == 1'b1) begin + hw_r_fifo_push_padding_o = 1'b1; + end else if (pad_on == 1'b1 & write_fifo_en == 1'b1) begin write_fifo_push_o = 1'b1; end else if (read_fifo_en == 1'b1 & write_fifo_en == 1'b1) begin data_write_o = data_read_i; @@ -227,7 +235,8 @@ module dma_padding_fsm dma_cnt_d1 <= '0; dma_cnt_d2 <= '0; end else if ((dma_padding_fsm_on_i == 1'b1 && padding_fsm_done_o == 1'b0) & - ((pad_on == 1'b1 & write_fifo_en == 1'b1) || + ((hw_fifo_mode_i == 1'b1 & hw_w_fifo_push_i == 1'b1) || + (pad_on == 1'b1 & write_fifo_en == 1'b1 ) || (read_fifo_en == 1'b1 & write_fifo_en == 1'b1))) begin if (dma_conf_1d == 1'b1) begin // 1D case diff --git a/hw/ip/dma/rtl/dma_reg_pkg.sv b/hw/ip/dma/rtl/dma_reg_pkg.sv index 4e98968c2..c27cf3be9 100644 --- a/hw/ip/dma/rtl/dma_reg_pkg.sv +++ b/hw/ip/dma/rtl/dma_reg_pkg.sv @@ -56,7 +56,7 @@ package dma_reg_pkg; typedef struct packed {logic q;} dma_reg2hw_sign_ext_reg_t; - typedef struct packed {logic [1:0] q;} dma_reg2hw_mode_reg_t; + typedef struct packed {logic [2:0] q;} dma_reg2hw_mode_reg_t; typedef struct packed {logic q;} dma_reg2hw_dim_config_reg_t; @@ -89,6 +89,8 @@ package dma_reg_pkg; logic re; } dma_reg2hw_window_ifr_reg_t; + typedef struct packed {logic q;} dma_reg2hw_hw_fifo_mode_sign_ext_reg_t; + typedef struct packed { struct packed {logic d;} ready; struct packed {logic d;} window_done; @@ -105,32 +107,33 @@ package dma_reg_pkg; // Register -> HW type typedef struct packed { - dma_reg2hw_src_ptr_reg_t src_ptr; // [282:251] - dma_reg2hw_dst_ptr_reg_t dst_ptr; // [250:219] - dma_reg2hw_addr_ptr_reg_t addr_ptr; // 
[218:187] - dma_reg2hw_size_d1_reg_t size_d1; // [186:170] - dma_reg2hw_size_d2_reg_t size_d2; // [169:154] - dma_reg2hw_status_reg_t status; // [153:150] - dma_reg2hw_src_ptr_inc_d1_reg_t src_ptr_inc_d1; // [149:144] - dma_reg2hw_src_ptr_inc_d2_reg_t src_ptr_inc_d2; // [143:121] - dma_reg2hw_dst_ptr_inc_d1_reg_t dst_ptr_inc_d1; // [120:115] - dma_reg2hw_dst_ptr_inc_d2_reg_t dst_ptr_inc_d2; // [114:92] - dma_reg2hw_slot_reg_t slot; // [91:60] - dma_reg2hw_src_data_type_reg_t src_data_type; // [59:58] - dma_reg2hw_dst_data_type_reg_t dst_data_type; // [57:56] - dma_reg2hw_sign_ext_reg_t sign_ext; // [55:55] - dma_reg2hw_mode_reg_t mode; // [54:53] - dma_reg2hw_dim_config_reg_t dim_config; // [52:52] - dma_reg2hw_dim_inv_reg_t dim_inv; // [51:51] - dma_reg2hw_pad_top_reg_t pad_top; // [50:45] - dma_reg2hw_pad_bottom_reg_t pad_bottom; // [44:39] - dma_reg2hw_pad_right_reg_t pad_right; // [38:33] - dma_reg2hw_pad_left_reg_t pad_left; // [32:27] - dma_reg2hw_window_size_reg_t window_size; // [26:14] - dma_reg2hw_window_count_reg_t window_count; // [13:6] - dma_reg2hw_interrupt_en_reg_t interrupt_en; // [5:4] - dma_reg2hw_transaction_ifr_reg_t transaction_ifr; // [3:2] - dma_reg2hw_window_ifr_reg_t window_ifr; // [1:0] + dma_reg2hw_src_ptr_reg_t src_ptr; // [284:253] + dma_reg2hw_dst_ptr_reg_t dst_ptr; // [252:221] + dma_reg2hw_addr_ptr_reg_t addr_ptr; // [220:189] + dma_reg2hw_size_d1_reg_t size_d1; // [188:172] + dma_reg2hw_size_d2_reg_t size_d2; // [171:156] + dma_reg2hw_status_reg_t status; // [155:152] + dma_reg2hw_src_ptr_inc_d1_reg_t src_ptr_inc_d1; // [151:146] + dma_reg2hw_src_ptr_inc_d2_reg_t src_ptr_inc_d2; // [145:123] + dma_reg2hw_dst_ptr_inc_d1_reg_t dst_ptr_inc_d1; // [122:117] + dma_reg2hw_dst_ptr_inc_d2_reg_t dst_ptr_inc_d2; // [116:94] + dma_reg2hw_slot_reg_t slot; // [93:62] + dma_reg2hw_src_data_type_reg_t src_data_type; // [61:60] + dma_reg2hw_dst_data_type_reg_t dst_data_type; // [59:58] + dma_reg2hw_sign_ext_reg_t sign_ext; // [57:57] + 
dma_reg2hw_mode_reg_t mode; // [56:54] + dma_reg2hw_dim_config_reg_t dim_config; // [53:53] + dma_reg2hw_dim_inv_reg_t dim_inv; // [52:52] + dma_reg2hw_pad_top_reg_t pad_top; // [51:46] + dma_reg2hw_pad_bottom_reg_t pad_bottom; // [45:40] + dma_reg2hw_pad_right_reg_t pad_right; // [39:34] + dma_reg2hw_pad_left_reg_t pad_left; // [33:28] + dma_reg2hw_window_size_reg_t window_size; // [27:15] + dma_reg2hw_window_count_reg_t window_count; // [14:7] + dma_reg2hw_interrupt_en_reg_t interrupt_en; // [6:5] + dma_reg2hw_transaction_ifr_reg_t transaction_ifr; // [4:3] + dma_reg2hw_window_ifr_reg_t window_ifr; // [2:1] + dma_reg2hw_hw_fifo_mode_sign_ext_reg_t hw_fifo_mode_sign_ext; // [0:0] } dma_reg2hw_t; // HW -> register type @@ -168,6 +171,7 @@ package dma_reg_pkg; parameter logic [BlockAw-1:0] DMA_INTERRUPT_EN_OFFSET = 7'h5c; parameter logic [BlockAw-1:0] DMA_TRANSACTION_IFR_OFFSET = 7'h60; parameter logic [BlockAw-1:0] DMA_WINDOW_IFR_OFFSET = 7'h64; + parameter logic [BlockAw-1:0] DMA_HW_FIFO_MODE_SIGN_EXT_OFFSET = 7'h68; // Reset values for hwext registers and their fields parameter logic [1:0] DMA_STATUS_RESVAL = 2'h1; @@ -205,11 +209,12 @@ package dma_reg_pkg; DMA_WINDOW_COUNT, DMA_INTERRUPT_EN, DMA_TRANSACTION_IFR, - DMA_WINDOW_IFR + DMA_WINDOW_IFR, + DMA_HW_FIFO_MODE_SIGN_EXT } dma_id_e; // Register width information to check illegal writes - parameter logic [3:0] DMA_PERMIT[26] = '{ + parameter logic [3:0] DMA_PERMIT[27] = '{ 4'b1111, // index[ 0] DMA_SRC_PTR 4'b1111, // index[ 1] DMA_DST_PTR 4'b1111, // index[ 2] DMA_ADDR_PTR @@ -235,7 +240,8 @@ package dma_reg_pkg; 4'b0001, // index[22] DMA_WINDOW_COUNT 4'b0001, // index[23] DMA_INTERRUPT_EN 4'b0001, // index[24] DMA_TRANSACTION_IFR - 4'b0001 // index[25] DMA_WINDOW_IFR + 4'b0001, // index[25] DMA_WINDOW_IFR + 4'b0001 // index[26] DMA_HW_FIFO_MODE_SIGN_EXT }; endpackage diff --git a/hw/ip/dma/rtl/dma_reg_top.sv b/hw/ip/dma/rtl/dma_reg_top.sv index ea7897c00..d05d73b64 100644 --- a/hw/ip/dma/rtl/dma_reg_top.sv 
+++ b/hw/ip/dma/rtl/dma_reg_top.sv @@ -114,8 +114,8 @@ module dma_reg_top #( logic sign_ext_qs; logic sign_ext_wd; logic sign_ext_we; - logic [1:0] mode_qs; - logic [1:0] mode_wd; + logic [2:0] mode_qs; + logic [2:0] mode_wd; logic mode_we; logic dim_config_qs; logic dim_config_wd; @@ -149,6 +149,9 @@ module dma_reg_top #( logic transaction_ifr_re; logic window_ifr_qs; logic window_ifr_re; + logic hw_fifo_mode_sign_ext_qs; + logic hw_fifo_mode_sign_ext_wd; + logic hw_fifo_mode_sign_ext_we; // Register instances // R[src_ptr]: V(False) @@ -564,9 +567,9 @@ module dma_reg_top #( // R[mode]: V(False) prim_subreg #( - .DW (2), + .DW (3), .SWACCESS("RW"), - .RESVAL (2'h0) + .RESVAL (3'h0) ) u_mode ( .clk_i (clk_i), .rst_ni(rst_ni), @@ -889,9 +892,36 @@ module dma_reg_top #( ); + // R[hw_fifo_mode_sign_ext]: V(False) + + prim_subreg #( + .DW (1), + .SWACCESS("RW"), + .RESVAL (1'h0) + ) u_hw_fifo_mode_sign_ext ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(hw_fifo_mode_sign_ext_we), + .wd(hw_fifo_mode_sign_ext_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(), + .q (reg2hw.hw_fifo_mode_sign_ext.q), + + // to register interface (read) + .qs(hw_fifo_mode_sign_ext_qs) + ); + + - logic [25:0] addr_hit; + logic [26:0] addr_hit; always_comb begin addr_hit = '0; addr_hit[0] = (reg_addr == DMA_SRC_PTR_OFFSET); @@ -920,6 +950,7 @@ module dma_reg_top #( addr_hit[23] = (reg_addr == DMA_INTERRUPT_EN_OFFSET); addr_hit[24] = (reg_addr == DMA_TRANSACTION_IFR_OFFSET); addr_hit[25] = (reg_addr == DMA_WINDOW_IFR_OFFSET); + addr_hit[26] = (reg_addr == DMA_HW_FIFO_MODE_SIGN_EXT_OFFSET); end assign addrmiss = (reg_re || reg_we) ? 
~|addr_hit : 1'b0; @@ -952,7 +983,8 @@ module dma_reg_top #( (addr_hit[22] & (|(DMA_PERMIT[22] & ~reg_be))) | (addr_hit[23] & (|(DMA_PERMIT[23] & ~reg_be))) | (addr_hit[24] & (|(DMA_PERMIT[24] & ~reg_be))) | - (addr_hit[25] & (|(DMA_PERMIT[25] & ~reg_be))))); + (addr_hit[25] & (|(DMA_PERMIT[25] & ~reg_be))) | + (addr_hit[26] & (|(DMA_PERMIT[26] & ~reg_be))))); end assign src_ptr_we = addr_hit[0] & reg_we & !reg_error; @@ -1002,7 +1034,7 @@ module dma_reg_top #( assign sign_ext_wd = reg_wdata[0]; assign mode_we = addr_hit[14] & reg_we & !reg_error; - assign mode_wd = reg_wdata[1:0]; + assign mode_wd = reg_wdata[2:0]; assign dim_config_we = addr_hit[15] & reg_we & !reg_error; assign dim_config_wd = reg_wdata[0]; @@ -1035,6 +1067,9 @@ module dma_reg_top #( assign window_ifr_re = addr_hit[25] & reg_re & !reg_error; + assign hw_fifo_mode_sign_ext_we = addr_hit[26] & reg_we & !reg_error; + assign hw_fifo_mode_sign_ext_wd = reg_wdata[0]; + // Read data return always_comb begin reg_rdata_next = '0; @@ -1098,7 +1133,7 @@ module dma_reg_top #( end addr_hit[14]: begin - reg_rdata_next[1:0] = mode_qs; + reg_rdata_next[2:0] = mode_qs; end addr_hit[15]: begin @@ -1146,6 +1181,10 @@ module dma_reg_top #( reg_rdata_next[0] = window_ifr_qs; end + addr_hit[26]: begin + reg_rdata_next[0] = hw_fifo_mode_sign_ext_qs; + end + default: begin reg_rdata_next = '1; end diff --git a/hw/ip/dma/rtl/hw_r_fifo_ctrl.sv b/hw/ip/dma/rtl/hw_r_fifo_ctrl.sv new file mode 100644 index 000000000..d12ffdc6c --- /dev/null +++ b/hw/ip/dma/rtl/hw_r_fifo_ctrl.sv @@ -0,0 +1,19 @@ +module hw_r_fifo_ctrl ( + input logic hw_fifo_mode_i, + input logic [31:0] data_i, + input logic data_valid_i, + input logic hw_r_fifo_push_padding_i, + output logic push_o, + output logic [31:0] data_o +); + + always_comb begin + push_o = 1'b0; + data_o = '0; + if (hw_fifo_mode_i == 1'b1 && data_valid_i == 1'b1 || hw_r_fifo_push_padding_i == 1'b1) begin + push_o = 1'b1; + data_o = data_i; + end + end + +endmodule diff --git 
a/hw/ip/dma/rtl/hw_w_fifo_ctrl.sv b/hw/ip/dma/rtl/hw_w_fifo_ctrl.sv new file mode 100644 index 000000000..04a32686e --- /dev/null +++ b/hw/ip/dma/rtl/hw_w_fifo_ctrl.sv @@ -0,0 +1,17 @@ +module hw_w_fifo_ctrl ( + input logic hw_fifo_mode_i, + output logic [31:0] data_o, + input logic [31:0] data_i, + input logic data_out_gnt_i, + output logic pop_o +); + + always_comb begin + pop_o = 1'b0; + if (hw_fifo_mode_i == 1'b1 && data_out_gnt_i == 1'b1) begin + pop_o = 1'b1; + end + data_o = data_i; + end + +endmodule diff --git a/hw/ip/dma_subsystem/rtl/dma_subsystem.sv b/hw/ip/dma_subsystem/rtl/dma_subsystem.sv index 6ac3df37e..99266c6e3 100644 --- a/hw/ip/dma_subsystem/rtl/dma_subsystem.sv +++ b/hw/ip/dma_subsystem/rtl/dma_subsystem.sv @@ -15,6 +15,8 @@ module dma_subsystem #( parameter type reg_rsp_t = logic, parameter type obi_req_t = logic, parameter type obi_resp_t = logic, + parameter type hw_fifo_req_t = logic, + parameter type hw_fifo_resp_t = logic, parameter int unsigned GLOBAL_SLOT_NUM = 0, parameter int unsigned EXT_SLOT_NUM = 0 ) ( @@ -34,6 +36,9 @@ module dma_subsystem #( output obi_req_t [core_v_mini_mcu_pkg::DMA_NUM_MASTER_PORTS-1:0] dma_addr_req_o, input obi_resp_t [core_v_mini_mcu_pkg::DMA_NUM_MASTER_PORTS-1:0] dma_addr_resp_i, + output hw_fifo_req_t [core_v_mini_mcu_pkg::DMA_CH_NUM-1:0] hw_fifo_req_o, + input hw_fifo_resp_t [core_v_mini_mcu_pkg::DMA_CH_NUM-1:0] hw_fifo_resp_i, + input logic [GLOBAL_SLOT_NUM-1:0] global_trigger_slot_i, input logic [EXT_SLOT_NUM-1:0] ext_trigger_slot_i, @@ -119,6 +124,8 @@ module dma_subsystem #( .reg_rsp_t (reg_pkg::reg_rsp_t), .obi_req_t (obi_pkg::obi_req_t), .obi_resp_t(obi_pkg::obi_resp_t), + .hw_fifo_req_t (hw_fifo_pkg::hw_fifo_req_t), + .hw_fifo_resp_t(hw_fifo_pkg::hw_fifo_resp_t), .SLOT_NUM (GLOBAL_SLOT_NUM + 2), .FIFO_DEPTH (fifo_size) ) dma_i ( @@ -134,6 +141,10 @@ module dma_subsystem #( .dma_write_resp_i(xbar_write_resp[i]), .dma_addr_req_o(xbar_address_req[i]), .dma_addr_resp_i(xbar_address_resp[i]), + + 
.hw_fifo_req_o(hw_fifo_req_o[i]), + .hw_fifo_resp_i(hw_fifo_resp_i[i]), + .trigger_slot_i({ ext_trigger_slot_i[2*i+1], ext_trigger_slot_i[2*i], global_trigger_slot_i }), diff --git a/hw/system/x_heep_system.sv.tpl b/hw/system/x_heep_system.sv.tpl index 2a01983e9..125e5268d 100644 --- a/hw/system/x_heep_system.sv.tpl +++ b/hw/system/x_heep_system.sv.tpl @@ -5,6 +5,7 @@ module x_heep_system import obi_pkg::*; import reg_pkg::*; + import hw_fifo_pkg::*; #( parameter COREV_PULP = 0, parameter FPU = 0, @@ -38,6 +39,9 @@ module x_heep_system output obi_req_t [core_v_mini_mcu_pkg::DMA_NUM_MASTER_PORTS-1:0] ext_dma_addr_req_o, input obi_resp_t [core_v_mini_mcu_pkg::DMA_NUM_MASTER_PORTS-1:0] ext_dma_addr_resp_i, + output hw_fifo_req_t [core_v_mini_mcu_pkg::DMA_CH_NUM-1:0] hw_fifo_req_o, + input hw_fifo_resp_t [core_v_mini_mcu_pkg::DMA_CH_NUM-1:0] hw_fifo_resp_i, + input reg_req_t [AO_SPC_NUM_RND-1:0] ext_ao_peripheral_req_i, output reg_rsp_t [AO_SPC_NUM_RND-1:0] ext_ao_peripheral_resp_o, @@ -154,6 +158,8 @@ ${pad.core_v_mini_mcu_bonding} .ext_dma_addr_req_o, .ext_dma_addr_resp_i, .ext_dma_stop_i, + .hw_fifo_req_o, + .hw_fifo_resp_i, .ext_peripheral_slave_req_o, .ext_peripheral_slave_resp_i, .ext_debug_req_o(ext_debug_req), diff --git a/sw/applications/example_dma_subaddressing/buffer.h b/sw/applications/example_dma_subaddressing/buffer.h new file mode 100644 index 000000000..66d8b1734 --- /dev/null +++ b/sw/applications/example_dma_subaddressing/buffer.h @@ -0,0 +1,67 @@ + uint32_t flash_original_128B[32] = { + 0x76543211, 0xfedcba99, 0x579a6f91, 0x657d5bef, 0x758ee420, 0x01234568, 0xfedbca97, 0x89abde00, + 0x76543212, 0xfedcba9a, 0x579a6f92, 0x657d5bf0, 0x758ee421, 0x01234569, 0xfedbca98, 0x89abde01, + 0x76543213, 0xfedcba9b, 0x579a6f93, 0x657d5bf1, 0x758ee422, 0x0123456a, 0xfedbca99, 0x89abde02, + 0x76543214, 0xfedcba9c, 0x579a6f94, 0x657d5bf2, 0x758ee423, 0x0123456b, 0xfedbca9a, 0x89abde03 +}; + +uint32_t test_flash_se_half_words[64] = { + 0x00003211, 0x00007654, 
0xffffba99, 0xfffffedc, 0x00006f91, 0x0000579a, 0x00005bef, 0x0000657d, + 0xffffe420, 0x0000758e, 0x00004568, 0x00000123, 0xffffca97, 0xfffffedb, 0xffffde00, 0xffff89ab, + 0x00003212, 0x00007654, 0xffffba9a, 0xfffffedc, 0x00006f92, 0x0000579a, 0x00005bf0, 0x0000657d, + 0xffffe421, 0x0000758e, 0x00004569, 0x00000123, 0xffffca98, 0xfffffedb, 0xffffde01, 0xffff89ab, + 0x00003213, 0x00007654, 0xffffba9b, 0xfffffedc, 0x00006f93, 0x0000579a, 0x00005bf1, 0x0000657d, + 0xffffe422, 0x0000758e, 0x0000456a, 0x00000123, 0xffffca99, 0xfffffedb, 0xffffde02, 0xffff89ab, + 0x00003214, 0x00007654, 0xffffba9c, 0xfffffedc, 0x00006f94, 0x0000579a, 0x00005bf2, 0x0000657d, + 0xffffe423, 0x0000758e, 0x0000456b, 0x00000123, 0xffffca9a, 0xfffffedb, 0xffffde03, 0xffff89ab +}; + +uint32_t test_flash_half_words[64] = { + 0x00003211, 0x00007654, 0x0000ba99, 0x0000fedc, 0x00006f91, 0x0000579a, 0x00005bef, 0x0000657d, + 0x0000e420, 0x0000758e, 0x00004568, 0x00000123, 0x0000ca97, 0x0000fedb, 0x0000de00, 0x000089ab, + 0x00003212, 0x00007654, 0x0000ba9a, 0x0000fedc, 0x00006f92, 0x0000579a, 0x00005bf0, 0x0000657d, + 0x0000e421, 0x0000758e, 0x00004569, 0x00000123, 0x0000ca98, 0x0000fedb, 0x0000de01, 0x000089ab, + 0x00003213, 0x00007654, 0x0000ba9b, 0x0000fedc, 0x00006f93, 0x0000579a, 0x00005bf1, 0x0000657d, + 0x0000e422, 0x0000758e, 0x0000456a, 0x00000123, 0x0000ca99, 0x0000fedb, 0x0000de02, 0x000089ab, + 0x00003214, 0x00007654, 0x0000ba9c, 0x0000fedc, 0x00006f94, 0x0000579a, 0x00005bf2, 0x0000657d, + 0x0000e423, 0x0000758e, 0x0000456b, 0x00000123, 0x0000ca9a, 0x0000fedb, 0x0000de03, 0x000089ab +}; + +uint32_t test_flash_se_bytes[128] = { + 0x00000011, 0x00000032, 0x00000054, 0x00000076, 0xffffff99, 0xffffffba, 0xffffffdc, 0xfffffffe, + 0xffffff91, 0x0000006f, 0xffffff9a, 0x00000057, 0xffffffef, 0x0000005b, 0x0000007d, 0x00000065, + 0x00000020, 0xffffffe4, 0xffffff8e, 0x00000075, 0x00000068, 0x00000045, 0x00000023, 0x00000001, + 0xffffff97, 0xffffffca, 0xffffffdb, 0xfffffffe, 0x00000000, 0xffffffde, 
0xffffffab, 0xffffff89, + 0x00000012, 0x00000032, 0x00000054, 0x00000076, 0xffffff9a, 0xffffffba, 0xffffffdc, 0xfffffffe, + 0xffffff92, 0x0000006f, 0xffffff9a, 0x00000057, 0xfffffff0, 0x0000005b, 0x0000007d, 0x00000065, + 0x00000021, 0xffffffe4, 0xffffff8e, 0x00000075, 0x00000069, 0x00000045, 0x00000023, 0x00000001, + 0xffffff98, 0xffffffca, 0xffffffdb, 0xfffffffe, 0x00000001, 0xffffffde, 0xffffffab, 0xffffff89, + 0x00000013, 0x00000032, 0x00000054, 0x00000076, 0xffffff9b, 0xffffffba, 0xffffffdc, 0xfffffffe, + 0xffffff93, 0x0000006f, 0xffffff9a, 0x00000057, 0xfffffff1, 0x0000005b, 0x0000007d, 0x00000065, + 0x00000022, 0xffffffe4, 0xffffff8e, 0x00000075, 0x0000006a, 0x00000045, 0x00000023, 0x00000001, + 0xffffff99, 0xffffffca, 0xffffffdb, 0xfffffffe, 0x00000002, 0xffffffde, 0xffffffab, 0xffffff89, + 0x00000014, 0x00000032, 0x00000054, 0x00000076, 0xffffff9c, 0xffffffba, 0xffffffdc, 0xfffffffe, + 0xffffff94, 0x0000006f, 0xffffff9a, 0x00000057, 0xfffffff2, 0x0000005b, 0x0000007d, 0x00000065, + 0x00000023, 0xffffffe4, 0xffffff8e, 0x00000075, 0x0000006b, 0x00000045, 0x00000023, 0x00000001, + 0xffffff9a, 0xffffffca, 0xffffffdb, 0xfffffffe, 0x00000003, 0xffffffde, 0xffffffab, 0xffffff89 +}; + +uint32_t test_flash_bytes[128] = { + 0x00000011, 0x00000032, 0x00000054, 0x00000076, 0x00000099, 0x000000ba, 0x000000dc, 0x000000fe, + 0x00000091, 0x0000006f, 0x0000009a, 0x00000057, 0x000000ef, 0x0000005b, 0x0000007d, 0x00000065, + 0x00000020, 0x000000e4, 0x0000008e, 0x00000075, 0x00000068, 0x00000045, 0x00000023, 0x00000001, + 0x00000097, 0x000000ca, 0x000000db, 0x000000fe, 0x00000000, 0x000000de, 0x000000ab, 0x00000089, + 0x00000012, 0x00000032, 0x00000054, 0x00000076, 0x0000009a, 0x000000ba, 0x000000dc, 0x000000fe, + 0x00000092, 0x0000006f, 0x0000009a, 0x00000057, 0x000000f0, 0x0000005b, 0x0000007d, 0x00000065, + 0x00000021, 0x000000e4, 0x0000008e, 0x00000075, 0x00000069, 0x00000045, 0x00000023, 0x00000001, + 0x00000098, 0x000000ca, 0x000000db, 0x000000fe, 0x00000001, 
0x000000de, 0x000000ab, 0x00000089, + 0x00000013, 0x00000032, 0x00000054, 0x00000076, 0x0000009b, 0x000000ba, 0x000000dc, 0x000000fe, + 0x00000093, 0x0000006f, 0x0000009a, 0x00000057, 0x000000f1, 0x0000005b, 0x0000007d, 0x00000065, + 0x00000022, 0x000000e4, 0x0000008e, 0x00000075, 0x0000006a, 0x00000045, 0x00000023, 0x00000001, + 0x00000099, 0x000000ca, 0x000000db, 0x000000fe, 0x00000002, 0x000000de, 0x000000ab, 0x00000089, + 0x00000014, 0x00000032, 0x00000054, 0x00000076, 0x0000009c, 0x000000ba, 0x000000dc, 0x000000fe, + 0x00000094, 0x0000006f, 0x0000009a, 0x00000057, 0x000000f2, 0x0000005b, 0x0000007d, 0x00000065, + 0x00000023, 0x000000e4, 0x0000008e, 0x00000075, 0x0000006b, 0x00000045, 0x00000023, 0x00000001, + 0x0000009a, 0x000000ca, 0x000000db, 0x000000fe, 0x00000003, 0x000000de, 0x000000ab, 0x00000089 +}; + diff --git a/sw/applications/example_dma_subaddressing/main.c b/sw/applications/example_dma_subaddressing/main.c new file mode 100644 index 000000000..14ed76ee0 --- /dev/null +++ b/sw/applications/example_dma_subaddressing/main.c @@ -0,0 +1,296 @@ +/** + * @file main.c + * @brief DMA subaddress-mode flash read example using the W25Q128JW BSP + * + * Reads a 128-byte buffer from flash through the SPI RX FIFO with the DMA in subaddress mode + * (word, half-word and byte source types, with and without sign extension) and checks the result. + * +*/ + +#include +#include +#include + +/* To get TX and RX FIFO depth */ +#include "spi_host_regs.h" +/* To get SPI functions */ +#include "spi_host.h" + +#include "x-heep.h" +#include "w25q128jw.h" +#include "dma.h" + +/* In this application, PRINTFs are enabled for simulation and disabled for FPGA. */ +#define PRINTF_IN_FPGA 0 +#define PRINTF_IN_SIM 1 + +#if TARGET_SIM && PRINTF_IN_SIM + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) +#elif PRINTF_IN_FPGA && !TARGET_SIM + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) +#else + #define PRINTF(...) 
+#endif + +#if defined(TARGET_PYNQ_Z2) || defined(TARGET_ZCU104) || defined(TARGET_NEXYS_A7_100T) + #define USE_SPI_FLASH +#endif + +#ifndef TARGET_SIM +#define USE_SPI_FLASH +#endif + +// Start buffers (the original data) +#include "buffer.h" +// End buffer (where what is read is stored) +uint32_t flash_data[256]; + +#define TEST_BUFFER_WORDS flash_original_128B +#define TEST_BUFFER_SE_HALF_WORDS test_flash_se_half_words +#define TEST_BUFFER_SE_BYTES test_flash_se_bytes +#define TEST_BUFFER_HALF_WORDS test_flash_half_words +#define TEST_BUFFER_BYTES test_flash_bytes +#define LENGTH 128 + +typedef enum { + TYPE_WORD = 2, + TYPE_HALF_WORD = 1, + TYPE_BYTE = 0 +} dma_trans_data_t; + +// Test functions +uint32_t test_read_dma(uint32_t *test_buffer, uint32_t len, dma_trans_data_t dma_data_type, uint8_t sign_extend); +uint32_t test_read_quad_dma(uint32_t *test_buffer, uint32_t len, dma_trans_data_t dma_data_type, uint8_t sign_extend); + +// Check function +uint32_t check_result(uint32_t *test_buffer, uint32_t len, dma_trans_data_t dma_data_type, uint32_t sign_extend); + +// Define global status variable +w25q_error_codes_t global_status; + +int main(int argc, char *argv[]) { + soc_ctrl_t soc_ctrl; + soc_ctrl.base_addr = mmio_region_from_addr((uintptr_t)SOC_CTRL_START_ADDRESS); + + if ( get_spi_flash_mode(&soc_ctrl) == SOC_CTRL_SPI_FLASH_MODE_SPIMEMIO ) { + PRINTF("This application cannot work with the memory mapped SPI FLASH" + "module - do not use the FLASH_EXEC linker script for this application\n"); + return EXIT_SUCCESS; + } + + // Pick the correct spi device based on simulation type + spi_host_t* spi; + #ifndef USE_SPI_FLASH + spi = spi_host1; + #else + spi = spi_flash; + #endif + + // Define status variable + int32_t errors = 0; + + // Init SPI host and SPI<->Flash bridge parameters + if (w25q128jw_init(spi) != FLASH_OK) return EXIT_FAILURE; + + // DMA transaction data type + dma_trans_data_t dma_data_type; + + // Test simple read with DMA + PRINTF("Testing read 
with DMA in SUBADDRESS mode...\n"); + + dma_data_type = TYPE_WORD; + errors += test_read_dma(TEST_BUFFER_WORDS, LENGTH, dma_data_type, 0); + dma_data_type = TYPE_HALF_WORD; + errors += test_read_dma(TEST_BUFFER_HALF_WORDS, LENGTH, dma_data_type, 0); + errors += test_read_dma(TEST_BUFFER_SE_HALF_WORDS, LENGTH, dma_data_type, 1); + dma_data_type = TYPE_BYTE; + errors += test_read_dma(TEST_BUFFER_BYTES, LENGTH, dma_data_type, 0); + errors += test_read_dma(TEST_BUFFER_SE_BYTES, LENGTH, dma_data_type, 1); + + // Test quad read with DMA + dma_data_type = TYPE_WORD; + errors += test_read_quad_dma(TEST_BUFFER_WORDS, LENGTH, dma_data_type, 0); + dma_data_type = TYPE_HALF_WORD; + errors += test_read_quad_dma(TEST_BUFFER_HALF_WORDS, LENGTH, dma_data_type, 0); + errors += test_read_quad_dma(TEST_BUFFER_SE_HALF_WORDS, LENGTH, dma_data_type, 1); + dma_data_type = TYPE_BYTE; + errors += test_read_quad_dma(TEST_BUFFER_BYTES, LENGTH, dma_data_type, 0); + errors += test_read_quad_dma(TEST_BUFFER_SE_BYTES, LENGTH, dma_data_type, 1); + + PRINTF("\n--------TEST FINISHED--------\n"); + if (errors == 0) { + PRINTF("All tests passed!\n"); + return EXIT_SUCCESS; + } else { + PRINTF("Some tests failed!\n"); + return EXIT_FAILURE; + } + +} + +uint32_t test_read_dma(uint32_t *test_buffer, uint32_t len, dma_trans_data_t dma_data_type, uint8_t sign_extend) { + + dma_data_type_t dma_trans_data_type; + + switch (dma_data_type) { + case TYPE_WORD: + dma_trans_data_type = DMA_DATA_TYPE_WORD; + break; + case TYPE_HALF_WORD: + dma_trans_data_type = DMA_DATA_TYPE_HALF_WORD; + break; + case TYPE_BYTE: + dma_trans_data_type = DMA_DATA_TYPE_BYTE; + break; + default: + break; + } + + dma_init(NULL); + + // The DMA will wait for the SPI HOST/FLASH RX FIFO valid signal + #ifndef USE_SPI_FLASH + uint8_t slot = DMA_TRIG_SLOT_SPI_RX; + #else + uint8_t slot = DMA_TRIG_SLOT_SPI_FLASH_RX; + #endif + + // Set up DMA source target + dma_target_t tgt_src = { + .inc_d1_du = 0, // Target is peripheral, no increment + 
.type = dma_trans_data_type, + }; + // Target is SPI RX FIFO + tgt_src.ptr = (uint8_t*) (w25q128jw_read_standard_setup((uint32_t*)(TEST_BUFFER_WORDS), flash_data, len)); + // Trigger to control the data flow + tgt_src.trig = slot; + + // Set up DMA destination target + dma_target_t tgt_dst = { + .inc_d1_du = 1, // Increment by 1 data unit (word) + .type = DMA_DATA_TYPE_WORD, + .trig = DMA_TRIG_MEMORY, // Read-write operation to memory + }; + tgt_dst.ptr = (uint8_t*)flash_data; // Target is the data buffer + + // Set up DMA transaction + dma_trans_t trans = { + .src = &tgt_src, + .dst = &tgt_dst, + .end = DMA_TRANS_END_POLLING, + .mode = DMA_TRANS_MODE_SUBADDRESS, + .sign_ext = sign_extend, + }; + + // Size is in data units (words in this case) + trans.size_d1_du = len >> (dma_data_type); + + // Validate, load and launch DMA transaction + dma_config_flags_t res; + res = dma_validate_transaction(&trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY ); + res = dma_load_transaction(&trans); + res = dma_launch(&trans); + + // Wait for DMA to finish transaction + while(!dma_is_ready(0)); + + uint32_t result = check_result(test_buffer, len, dma_data_type, sign_extend); + + // Reset the flash data buffer + memset(flash_data, 0, len * sizeof(uint8_t)); + + return result; +} + +uint32_t test_read_quad_dma(uint32_t *test_buffer, uint32_t len, dma_trans_data_t dma_data_type, uint8_t sign_extend) { + + dma_data_type_t dma_trans_data_type; + + switch (dma_data_type) { + case TYPE_WORD: + dma_trans_data_type = DMA_DATA_TYPE_WORD; + break; + case TYPE_HALF_WORD: + dma_trans_data_type = DMA_DATA_TYPE_HALF_WORD; + break; + case TYPE_BYTE: + dma_trans_data_type = DMA_DATA_TYPE_BYTE; + break; + default: + break; + } + + dma_init(NULL); + + // The DMA will wait for the SPI HOST/FLASH RX FIFO valid signal + #ifndef USE_SPI_FLASH + uint8_t slot = DMA_TRIG_SLOT_SPI_RX; + #else + uint8_t slot = DMA_TRIG_SLOT_SPI_FLASH_RX; + #endif + + // Set up DMA source target + dma_target_t tgt_src = 
{ + .inc_d1_du = 0, // Target is peripheral, no increment + .type = dma_trans_data_type, + }; + // Target is SPI RX FIFO + tgt_src.ptr = (uint8_t*) (w25q128jw_read_quad_setup((uint32_t*)(TEST_BUFFER_WORDS), flash_data, len)); + // Trigger to control the data flow + tgt_src.trig = slot; + + // Set up DMA destination target + dma_target_t tgt_dst = { + .inc_d1_du = 1, // Increment by 1 data unit (word) + .type = DMA_DATA_TYPE_WORD, + .trig = DMA_TRIG_MEMORY, // Read-write operation to memory + }; + tgt_dst.ptr = (uint8_t*)flash_data; // Target is the data buffer + + // Set up DMA transaction + dma_trans_t trans = { + .src = &tgt_src, + .dst = &tgt_dst, + .end = DMA_TRANS_END_POLLING, + .mode = DMA_TRANS_MODE_SUBADDRESS, + .sign_ext = sign_extend, + }; + + // Size is in data units (words in this case) + trans.size_d1_du = len >> (dma_data_type); + + // Validate, load and launch DMA transaction + dma_config_flags_t res; + res = dma_validate_transaction(&trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY ); + res = dma_load_transaction(&trans); + res = dma_launch(&trans); + + // Wait for DMA to finish transaction + while(!dma_is_ready(0)); + + uint32_t result = check_result(test_buffer, len, dma_data_type, sign_extend); + + // Reset the flash data buffer + memset(flash_data, 0, len * sizeof(uint8_t)); + + return result; +} + +uint32_t check_result(uint32_t *test_buffer, uint32_t len, dma_trans_data_t dma_data_type, uint32_t sign_extend) { + uint32_t errors = 0; + + for (uint32_t i = 0; i < len>>dma_data_type; i += 1) { + if (test_buffer[i] != flash_data[i]) { + PRINTF("Error in transfer %d %d at position %d: expected %x, got %x\n", dma_data_type, sign_extend, i, test_buffer[i], flash_data[i]); + errors++; + } + } + + if (errors == 0) { + PRINTF("success!\n"); + } else { + PRINTF("failure, %d errors!\n", errors); + } + + return errors; +} diff --git a/sw/applications/example_power_manager/main.c b/sw/applications/example_power_manager/main.c index 
e1fe60e90..efacddfb9 100644 --- a/sw/applications/example_power_manager/main.c +++ b/sw/applications/example_power_manager/main.c @@ -24,6 +24,8 @@ #endif +#define EXTERNAL_CSR_REG_MIE_MASK (1<<11) + /* By default, printfs are activated for FPGA and disabled for simulation. */ #define PRINTF_IN_FPGA 1 #define PRINTF_IN_SIM 0 @@ -72,6 +74,10 @@ int main(int argc, char *argv[]) //counters uint32_t reset_off, reset_on, switch_off, switch_on, iso_off, iso_on; + CSR_CLEAR_BITS(CSR_REG_MIE, EXTERNAL_CSR_REG_MIE_MASK ); + /* Enable machine-level external interrupt. */ + CSR_SET_BITS(CSR_REG_MIE, EXTERNAL_CSR_REG_MIE_MASK ); + // Setup pads #ifndef TARGET_IS_FPGA pad_control_t pad_control; @@ -460,4 +466,4 @@ int main(int argc, char *argv[]) #endif return EXIT_SUCCESS; -} +} \ No newline at end of file diff --git a/sw/device/bsp/w25q/w25q.c b/sw/device/bsp/w25q/w25q.c index 4696ee48f..39cd22b99 100644 --- a/sw/device/bsp/w25q/w25q.c +++ b/sw/device/bsp/w25q/w25q.c @@ -319,6 +319,54 @@ w25q_error_codes_t w25q128jw_write(uint32_t addr, void *data, uint32_t length, u return status; } +uint32_t* w25q128jw_read_standard_setup(uint32_t addr, void *data, uint32_t length) { + + // Sanity checks + if (w25q128jw_sanity_checks(addr, data, length) != FLASH_OK) return NULL; + + // Take into account the extra bytes (if any) + if (length % 4 != 0) { + //only multiple of 4 bytes are supported in this function + return NULL; + } + + /* + * SET UP DMA + */ + // SPI and SPI_FLASH are the same IP so same register map + uint32_t *fifo_ptr_rx = (uint32_t *)((uintptr_t)spi + SPI_HOST_RXDATA_REG_OFFSET); + + // Address + Read command + uint32_t read_byte_cmd = ((REVERT_24b_ADDR(addr & 0x00ffffff) << 8) | FC_RD); + // Load command to TX FIFO + spi_write_word(spi, read_byte_cmd); + spi_wait_for_ready(spi); + + // Set up segment parameters -> send command and address + const uint32_t cmd_read_1 = spi_create_command((spi_command_t){ + .len = 3, // 4 Bytes + .csaat = true, // Command not finished + 
.speed = SPI_SPEED_STANDARD, // Single speed + .direction = SPI_DIR_TX_ONLY // Write only + }); + // Load segment parameters to COMMAND register + spi_set_command(spi, cmd_read_1); + spi_wait_for_ready(spi); + + // Set up segment parameters -> read length bytes + const uint32_t cmd_read_2 = spi_create_command((spi_command_t){ + .len = length-1, // len bytes + .csaat = false, // End command + .speed = SPI_SPEED_STANDARD, // Single speed + .direction = SPI_DIR_RX_ONLY // Read only + }); + spi_set_command(spi, cmd_read_2); + spi_wait_for_ready(spi); + + + return fifo_ptr_rx; +} + w25q_error_codes_t w25q128jw_read_standard(uint32_t addr, void* data, uint32_t length) { // Sanity checks if (w25q128jw_sanity_checks(addr, data, length) != FLASH_OK) return FLASH_ERROR; @@ -440,7 +488,6 @@ w25q_error_codes_t w25q128jw_erase_and_write_standard(uint32_t addr, void* data, } - w25q_error_codes_t w25q128jw_read_standard_dma(uint32_t addr, void *data, uint32_t length, uint8_t no_wait_init_dma, uint8_t no_sanity_checks) { // Sanity checks @@ -540,6 +587,7 @@ w25q_error_codes_t w25q128jw_read_standard_dma(uint32_t addr, void *data, uint32 return FLASH_OK; } + w25q_error_codes_t w25q128jw_read_standard_dma_async(uint32_t addr, void *data, uint32_t length) { // Sanity checks @@ -679,6 +727,67 @@ w25q_error_codes_t w25q128jw_erase_and_write_standard_dma(uint32_t addr, void* d } +uint32_t* w25q128jw_read_quad_setup(uint32_t addr, void *data, uint32_t length) { + // Sanity checks + if (w25q128jw_sanity_checks(addr, data, length) != FLASH_OK) return NULL; + + // Send quad read command at standard speed + uint32_t cmd_read_quadIO = FC_RDQIO; + spi_write_word(spi, cmd_read_quadIO); + const uint32_t cmd_read = spi_create_command((spi_command_t){ + .len = 0, // 1 Byte + .csaat = true, // Command not finished + .speed = SPI_SPEED_STANDARD, // Single speed + .direction = SPI_DIR_TX_ONLY // Write only + }); + spi_set_command(spi, cmd_read); + spi_wait_for_ready(spi); + + /* + * Send address at 
quad speed. + * Last byte is Fxh (here FFh) required by W25Q128JW + */ + uint32_t read_byte_cmd = (REVERT_24b_ADDR(addr) | (0xFF << 24)); + spi_write_word(spi, read_byte_cmd); + const uint32_t cmd_address = spi_create_command((spi_command_t){ + .len = 3, // 3 Byte + .csaat = true, // Command not finished + .speed = SPI_SPEED_QUAD, // Quad speed + .direction = SPI_DIR_TX_ONLY // Write only + }); + spi_set_command(spi, cmd_address); + spi_wait_for_ready(spi); + + // Quad read requires dummy clocks + const uint32_t dummy_clocks_cmd = spi_create_command((spi_command_t){ + #ifndef TARGET_SIM + .len = DUMMY_CLOCKS_FAST_READ_QUAD_IO-1, // W25Q128JW flash needs 4 dummy cycles + #else + .len = DUMMY_CLOCKS_SIM-1, // SPI flash simulation model needs 8 dummy cycles + #endif + .csaat = true, // Command not finished + .speed = SPI_SPEED_QUAD, // Quad speed + .direction = SPI_DIR_DUMMY // Dummy + }); + spi_set_command(spi, dummy_clocks_cmd); + spi_wait_for_ready(spi); + + // Read back the requested data at quad speed + const uint32_t cmd_read_rx = spi_create_command((spi_command_t){ + .len = length-1, // length bytes + .csaat = false, // End command + .speed = SPI_SPEED_QUAD, // Quad speed + .direction = SPI_DIR_RX_ONLY // Read only + }); + spi_set_command(spi, cmd_read_rx); + spi_wait_for_ready(spi); + + /* COMMAND FINISHED */ + + // SPI and SPI_FLASH are the same IP so same register map + return (uint32_t *)((uintptr_t)spi + SPI_HOST_RXDATA_REG_OFFSET); + +} w25q_error_codes_t w25q128jw_read_quad(uint32_t addr, void *data, uint32_t length) { // Sanity checks diff --git a/sw/device/bsp/w25q/w25q128jw.h b/sw/device/bsp/w25q/w25q128jw.h index a638a5e11..6a7c5a4f7 100644 --- a/sw/device/bsp/w25q/w25q128jw.h +++ b/sw/device/bsp/w25q/w25q128jw.h @@ -224,6 +224,16 @@ w25q_error_codes_t w25q128jw_read(uint32_t addr, void* data, uint32_t length); */ w25q_error_codes_t w25q128jw_write(uint32_t addr, void* data, uint32_t length, uint8_t erase_before_write); +/** + * @brief Setup SPI to 
read from flash at standard speed. (NEEDS TO BE FOLLOWED BY MANUAL SETUP OF THE DMA) + * + * @param addr 24-bit flash address to read from. + * @param data pointer to the data buffer. + * @param length number of bytes to read. + * @return ptr to SPI data register. +*/ +uint32_t* w25q128jw_read_standard_setup(uint32_t addr, void *data, uint32_t length); + /** * @brief Read from flash at standard speed. * @@ -309,6 +319,16 @@ w25q_error_codes_t w25q128jw_erase_and_write_standard_dma(uint32_t addr, void* d */ w25q_error_codes_t w25q128jw_read_quad(uint32_t addr, void* data, uint32_t length); +/** + * @brief Setup SPI to read from flash at quad speed. (NEEDS TO BE FOLLOWED BY MANUAL SETUP OF THE DMA) + * + * @param addr 24-bit flash address to read from. + * @param data pointer to the data buffer. + * @param length number of bytes to read. + * @return pointer to the SPI RX data register, or NULL if the sanity checks fail. +*/ +uint32_t* w25q128jw_read_quad_setup(uint32_t addr, void *data, uint32_t length); + /** * @brief Write to flash at quad speed. Use this function only to write to unitialized data * diff --git a/sw/device/lib/drivers/dma/dma.c b/sw/device/lib/drivers/dma/dma.c index 3d1023143..6c0921722 100644 --- a/sw/device/lib/drivers/dma/dma.c +++ b/sw/device/lib/drivers/dma/dma.c @@ -60,7 +60,7 @@ extern "C" /** * Returns the mask to enable/disable DMA interrupts. */ -#define DMA_CSR_REG_MIE_MASK (( 1 << 19 ) | (1 << 11 ) ) // @ToDo Add definitions for this 19 and 11 +#define DMA_CSR_REG_MIE_MASK (( 1 << 19 )) // 19 is DMA fast interrupt bit in MIE CSR /** * Mask to determine if an address is multiple of 4 (Word aligned). diff --git a/sw/device/lib/drivers/dma/dma.h b/sw/device/lib/drivers/dma/dma.h index 98a4f0f4b..eb96e2bc9 100644 --- a/sw/device/lib/drivers/dma/dma.h +++ b/sw/device/lib/drivers/dma/dma.h @@ -227,7 +227,8 @@ typedef enum parameters. This generates a circular mode in the source and/or destination pointing to memory. 
*/ DMA_TRANS_MODE_ADDRESS = DMA_MODE_MODE_VALUE_ADDRESS_MODE, /*!< In this mode, the destination address is read from the address port! */ - + DMA_TRANS_MODE_SUBADDRESS = DMA_MODE_MODE_VALUE_SUBADDRESS_MODE, + DMA_TRANS_MODE_HW_FIFO = DMA_MODE_MODE_VALUE_HW_FIFO_MODE, DMA_TRANS_MODE__size, /*!< Not used, only for sanity checks. */ } dma_trans_mode_t; diff --git a/sw/device/lib/drivers/dma/dma_regs.h b/sw/device/lib/drivers/dma/dma_regs.h index c0ef3797a..f0afad059 100644 --- a/sw/device/lib/drivers/dma/dma_regs.h +++ b/sw/device/lib/drivers/dma/dma_regs.h @@ -114,13 +114,15 @@ extern "C" { // Set the operational mode of the DMA #define DMA_MODE_REG_OFFSET 0x38 -#define DMA_MODE_MODE_MASK 0x3 +#define DMA_MODE_MODE_MASK 0x7 #define DMA_MODE_MODE_OFFSET 0 #define DMA_MODE_MODE_FIELD \ ((bitfield_field32_t) { .mask = DMA_MODE_MODE_MASK, .index = DMA_MODE_MODE_OFFSET }) #define DMA_MODE_MODE_VALUE_LINEAR_MODE 0x0 #define DMA_MODE_MODE_VALUE_CIRCULAR_MODE 0x1 #define DMA_MODE_MODE_VALUE_ADDRESS_MODE 0x2 +#define DMA_MODE_MODE_VALUE_SUBADDRESS_MODE 0x3 +#define DMA_MODE_MODE_VALUE_HW_FIFO_MODE 0x4 // Set the dimensionality of the DMA #define DMA_DIM_CONFIG_REG_OFFSET 0x3c @@ -185,6 +187,11 @@ extern "C" { #define DMA_WINDOW_IFR_REG_OFFSET 0x64 #define DMA_WINDOW_IFR_FLAG_BIT 0 +// In HW_FIFO_MODE, is the input data to be sign extended before sending it +// to the hw read fifo? 
+#define DMA_HW_FIFO_MODE_SIGN_EXT_REG_OFFSET 0x68 +#define DMA_HW_FIFO_MODE_SIGN_EXT_HW_FIFO_SIGNED_BIT 0 + #ifdef __cplusplus } // extern "C" #endif diff --git a/tb/tb.vlt b/tb/tb.vlt index 6ccdab5b7..f55f412a1 100644 --- a/tb/tb.vlt +++ b/tb/tb.vlt @@ -10,6 +10,8 @@ lint_off -rule UNOPTFLAT -file "*tb/ext_xbar.sv" -match "Signal unoptimizable: F lint_off -rule UNDRIVEN -file "*tb/testharness.sv" -match "Signal is not driven: 'jtag_tdo_o'*" lint_off -rule UNDRIVEN -file "*tb/testharness.sv" -match "Signal is not driven: 'ext_ao_peripheral_req'*" lint_off -rule UNDRIVEN -file "*tb/testharness.sv" -match "Signal is not driven: 'ext_ao_peripheral_resp'*" +lint_off -rule UNDRIVEN -file "*tb/testharness.sv" -match "Signal is not driven: 'hw_fifo_req'*" +lint_off -rule UNDRIVEN -file "*tb/testharness.sv" -match "Signal is not driven: 'hw_fifo_resp'*" lint_off -rule SYNCASYNCNET -file "*tb/testharness.sv" -match "*" lint_off -rule WIDTH -file "*tb/testharness.sv" -match "*" lint_off -rule LITENDIAN -file "*tb/testharness.sv" -match "*" diff --git a/tb/testharness.sv b/tb/testharness.sv index e9a2cd3cf..bbc485d09 100644 --- a/tb/testharness.sv +++ b/tb/testharness.sv @@ -48,6 +48,7 @@ module testharness #( import reg_pkg::*; import testharness_pkg::*; import addr_map_rule_pkg::*; + import hw_fifo_pkg::*; import core_v_mini_mcu_pkg::*; localparam AO_SPC_NUM = 1; @@ -126,6 +127,9 @@ module testharness #( reg_req_t periph_slave_req; reg_rsp_t periph_slave_rsp; + hw_fifo_req_t hw_fifo_req; + hw_fifo_resp_t hw_fifo_resp; + reg_pkg::reg_req_t [testharness_pkg::EXT_NPERIPHERALS-1:0] ext_periph_slv_req; reg_pkg::reg_rsp_t [testharness_pkg::EXT_NPERIPHERALS-1:0] ext_periph_slv_rsp; @@ -278,6 +282,8 @@ module testharness #( .ext_dma_write_resp_i(heep_dma_write_resp), .ext_dma_addr_req_o(heep_dma_addr_req), .ext_dma_addr_resp_i(heep_dma_addr_resp), + .hw_fifo_req_o(hw_fifo_req), + .hw_fifo_resp_i(hw_fifo_resp), .ext_ao_peripheral_req_i(ext_ao_peripheral_req), 
.ext_ao_peripheral_resp_o(ext_ao_peripheral_resp), .ext_peripheral_slave_req_o(periph_slave_req), @@ -477,11 +483,13 @@ module testharness #( // External peripheral example with master port to access memory dma #( - .reg_req_t (reg_pkg::reg_req_t), - .reg_rsp_t (reg_pkg::reg_rsp_t), - .obi_req_t (obi_pkg::obi_req_t), + .reg_req_t(reg_pkg::reg_req_t), + .reg_rsp_t(reg_pkg::reg_rsp_t), + .obi_req_t(obi_pkg::obi_req_t), .obi_resp_t(obi_pkg::obi_resp_t), - .SLOT_NUM (DMA_TRIGGER_SLOT_NUM) + .hw_fifo_req_t(hw_fifo_pkg::hw_fifo_req_t), + .hw_fifo_resp_t(hw_fifo_pkg::hw_fifo_resp_t), + .SLOT_NUM(DMA_TRIGGER_SLOT_NUM) ) dma_i ( .clk_i, .rst_ni, @@ -493,6 +501,8 @@ module testharness #( .dma_read_resp_i(ext_master_resp[testharness_pkg::EXT_MASTER0_IDX]), .dma_write_req_o(ext_master_req[testharness_pkg::EXT_MASTER1_IDX]), .dma_write_resp_i(ext_master_resp[testharness_pkg::EXT_MASTER1_IDX]), + .hw_fifo_req_o(), + .hw_fifo_resp_i(), .dma_addr_req_o(), .dma_addr_resp_i('0), .trigger_slot_i('0),