From 804c57aabc144f8067741eadd4ca48546b25aeab Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 18 Oct 2018 15:58:53 +0200 Subject: [PATCH] axi_dmac: Remove length alignment requirement for MM interfaces The DMAC has the requirement that the length of the transfer is aligned to the widest interface width. E.g. if the widest interface is 256 bits (32 bytes) wide, the length of the transfer needs to be a multiple of 32. This restriction can be relaxed for the memory mapped interfaces. This is done by partially ignoring data of a beat from/to the MM interface. For write access the strb (write strobe) bits are used to mask out bytes that do not contain valid data. For read access a full beat is read but part of the data is discarded. This works fine as long as the read access is side-effect free, i.e. this method should not be used to access data from memory mapped peripherals like a FIFO. This means that, for example, the length alignment requirement of a DMA configured for a 64-bit memory and a 16-bit streaming interface is now only 2 bytes instead of 8 bytes as before. Note that the address alignment requirement is not affected by this. The address still needs to be aligned to the width of the MM interface that it belongs to. 
Signed-off-by: Lars-Peter Clausen --- library/axi_dmac/axi_dmac.v | 14 ++++++++--- library/axi_dmac/axi_dmac_burst_memory.v | 17 ++++++++++++- library/axi_dmac/axi_dmac_resize_dest.v | 31 +++++++++++++++++++++--- library/axi_dmac/axi_dmac_resize_src.v | 4 +-- library/axi_dmac/dest_axi_mm.v | 4 +-- library/axi_dmac/request_arb.v | 30 +++++++++++++++++------ library/axi_dmac/src_axi_mm.v | 19 +++++++++++++++ library/axi_dmac/tb/dma_read_tb.v | 3 +-- library/axi_dmac/tb/dma_write_tb.v | 3 +-- 9 files changed, 101 insertions(+), 24 deletions(-) diff --git a/library/axi_dmac/axi_dmac.v b/library/axi_dmac/axi_dmac.v index bf7e64d51..ac32c12a1 100644 --- a/library/axi_dmac/axi_dmac.v +++ b/library/axi_dmac/axi_dmac.v @@ -284,10 +284,16 @@ localparam REAL_MAX_BYTES_PER_BURST = BYTES_PER_BURST_LIMIT < MAX_BYTES_PER_BURST ? BYTES_PER_BURST_LIMIT : MAX_BYTES_PER_BURST; -/* Align to the length to the wider interface */ -localparam DMA_LENGTH_ALIGN = - BYTES_PER_BEAT_WIDTH_DEST < BYTES_PER_BEAT_WIDTH_SRC ? - BYTES_PER_BEAT_WIDTH_SRC : BYTES_PER_BEAT_WIDTH_DEST; +/* MM has no alignment requirements */ +localparam DMA_LENGTH_ALIGN_SRC = + DMA_TYPE_SRC == DMA_TYPE_AXI_MM ? 0 : BYTES_PER_BEAT_WIDTH_SRC; +localparam DMA_LENGTH_ALIGN_DEST = + DMA_TYPE_DEST == DMA_TYPE_AXI_MM ? 0 : BYTES_PER_BEAT_WIDTH_DEST; + +/* Choose the larger of the two */ + localparam DMA_LENGTH_ALIGN = + DMA_LENGTH_ALIGN_SRC < DMA_LENGTH_ALIGN_DEST ? + DMA_LENGTH_ALIGN_DEST : DMA_LENGTH_ALIGN_SRC; localparam BYTES_PER_BURST_WIDTH = REAL_MAX_BYTES_PER_BURST > 2048 ? 
12 : diff --git a/library/axi_dmac/axi_dmac_burst_memory.v b/library/axi_dmac/axi_dmac_burst_memory.v index fbf12a3ea..e9f604c67 100644 --- a/library/axi_dmac/axi_dmac_burst_memory.v +++ b/library/axi_dmac/axi_dmac_burst_memory.v @@ -65,6 +65,7 @@ module axi_dmac_burst_memory #( input dest_data_ready, output [DATA_WIDTH_DEST-1:0] dest_data, output dest_data_last, + output [DATA_WIDTH_DEST/8-1:0] dest_data_strb, output [BYTES_PER_BURST_WIDTH-1:0] dest_burst_info_length, output dest_burst_info_partial, @@ -114,6 +115,7 @@ localparam ADDRESS_WIDTH_SRC = BURST_LEN_WIDTH_SRC + ID_WIDTH - 1; localparam ADDRESS_WIDTH_DEST = BURST_LEN_WIDTH_DEST + ID_WIDTH - 1; localparam BYTES_PER_BEAT_WIDTH_MEM_SRC = BYTES_PER_BURST_WIDTH - BURST_LEN_WIDTH_SRC; +localparam BYTES_PER_BEAT_WIDTH_DEST = BYTES_PER_BURST_WIDTH - BURST_LEN_WIDTH_DEST; /* * The burst memory is separated into 2**(ID_WIDTH-1) segments. Each segment can @@ -153,6 +155,7 @@ wire [BURST_LEN_WIDTH_DEST-1:0] dest_burst_len; reg dest_valid = 1'b0; reg dest_mem_data_valid = 1'b0; reg dest_mem_data_last = 1'b0; +reg [DATA_WIDTH_MEM_DEST/8-1:0] dest_mem_data_strb = {DATA_WIDTH_MEM_DEST/8{1'b1}}; reg [BYTES_PER_BURST_WIDTH+1-1-DMA_LENGTH_ALIGN:0] burst_len_mem[0:AUX_FIFO_SIZE-1]; @@ -294,6 +297,16 @@ always @(posedge dest_clk) begin end end +always @(posedge dest_clk) begin + if (dest_beat == 1'b1) begin + if (dest_last == 1'b1) begin + dest_mem_data_strb <= {DATA_WIDTH_MEM_DEST/8{1'b1}} >> ~dest_burst_len_data[BYTES_PER_BEAT_WIDTH_DEST-1:0]; + end else begin + dest_mem_data_strb <= {DATA_WIDTH_MEM_DEST/8{1'b1}}; + end + end +end + assign dest_id_next_inc = inc_id(dest_id_next); always @(posedge dest_clk) begin @@ -391,11 +404,13 @@ axi_dmac_resize_dest #( .mem_data_ready (dest_mem_data_ready), .mem_data (dest_mem_data), .mem_data_last (dest_mem_data_last), + .mem_data_strb (dest_mem_data_strb), .dest_data_valid (dest_data_valid), .dest_data_ready (dest_data_ready), .dest_data (dest_data), - .dest_data_last 
(dest_data_last) + .dest_data_last (dest_data_last), + .dest_data_strb (dest_data_strb) ); sync_bits #( diff --git a/library/axi_dmac/axi_dmac_resize_dest.v b/library/axi_dmac/axi_dmac_resize_dest.v index c57b78976..8a7eeb237 100644 --- a/library/axi_dmac/axi_dmac_resize_dest.v +++ b/library/axi_dmac/axi_dmac_resize_dest.v @@ -46,11 +46,13 @@ module axi_dmac_resize_dest #( output mem_data_ready, input [DATA_WIDTH_MEM-1:0] mem_data, input mem_data_last, + input [DATA_WIDTH_MEM/8-1:0] mem_data_strb, output dest_data_valid, input dest_data_ready, output [DATA_WIDTH_DEST-1:0] dest_data, - output dest_data_last + output dest_data_last, + output [DATA_WIDTH_DEST/8-1:0] dest_data_strb ); /* @@ -62,6 +64,7 @@ generate if (DATA_WIDTH_DEST == DATA_WIDTH_MEM) begin assign dest_data_valid = mem_data_valid; assign dest_data = mem_data; assign dest_data_last = mem_data_last; + assign dest_data_strb = mem_data_strb; assign mem_data_ready = dest_data_ready; end else begin @@ -71,10 +74,11 @@ end else begin reg valid = 1'b0; reg [RATIO-1:0] last = 'h0; reg [DATA_WIDTH_MEM-1:0] data = 'h0; + reg [DATA_WIDTH_MEM/8-1:0] strb = {DATA_WIDTH_MEM/8{1'b1}}; wire last_beat; - assign last_beat = count == RATIO - 1; + assign last_beat = (count == RATIO - 1) | last[0]; always @(posedge clk) begin if (reset == 1'b1) begin @@ -90,24 +94,43 @@ end else begin if (reset == 1'b1) begin count <= 'h0; end else if (dest_data_ready == 1'b1 && dest_data_valid == 1'b1) begin - count <= count + 1; + if (last_beat == 1'b1) begin + count <= 'h0; + end else begin + count <= count + 1; + end end end assign mem_data_ready = ~valid | (dest_data_ready & last_beat); + integer i; always @(posedge clk) begin if (mem_data_ready == 1'b1) begin data <= mem_data; - last <= {mem_data_last,{RATIO-1{1'b0}}}; + + /* + * Skip those words where strb would be completely zero for the output + * word. We assume that strb is thermometer encoded (i.e. 
a certain number + * of LSBs are 1'b1 followed by all 1'b0 in the MSBs) and by extension + * that if the first strb bit for a word is zero that means that all strb + * bits for a word will be zero. + */ + for (i = 0; i < RATIO-1; i = i + 1) begin + last[i] <= mem_data_last & ~mem_data_strb[(i+1)*(DATA_WIDTH_MEM/8/RATIO)]; + end + last[RATIO-1] <= mem_data_last; + strb <= mem_data_strb; end else if (dest_data_ready == 1'b1) begin data[DATA_WIDTH_MEM-DATA_WIDTH_DEST-1:0] <= data[DATA_WIDTH_MEM-1:DATA_WIDTH_DEST]; + strb[(DATA_WIDTH_MEM-DATA_WIDTH_DEST)/8-1:0] <= strb[DATA_WIDTH_MEM/8-1:DATA_WIDTH_DEST/8]; last[RATIO-2:0] <= last[RATIO-1:1]; end end assign dest_data_valid = valid; assign dest_data = data[DATA_WIDTH_DEST-1:0]; + assign dest_data_strb = strb[DATA_WIDTH_DEST/8-1:0]; assign dest_data_last = last[0]; end endgenerate diff --git a/library/axi_dmac/axi_dmac_resize_src.v b/library/axi_dmac/axi_dmac_resize_src.v index 1a2b90537..3c5131a7f 100644 --- a/library/axi_dmac/axi_dmac_resize_src.v +++ b/library/axi_dmac/axi_dmac_resize_src.v @@ -82,8 +82,8 @@ end else begin reg valid = 1'b0; reg last = 1'b0; reg [DATA_WIDTH_MEM-1:0] data = 'h0; - reg [BYTES_PER_BEAT_WIDTH_SRC-1:0] valid_bytes; - reg partial_burst; + reg [BYTES_PER_BEAT_WIDTH_SRC-1:0] valid_bytes = 'h00; + reg partial_burst = 1'b0; reg [RATIO_WIDTH-1:0] num_beats = {RATIO_WIDTH{1'b1}}; always @(posedge clk) begin diff --git a/library/axi_dmac/dest_axi_mm.v b/library/axi_dmac/dest_axi_mm.v index 7b54c4853..338bb3841 100644 --- a/library/axi_dmac/dest_axi_mm.v +++ b/library/axi_dmac/dest_axi_mm.v @@ -77,6 +77,7 @@ module dmac_dest_mm_axi #( input fifo_valid, output fifo_ready, input [DMA_DATA_WIDTH-1:0] fifo_data, + input [DMA_DATA_WIDTH/8-1:0] fifo_strb, input fifo_last, input [BYTES_PER_BURST_WIDTH-1:0] dest_burst_info_length, @@ -149,8 +150,7 @@ assign m_axi_wvalid = fifo_valid; assign fifo_ready = m_axi_wready; assign m_axi_wlast = fifo_last; assign m_axi_wdata = fifo_data; - -assign m_axi_wstrb = 
{(DMA_DATA_WIDTH/8){1'b1}}; +assign m_axi_wstrb = fifo_strb; dmac_response_handler #( .ID_WIDTH(ID_WIDTH) diff --git a/library/axi_dmac/request_arb.v b/library/axi_dmac/request_arb.v index 778a1d964..1c010f0d1 100644 --- a/library/axi_dmac/request_arb.v +++ b/library/axi_dmac/request_arb.v @@ -238,10 +238,12 @@ wire [ID_WIDTH-1:0] dest_response_id; wire dest_valid; wire dest_ready; wire [DMA_DATA_WIDTH_DEST-1:0] dest_data; +wire [DMA_DATA_WIDTH_DEST/8-1:0] dest_strb; wire dest_last; wire dest_fifo_valid; wire dest_fifo_ready; wire [DMA_DATA_WIDTH_DEST-1:0] dest_fifo_data; +wire [DMA_DATA_WIDTH_DEST/8-1:0] dest_fifo_strb; wire dest_fifo_last; wire src_req_valid; @@ -249,6 +251,7 @@ wire src_req_ready; wire [DMA_ADDRESS_WIDTH_DEST-1:0] src_req_dest_address; wire [DMA_ADDRESS_WIDTH_SRC-1:0] src_req_src_address; wire [BEATS_PER_BURST_WIDTH_SRC-1:0] src_req_last_burst_length; +wire [BYTES_PER_BEAT_WIDTH_SRC-1:0] src_req_last_beat_bytes; wire src_req_sync_transfer_start; wire src_req_xlast; @@ -269,11 +272,13 @@ wire [ID_WIDTH-1:0] src_response_id; wire src_valid; wire [DMA_DATA_WIDTH_SRC-1:0] src_data; +wire [BYTES_PER_BEAT_WIDTH_SRC-1:0] src_valid_bytes; wire src_last; wire src_partial_burst; wire block_descr_to_dst; wire src_fifo_valid; wire [DMA_DATA_WIDTH_SRC-1:0] src_fifo_data; +wire [BYTES_PER_BEAT_WIDTH_SRC-1:0] src_fifo_valid_bytes; wire src_fifo_last; wire src_fifo_partial_burst; @@ -388,6 +393,7 @@ dmac_dest_mm_axi #( .fifo_valid(dest_valid), .fifo_ready(dest_ready), .fifo_data(dest_data), + .fifo_strb(dest_strb), .fifo_last(dest_last), .dest_burst_info_length(dest_burst_info_length), @@ -631,6 +637,7 @@ dmac_src_mm_axi #( .req_ready(src_req_ready), .req_address(src_req_src_address), .req_last_burst_length(src_req_last_burst_length), + .req_last_beat_bytes(src_req_last_beat_bytes), .bl_valid(src_bl_valid), .bl_ready(src_bl_ready), @@ -651,6 +658,7 @@ dmac_src_mm_axi #( .fifo_valid(src_valid), .fifo_data(src_data), + .fifo_valid_bytes(src_valid_bytes), 
.fifo_last(src_last), .m_axi_arready(m_axi_arready), @@ -746,6 +754,8 @@ dmac_src_axi_stream #( .s_axis_xfer_req(s_axis_xfer_req) ); +assign src_valid_bytes = {BYTES_PER_BEAT_WIDTH_SRC{1'b1}}; + util_axis_fifo #( .DATA_WIDTH(ID_WIDTH + 3), .ADDRESS_WIDTH(0), @@ -836,6 +846,8 @@ dmac_src_fifo_inf #( .xfer_req(fifo_wr_xfer_req) ); +assign src_valid_bytes = {BYTES_PER_BEAT_WIDTH_SRC{1'b1}}; + end else begin assign fifo_wr_overflow = 1'b0; @@ -919,7 +931,7 @@ sync_bits #( ); axi_register_slice #( - .DATA_WIDTH(DMA_DATA_WIDTH_SRC + 2), + .DATA_WIDTH(DMA_DATA_WIDTH_SRC + BYTES_PER_BEAT_WIDTH_SRC + 2), .FORWARD_REGISTERED(AXI_SLICE_SRC), .BACKWARD_REGISTERED(0) ) i_src_slice ( @@ -927,10 +939,10 @@ axi_register_slice #( .resetn(src_resetn), .s_axi_valid(src_valid), .s_axi_ready(), - .s_axi_data({src_data,src_last,src_partial_burst}), + .s_axi_data({src_data,src_valid_bytes,src_last,src_partial_burst}), .m_axi_valid(src_fifo_valid), .m_axi_ready(1'b1), /* No backpressure */ - .m_axi_data({src_fifo_data,src_fifo_last,src_fifo_partial_burst}) + .m_axi_data({src_fifo_data,src_fifo_valid_bytes,src_fifo_last,src_fifo_partial_burst}) ); axi_dmac_burst_memory #( @@ -950,7 +962,7 @@ axi_dmac_burst_memory #( .src_data_valid(src_fifo_valid), .src_data(src_fifo_data), .src_data_last(src_fifo_last), - .src_data_valid_bytes({BYTES_PER_BEAT_WIDTH_SRC{1'b1}}), + .src_data_valid_bytes(src_fifo_valid_bytes), .src_data_partial_burst(src_fifo_partial_burst), .src_data_request_id(src_data_request_id), @@ -961,6 +973,7 @@ axi_dmac_burst_memory #( .dest_data_ready(dest_fifo_ready), .dest_data(dest_fifo_data), .dest_data_last(dest_fifo_last), + .dest_data_strb(dest_fifo_strb), .dest_burst_info_length(dest_burst_info_length), .dest_burst_info_partial(dest_burst_info_partial), @@ -975,7 +988,7 @@ axi_dmac_burst_memory #( ); axi_register_slice #( - .DATA_WIDTH(DMA_DATA_WIDTH_DEST + 1), + .DATA_WIDTH(DMA_DATA_WIDTH_DEST + DMA_DATA_WIDTH_DEST / 8 + 1), .FORWARD_REGISTERED(AXI_SLICE_DEST), 
.BACKWARD_REGISTERED(AXI_SLICE_DEST) ) i_dest_slice ( @@ -985,12 +998,14 @@ axi_register_slice #( .s_axi_ready(dest_fifo_ready), .s_axi_data({ dest_fifo_last, + dest_fifo_strb, dest_fifo_data }), .m_axi_valid(dest_valid), .m_axi_ready(dest_ready), .m_axi_data({ dest_last, + dest_strb, dest_data }) ); @@ -1030,7 +1045,7 @@ util_axis_fifo #( ); util_axis_fifo #( - .DATA_WIDTH(DMA_ADDRESS_WIDTH_DEST + DMA_ADDRESS_WIDTH_SRC + BEATS_PER_BURST_WIDTH_SRC + 2), + .DATA_WIDTH(DMA_ADDRESS_WIDTH_DEST + DMA_ADDRESS_WIDTH_SRC + BYTES_PER_BURST_WIDTH + 2), .ADDRESS_WIDTH(0), .ASYNC_CLK(ASYNC_CLK_REQ_SRC) ) i_src_req_fifo ( @@ -1042,7 +1057,7 @@ util_axis_fifo #( .s_axis_data({ req_dest_address, req_src_address, - req_length[BYTES_PER_BURST_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC], + req_length[BYTES_PER_BURST_WIDTH-1:0], req_sync_transfer_start, req_xlast }), @@ -1056,6 +1071,7 @@ util_axis_fifo #( src_req_dest_address, src_req_src_address, src_req_last_burst_length, + src_req_last_beat_bytes, src_req_sync_transfer_start, src_req_xlast }), diff --git a/library/axi_dmac/src_axi_mm.v b/library/axi_dmac/src_axi_mm.v index e813a01d2..5d5291d93 100644 --- a/library/axi_dmac/src_axi_mm.v +++ b/library/axi_dmac/src_axi_mm.v @@ -51,6 +51,7 @@ module dmac_src_mm_axi #( output req_ready, input [DMA_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH] req_address, input [BEATS_PER_BURST_WIDTH-1:0] req_last_burst_length, + input [BYTES_PER_BEAT_WIDTH-1:0] req_last_beat_bytes, input enable, output reg enabled = 1'b0, @@ -73,6 +74,7 @@ module dmac_src_mm_axi #( output fifo_valid, output [DMA_DATA_WIDTH-1:0] fifo_data, + output [BYTES_PER_BEAT_WIDTH-1:0] fifo_valid_bytes, output fifo_last, // Read address @@ -108,6 +110,23 @@ assign response_id = id; assign measured_last_burst_length = req_last_burst_length; +reg [BYTES_PER_BEAT_WIDTH-1:0] last_beat_bytes; +reg [BYTES_PER_BEAT_WIDTH-1:0] last_beat_bytes_mem[0:2**ID_WIDTH-1]; + +assign fifo_valid_bytes = last_beat_bytes_mem[data_id]; + +always @(posedge m_axi_aclk) 
begin + if (bl_ready_ag == 1'b1 && bl_valid_ag == 1'b1) begin + last_beat_bytes <= req_last_beat_bytes; + end +end + + +always @(posedge m_axi_aclk) begin + last_beat_bytes_mem[address_id] <= address_eot ? last_beat_bytes : + {BYTES_PER_BEAT_WIDTH{1'b1}}; +end + splitter #( .NUM_M(3) ) i_req_splitter ( diff --git a/library/axi_dmac/tb/dma_read_tb.v b/library/axi_dmac/tb/dma_read_tb.v index 10b8d21d9..a329eb035 100644 --- a/library/axi_dmac/tb/dma_read_tb.v +++ b/library/axi_dmac/tb/dma_read_tb.v @@ -45,7 +45,6 @@ module dmac_dma_read_tb; `include "tb_base.v" localparam TRANSFER_ADDR = 32'h80000000; - localparam WIDTH_MAX = WIDTH_DEST > WIDTH_SRC ? WIDTH_DEST : WIDTH_SRC; reg req_valid = 1'b1; wire req_ready; @@ -109,7 +108,7 @@ module dmac_dma_read_tb; .DMA_TYPE_DEST(2), .DMA_DATA_WIDTH_SRC(WIDTH_SRC), .DMA_DATA_WIDTH_DEST(WIDTH_DEST), - .DMA_LENGTH_ALIGN($clog2(WIDTH_MAX/8)), + .DMA_LENGTH_ALIGN($clog2(WIDTH_DEST/8)), .FIFO_SIZE(8) ) transfer ( .m_src_axi_aclk(clk), diff --git a/library/axi_dmac/tb/dma_write_tb.v b/library/axi_dmac/tb/dma_write_tb.v index 17fcd4aaf..7582229fb 100644 --- a/library/axi_dmac/tb/dma_write_tb.v +++ b/library/axi_dmac/tb/dma_write_tb.v @@ -45,7 +45,6 @@ module dmac_dma_write_tb; `include "tb_base.v" localparam TRANSFER_ADDR = 32'h80000000; - localparam WIDTH_MAX = WIDTH_DEST > WIDTH_SRC ? WIDTH_DEST : WIDTH_SRC; reg req_valid = 1'b1; wire req_ready; @@ -109,7 +108,7 @@ module dmac_dma_write_tb; axi_dmac_transfer #( .DMA_DATA_WIDTH_SRC(WIDTH_SRC), .DMA_DATA_WIDTH_DEST(WIDTH_DEST), - .DMA_LENGTH_ALIGN($clog2(WIDTH_MAX/8)) + .DMA_LENGTH_ALIGN($clog2(WIDTH_SRC/8)) ) i_transfer ( .m_dest_axi_aclk (clk), .m_dest_axi_aresetn(resetn),