/*
 * Patch summary (unified diff against library/axi_dmac; the line breaks of
 * the original patch are collapsed in this copy).
 *
 * - axi_dmac.v: DMA_LENGTH_ALIGN becomes the larger of two per-side values,
 *   where a side whose DMA type equals DMA_TYPE_AXI_MM contributes 0 and any
 *   other side contributes its BYTES_PER_BEAT_WIDTH_*.
 * - axi_dmac_burst_memory.v: adds the dest_data_strb output. A strobe mask is
 *   registered on dest_clk whenever dest_beat is set: all ones normally, and
 *   when dest_last is set an all-ones mask shifted right by the inverted low
 *   BYTES_PER_BEAT_WIDTH_DEST bits of dest_burst_len_data.
 * - axi_dmac_resize_dest.v: carries mem_data_strb through to dest_data_strb.
 *   In the unequal-width branch, last[i] is additionally set when the memory
 *   word is last and the first strobe bit of sub-word i+1 is clear, and
 *   last_beat (hence mem_data_ready and the count reset) also fires on
 *   last[0].
 * - axi_dmac_resize_src.v: gives valid_bytes and partial_burst initial values.
 * - dest_axi_mm.v: m_axi_wstrb is driven from the new fifo_strb input instead
 *   of a constant all-ones value.
 * - request_arb.v: widens the source and destination register slices and the
 *   source request FIFO so they carry valid_bytes, strb and
 *   req_length[BYTES_PER_BURST_WIDTH-1:0]; src_valid_bytes is tied to all
 *   ones next to the dmac_src_axi_stream and dmac_src_fifo_inf instances.
 * - src_axi_mm.v: adds req_last_beat_bytes and fifo_valid_bytes plus a memory
 *   indexed by address_id (write) and data_id (read) that stores the latched
 *   last_beat_bytes when address_eot is set and all ones otherwise.
 * - tb/dma_read_tb.v, tb/dma_write_tb.v: DMA_LENGTH_ALIGN is computed from
 *   WIDTH_DEST (read) or WIDTH_SRC (write) instead of the wider of the two.
 */
diff --git a/library/axi_dmac/axi_dmac.v b/library/axi_dmac/axi_dmac.v index bf7e64d51..ac32c12a1 100644 --- a/library/axi_dmac/axi_dmac.v +++ b/library/axi_dmac/axi_dmac.v @@ -284,10 +284,16 @@ localparam REAL_MAX_BYTES_PER_BURST = BYTES_PER_BURST_LIMIT < MAX_BYTES_PER_BURST ? BYTES_PER_BURST_LIMIT : MAX_BYTES_PER_BURST; -/* Align to the length to the wider interface */ -localparam DMA_LENGTH_ALIGN = - BYTES_PER_BEAT_WIDTH_DEST < BYTES_PER_BEAT_WIDTH_SRC ? - BYTES_PER_BEAT_WIDTH_SRC : BYTES_PER_BEAT_WIDTH_DEST; +/* MM has no alignment requirements */ +localparam DMA_LENGTH_ALIGN_SRC = + DMA_TYPE_SRC == DMA_TYPE_AXI_MM ? 0 : BYTES_PER_BEAT_WIDTH_SRC; +localparam DMA_LENGTH_ALIGN_DEST = + DMA_TYPE_DEST == DMA_TYPE_AXI_MM ? 0 : BYTES_PER_BEAT_WIDTH_DEST; + +/* Choose the larger of the two */ + localparam DMA_LENGTH_ALIGN = + DMA_LENGTH_ALIGN_SRC < DMA_LENGTH_ALIGN_DEST ? + DMA_LENGTH_ALIGN_DEST : DMA_LENGTH_ALIGN_SRC; localparam BYTES_PER_BURST_WIDTH = REAL_MAX_BYTES_PER_BURST > 2048 ? 12 : diff --git a/library/axi_dmac/axi_dmac_burst_memory.v b/library/axi_dmac/axi_dmac_burst_memory.v index fbf12a3ea..e9f604c67 100644 --- a/library/axi_dmac/axi_dmac_burst_memory.v +++ b/library/axi_dmac/axi_dmac_burst_memory.v @@ -65,6 +65,7 @@ module axi_dmac_burst_memory #( input dest_data_ready, output [DATA_WIDTH_DEST-1:0] dest_data, output dest_data_last, + output [DATA_WIDTH_DEST/8-1:0] dest_data_strb, /* one strobe bit per byte of dest_data */ output [BYTES_PER_BURST_WIDTH-1:0] dest_burst_info_length, output dest_burst_info_partial, @@ -114,6 +115,7 @@ localparam ADDRESS_WIDTH_SRC = BURST_LEN_WIDTH_SRC + ID_WIDTH - 1; localparam ADDRESS_WIDTH_DEST = BURST_LEN_WIDTH_DEST + ID_WIDTH - 1; localparam BYTES_PER_BEAT_WIDTH_MEM_SRC = BYTES_PER_BURST_WIDTH - BURST_LEN_WIDTH_SRC; +localparam BYTES_PER_BEAT_WIDTH_DEST = BYTES_PER_BURST_WIDTH - BURST_LEN_WIDTH_DEST; /* * The burst memory is separated into 2**(ID_WIDTH-1) segments. 
Each segment can @@ -153,6 +155,7 @@ wire [BURST_LEN_WIDTH_DEST-1:0] dest_burst_len; reg dest_valid = 1'b0; reg dest_mem_data_valid = 1'b0; reg dest_mem_data_last = 1'b0; +reg [DATA_WIDTH_MEM_DEST/8-1:0] dest_mem_data_strb = {DATA_WIDTH_MEM_DEST/8{1'b1}}; reg [BYTES_PER_BURST_WIDTH+1-1-DMA_LENGTH_ALIGN:0] burst_len_mem[0:AUX_FIFO_SIZE-1]; @@ -294,6 +297,16 @@ always @(posedge dest_clk) begin end end +always @(posedge dest_clk) begin + if (dest_beat == 1'b1) begin + if (dest_last == 1'b1) begin + dest_mem_data_strb <= {DATA_WIDTH_MEM_DEST/8{1'b1}} >> ~dest_burst_len_data[BYTES_PER_BEAT_WIDTH_DEST-1:0]; + end else begin + dest_mem_data_strb <= {DATA_WIDTH_MEM_DEST/8{1'b1}}; + end + end +end + assign dest_id_next_inc = inc_id(dest_id_next); always @(posedge dest_clk) begin @@ -391,11 +404,13 @@ axi_dmac_resize_dest #( .mem_data_ready (dest_mem_data_ready), .mem_data (dest_mem_data), .mem_data_last (dest_mem_data_last), + .mem_data_strb (dest_mem_data_strb), .dest_data_valid (dest_data_valid), .dest_data_ready (dest_data_ready), .dest_data (dest_data), - .dest_data_last (dest_data_last) + .dest_data_last (dest_data_last), + .dest_data_strb (dest_data_strb) ); sync_bits #( diff --git a/library/axi_dmac/axi_dmac_resize_dest.v b/library/axi_dmac/axi_dmac_resize_dest.v index c57b78976..8a7eeb237 100644 --- a/library/axi_dmac/axi_dmac_resize_dest.v +++ b/library/axi_dmac/axi_dmac_resize_dest.v @@ -46,11 +46,13 @@ module axi_dmac_resize_dest #( output mem_data_ready, input [DATA_WIDTH_MEM-1:0] mem_data, input mem_data_last, + input [DATA_WIDTH_MEM/8-1:0] mem_data_strb, output dest_data_valid, input dest_data_ready, output [DATA_WIDTH_DEST-1:0] dest_data, - output dest_data_last + output dest_data_last, + output [DATA_WIDTH_DEST/8-1:0] dest_data_strb ); /* @@ -62,6 +64,7 @@ generate if (DATA_WIDTH_DEST == DATA_WIDTH_MEM) begin assign dest_data_valid = mem_data_valid; assign dest_data = mem_data; assign dest_data_last = mem_data_last; + assign dest_data_strb = mem_data_strb; 
 assign mem_data_ready = dest_data_ready; end else begin @@ -71,10 +74,11 @@ end else begin reg valid = 1'b0; reg [RATIO-1:0] last = 'h0; reg [DATA_WIDTH_MEM-1:0] data = 'h0; + reg [DATA_WIDTH_MEM/8-1:0] strb = {DATA_WIDTH_MEM/8{1'b1}}; wire last_beat; - assign last_beat = count == RATIO - 1; + assign last_beat = (count == RATIO - 1) | last[0]; always @(posedge clk) begin if (reset == 1'b1) begin @@ -90,24 +94,43 @@ end else begin if (reset == 1'b1) begin count <= 'h0; end else if (dest_data_ready == 1'b1 && dest_data_valid == 1'b1) begin - count <= count + 1; + if (last_beat == 1'b1) begin + count <= 'h0; + end else begin + count <= count + 1; + end end end assign mem_data_ready = ~valid | (dest_data_ready & last_beat); + integer i; always @(posedge clk) begin if (mem_data_ready == 1'b1) begin data <= mem_data; - last <= {mem_data_last,{RATIO-1{1'b0}}}; + + /* + * Skip those words where strb would be completely zero for the output + * word. We assume that strb is thermometer encoded (i.e. a certain number + * of LSBs are 1'b1 followed by all 1'b0 in the MSBs) and by extension + * that if the first strb bit for a word is zero that means that all strb + * bits for a word will be zero. 
+ */ + for (i = 0; i < RATIO-1; i = i + 1) begin + last[i] <= mem_data_last & ~mem_data_strb[(i+1)*(DATA_WIDTH_MEM/8/RATIO)]; + end + last[RATIO-1] <= mem_data_last; + strb <= mem_data_strb; end else if (dest_data_ready == 1'b1) begin data[DATA_WIDTH_MEM-DATA_WIDTH_DEST-1:0] <= data[DATA_WIDTH_MEM-1:DATA_WIDTH_DEST]; + strb[(DATA_WIDTH_MEM-DATA_WIDTH_DEST)/8-1:0] <= strb[DATA_WIDTH_MEM/8-1:DATA_WIDTH_DEST/8]; /* shift the strobes down together with data */ last[RATIO-2:0] <= last[RATIO-1:1]; end end assign dest_data_valid = valid; assign dest_data = data[DATA_WIDTH_DEST-1:0]; + assign dest_data_strb = strb[DATA_WIDTH_DEST/8-1:0]; assign dest_data_last = last[0]; end endgenerate diff --git a/library/axi_dmac/axi_dmac_resize_src.v b/library/axi_dmac/axi_dmac_resize_src.v index 1a2b90537..3c5131a7f 100644 --- a/library/axi_dmac/axi_dmac_resize_src.v +++ b/library/axi_dmac/axi_dmac_resize_src.v @@ -82,8 +82,8 @@ end else begin reg valid = 1'b0; reg last = 1'b0; reg [DATA_WIDTH_MEM-1:0] data = 'h0; - reg [BYTES_PER_BEAT_WIDTH_SRC-1:0] valid_bytes; - reg partial_burst; + reg [BYTES_PER_BEAT_WIDTH_SRC-1:0] valid_bytes = 'h00; + reg partial_burst = 1'b0; reg [RATIO_WIDTH-1:0] num_beats = {RATIO_WIDTH{1'b1}}; always @(posedge clk) begin diff --git a/library/axi_dmac/dest_axi_mm.v b/library/axi_dmac/dest_axi_mm.v index 7b54c4853..338bb3841 100644 --- a/library/axi_dmac/dest_axi_mm.v +++ b/library/axi_dmac/dest_axi_mm.v @@ -77,6 +77,7 @@ module dmac_dest_mm_axi #( input fifo_valid, output fifo_ready, input [DMA_DATA_WIDTH-1:0] fifo_data, + input [DMA_DATA_WIDTH/8-1:0] fifo_strb, input fifo_last, input [BYTES_PER_BURST_WIDTH-1:0] dest_burst_info_length, @@ -149,8 +150,7 @@ assign m_axi_wvalid = fifo_valid; assign fifo_ready = m_axi_wready; assign m_axi_wlast = fifo_last; assign m_axi_wdata = fifo_data; - -assign m_axi_wstrb = {(DMA_DATA_WIDTH/8){1'b1}}; +assign m_axi_wstrb = fifo_strb; /* per-byte strobes taken from the fifo_strb input */ dmac_response_handler #( 
.ID_WIDTH(ID_WIDTH) diff --git a/library/axi_dmac/request_arb.v b/library/axi_dmac/request_arb.v index 
778a1d964..1c010f0d1 100644 --- a/library/axi_dmac/request_arb.v +++ b/library/axi_dmac/request_arb.v @@ -238,10 +238,12 @@ wire [ID_WIDTH-1:0] dest_response_id; wire dest_valid; wire dest_ready; wire [DMA_DATA_WIDTH_DEST-1:0] dest_data; +wire [DMA_DATA_WIDTH_DEST/8-1:0] dest_strb; wire dest_last; wire dest_fifo_valid; wire dest_fifo_ready; wire [DMA_DATA_WIDTH_DEST-1:0] dest_fifo_data; +wire [DMA_DATA_WIDTH_DEST/8-1:0] dest_fifo_strb; wire dest_fifo_last; wire src_req_valid; @@ -249,6 +251,7 @@ wire src_req_ready; wire [DMA_ADDRESS_WIDTH_DEST-1:0] src_req_dest_address; wire [DMA_ADDRESS_WIDTH_SRC-1:0] src_req_src_address; wire [BEATS_PER_BURST_WIDTH_SRC-1:0] src_req_last_burst_length; +wire [BYTES_PER_BEAT_WIDTH_SRC-1:0] src_req_last_beat_bytes; wire src_req_sync_transfer_start; wire src_req_xlast; @@ -269,11 +272,13 @@ wire [ID_WIDTH-1:0] src_response_id; wire src_valid; wire [DMA_DATA_WIDTH_SRC-1:0] src_data; +wire [BYTES_PER_BEAT_WIDTH_SRC-1:0] src_valid_bytes; wire src_last; wire src_partial_burst; wire block_descr_to_dst; wire src_fifo_valid; wire [DMA_DATA_WIDTH_SRC-1:0] src_fifo_data; +wire [BYTES_PER_BEAT_WIDTH_SRC-1:0] src_fifo_valid_bytes; wire src_fifo_last; wire src_fifo_partial_burst; @@ -388,6 +393,7 @@ dmac_dest_mm_axi #( .fifo_valid(dest_valid), .fifo_ready(dest_ready), .fifo_data(dest_data), + .fifo_strb(dest_strb), .fifo_last(dest_last), .dest_burst_info_length(dest_burst_info_length), @@ -631,6 +637,7 @@ dmac_src_mm_axi #( .req_ready(src_req_ready), .req_address(src_req_src_address), .req_last_burst_length(src_req_last_burst_length), + .req_last_beat_bytes(src_req_last_beat_bytes), .bl_valid(src_bl_valid), .bl_ready(src_bl_ready), @@ -651,6 +658,7 @@ dmac_src_mm_axi #( .fifo_valid(src_valid), .fifo_data(src_data), + .fifo_valid_bytes(src_valid_bytes), .fifo_last(src_last), .m_axi_arready(m_axi_arready), @@ -746,6 +754,8 @@ dmac_src_axi_stream #( .s_axis_xfer_req(s_axis_xfer_req) ); +assign src_valid_bytes = 
{BYTES_PER_BEAT_WIDTH_SRC{1'b1}}; /* constant all ones alongside dmac_src_axi_stream */ + 
util_axis_fifo #( .DATA_WIDTH(ID_WIDTH + 3), .ADDRESS_WIDTH(0), @@ -836,6 +846,8 @@ dmac_src_fifo_inf #( .xfer_req(fifo_wr_xfer_req) ); +assign src_valid_bytes = {BYTES_PER_BEAT_WIDTH_SRC{1'b1}}; /* constant all ones alongside dmac_src_fifo_inf */ + end else begin assign fifo_wr_overflow = 1'b0; @@ -919,7 +931,7 @@ sync_bits #( ); axi_register_slice #( - .DATA_WIDTH(DMA_DATA_WIDTH_SRC + 2), + .DATA_WIDTH(DMA_DATA_WIDTH_SRC + BYTES_PER_BEAT_WIDTH_SRC + 2), /* data + valid_bytes + last + partial_burst */ .FORWARD_REGISTERED(AXI_SLICE_SRC), .BACKWARD_REGISTERED(0) ) i_src_slice ( @@ -927,10 +939,10 @@ axi_register_slice #( .resetn(src_resetn), .s_axi_valid(src_valid), .s_axi_ready(), - .s_axi_data({src_data,src_last,src_partial_burst}), + .s_axi_data({src_data,src_valid_bytes,src_last,src_partial_burst}), .m_axi_valid(src_fifo_valid), .m_axi_ready(1'b1), /* No backpressure */ - .m_axi_data({src_fifo_data,src_fifo_last,src_fifo_partial_burst}) + .m_axi_data({src_fifo_data,src_fifo_valid_bytes,src_fifo_last,src_fifo_partial_burst}) ); axi_dmac_burst_memory #( @@ -950,7 +962,7 @@ axi_dmac_burst_memory #( .src_data_valid(src_fifo_valid), .src_data(src_fifo_data), .src_data_last(src_fifo_last), - .src_data_valid_bytes({BYTES_PER_BEAT_WIDTH_SRC{1'b1}}), + .src_data_valid_bytes(src_fifo_valid_bytes), .src_data_partial_burst(src_fifo_partial_burst), .src_data_request_id(src_data_request_id), @@ -961,6 +973,7 @@ axi_dmac_burst_memory #( .dest_data_ready(dest_fifo_ready), .dest_data(dest_fifo_data), .dest_data_last(dest_fifo_last), + .dest_data_strb(dest_fifo_strb), .dest_burst_info_length(dest_burst_info_length), .dest_burst_info_partial(dest_burst_info_partial), @@ -975,7 +988,7 @@ axi_dmac_burst_memory #( ); axi_register_slice #( - .DATA_WIDTH(DMA_DATA_WIDTH_DEST + 1), + .DATA_WIDTH(DMA_DATA_WIDTH_DEST + DMA_DATA_WIDTH_DEST / 8 + 1), /* data + strb + last */ .FORWARD_REGISTERED(AXI_SLICE_DEST), .BACKWARD_REGISTERED(AXI_SLICE_DEST) ) i_dest_slice ( @@ -985,12 +998,14 @@ 
axi_register_slice #( .s_axi_ready(dest_fifo_ready), .s_axi_data({ dest_fifo_last, + dest_fifo_strb, dest_fifo_data }), 
.m_axi_valid(dest_valid), .m_axi_ready(dest_ready), .m_axi_data({ dest_last, + dest_strb, dest_data }) ); @@ -1030,7 +1045,7 @@ util_axis_fifo #( ); util_axis_fifo #( - .DATA_WIDTH(DMA_ADDRESS_WIDTH_DEST + DMA_ADDRESS_WIDTH_SRC + BEATS_PER_BURST_WIDTH_SRC + 2), + .DATA_WIDTH(DMA_ADDRESS_WIDTH_DEST + DMA_ADDRESS_WIDTH_SRC + BYTES_PER_BURST_WIDTH + 2), /* both addresses + length bits + sync_transfer_start + xlast */ .ADDRESS_WIDTH(0), .ASYNC_CLK(ASYNC_CLK_REQ_SRC) ) i_src_req_fifo ( @@ -1042,7 +1057,7 @@ util_axis_fifo #( .s_axis_data({ req_dest_address, req_src_address, - req_length[BYTES_PER_BURST_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC], + req_length[BYTES_PER_BURST_WIDTH-1:0], req_sync_transfer_start, req_xlast }), @@ -1056,6 +1071,7 @@ util_axis_fifo #( src_req_dest_address, src_req_src_address, src_req_last_burst_length, + src_req_last_beat_bytes, src_req_sync_transfer_start, src_req_xlast }), diff --git a/library/axi_dmac/src_axi_mm.v b/library/axi_dmac/src_axi_mm.v index e813a01d2..5d5291d93 100644 --- a/library/axi_dmac/src_axi_mm.v +++ b/library/axi_dmac/src_axi_mm.v @@ -51,6 +51,7 @@ module dmac_src_mm_axi #( output req_ready, input [DMA_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH] req_address, input [BEATS_PER_BURST_WIDTH-1:0] req_last_burst_length, + input [BYTES_PER_BEAT_WIDTH-1:0] req_last_beat_bytes, input enable, output reg enabled = 1'b0, @@ -73,6 +74,7 @@ module dmac_src_mm_axi #( output fifo_valid, output [DMA_DATA_WIDTH-1:0] fifo_data, + output [BYTES_PER_BEAT_WIDTH-1:0] fifo_valid_bytes, output fifo_last, // Read address @@ -108,6 +110,23 @@ assign response_id = id; assign measured_last_burst_length = req_last_burst_length; +reg [BYTES_PER_BEAT_WIDTH-1:0] last_beat_bytes; +reg [BYTES_PER_BEAT_WIDTH-1:0] last_beat_bytes_mem[0:2**ID_WIDTH-1]; + +assign fifo_valid_bytes = last_beat_bytes_mem[data_id]; + +always @(posedge m_axi_aclk) begin + if (bl_ready_ag == 1'b1 && bl_valid_ag == 1'b1) begin + last_beat_bytes <= req_last_beat_bytes; + end +end + + +always @(posedge 
m_axi_aclk) begin + last_beat_bytes_mem[address_id] <= 
address_eot ? last_beat_bytes : + {BYTES_PER_BEAT_WIDTH{1'b1}}; +end + splitter #( .NUM_M(3) ) i_req_splitter ( diff --git a/library/axi_dmac/tb/dma_read_tb.v b/library/axi_dmac/tb/dma_read_tb.v index 10b8d21d9..a329eb035 100644 --- a/library/axi_dmac/tb/dma_read_tb.v +++ b/library/axi_dmac/tb/dma_read_tb.v @@ -45,7 +45,6 @@ module dmac_dma_read_tb; `include "tb_base.v" localparam TRANSFER_ADDR = 32'h80000000; - localparam WIDTH_MAX = WIDTH_DEST > WIDTH_SRC ? WIDTH_DEST : WIDTH_SRC; reg req_valid = 1'b1; wire req_ready; @@ -109,7 +108,7 @@ module dmac_dma_read_tb; .DMA_TYPE_DEST(2), .DMA_DATA_WIDTH_SRC(WIDTH_SRC), .DMA_DATA_WIDTH_DEST(WIDTH_DEST), - .DMA_LENGTH_ALIGN($clog2(WIDTH_MAX/8)), + .DMA_LENGTH_ALIGN($clog2(WIDTH_DEST/8)), .FIFO_SIZE(8) ) transfer ( .m_src_axi_aclk(clk), diff --git a/library/axi_dmac/tb/dma_write_tb.v b/library/axi_dmac/tb/dma_write_tb.v index 17fcd4aaf..7582229fb 100644 --- a/library/axi_dmac/tb/dma_write_tb.v +++ b/library/axi_dmac/tb/dma_write_tb.v @@ -45,7 +45,6 @@ module dmac_dma_write_tb; `include "tb_base.v" localparam TRANSFER_ADDR = 32'h80000000; - localparam WIDTH_MAX = WIDTH_DEST > WIDTH_SRC ? WIDTH_DEST : WIDTH_SRC; reg req_valid = 1'b1; wire req_ready; @@ -109,7 +108,7 @@ module dmac_dma_write_tb; axi_dmac_transfer #( .DMA_DATA_WIDTH_SRC(WIDTH_SRC), .DMA_DATA_WIDTH_DEST(WIDTH_DEST), - .DMA_LENGTH_ALIGN($clog2(WIDTH_MAX/8)) + .DMA_LENGTH_ALIGN($clog2(WIDTH_SRC/8)) ) i_transfer ( .m_dest_axi_aclk (clk), .m_dest_axi_aresetn(resetn),