axi_dmac: Remove length alignment requirement for MM interfaces

The DMAC requires that the length of a transfer is aligned to the width of
the widest interface. E.g. if the widest interface is 256 bits (32 bytes)
wide, the length of the transfer needs to be a multiple of 32 bytes.

This restriction can be relaxed for the memory mapped interfaces. This is
done by partially ignoring data of a beat from/to the MM interface.

For write accesses the strb (write strobe) bits are used to mask out bytes
that do not contain valid data.

For read accesses a full beat is read but part of the data is discarded.
This works fine as long as the read access is side-effect-free, i.e. this
method should not be used to access data from memory mapped peripherals
like a FIFO.

This means that for example the length alignment requirement of a DMA
configured for a 64-bit memory and a 16-bit streaming interface is now only
2 bytes instead of 8 bytes as before.

Note that the address alignment requirement is not affected by this. The
address still needs to be aligned to the width of the MM interface that it
belongs to.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
main
Lars-Peter Clausen 2018-10-18 15:58:53 +02:00 committed by Laszlo Nagy
parent 7986310fa0
commit 804c57aabc
9 changed files with 101 additions and 24 deletions

View File

@ -284,10 +284,16 @@ localparam REAL_MAX_BYTES_PER_BURST =
BYTES_PER_BURST_LIMIT < MAX_BYTES_PER_BURST ?
BYTES_PER_BURST_LIMIT : MAX_BYTES_PER_BURST;
/* Align to the length to the wider interface */
localparam DMA_LENGTH_ALIGN =
BYTES_PER_BEAT_WIDTH_DEST < BYTES_PER_BEAT_WIDTH_SRC ?
BYTES_PER_BEAT_WIDTH_SRC : BYTES_PER_BEAT_WIDTH_DEST;
/* MM has no alignment requirements */
localparam DMA_LENGTH_ALIGN_SRC =
DMA_TYPE_SRC == DMA_TYPE_AXI_MM ? 0 : BYTES_PER_BEAT_WIDTH_SRC;
localparam DMA_LENGTH_ALIGN_DEST =
DMA_TYPE_DEST == DMA_TYPE_AXI_MM ? 0 : BYTES_PER_BEAT_WIDTH_DEST;
/* Choose the larger of the two */
localparam DMA_LENGTH_ALIGN =
DMA_LENGTH_ALIGN_SRC < DMA_LENGTH_ALIGN_DEST ?
DMA_LENGTH_ALIGN_DEST : DMA_LENGTH_ALIGN_SRC;
localparam BYTES_PER_BURST_WIDTH =
REAL_MAX_BYTES_PER_BURST > 2048 ? 12 :

View File

@ -65,6 +65,7 @@ module axi_dmac_burst_memory #(
input dest_data_ready,
output [DATA_WIDTH_DEST-1:0] dest_data,
output dest_data_last,
output [DATA_WIDTH_DEST/8-1:0] dest_data_strb,
output [BYTES_PER_BURST_WIDTH-1:0] dest_burst_info_length,
output dest_burst_info_partial,
@ -114,6 +115,7 @@ localparam ADDRESS_WIDTH_SRC = BURST_LEN_WIDTH_SRC + ID_WIDTH - 1;
localparam ADDRESS_WIDTH_DEST = BURST_LEN_WIDTH_DEST + ID_WIDTH - 1;
localparam BYTES_PER_BEAT_WIDTH_MEM_SRC = BYTES_PER_BURST_WIDTH - BURST_LEN_WIDTH_SRC;
localparam BYTES_PER_BEAT_WIDTH_DEST = BYTES_PER_BURST_WIDTH - BURST_LEN_WIDTH_DEST;
/*
* The burst memory is separated into 2**(ID_WIDTH-1) segments. Each segment can
@ -153,6 +155,7 @@ wire [BURST_LEN_WIDTH_DEST-1:0] dest_burst_len;
reg dest_valid = 1'b0;
reg dest_mem_data_valid = 1'b0;
reg dest_mem_data_last = 1'b0;
reg [DATA_WIDTH_MEM_DEST/8-1:0] dest_mem_data_strb = {DATA_WIDTH_MEM_DEST/8{1'b1}};
reg [BYTES_PER_BURST_WIDTH+1-1-DMA_LENGTH_ALIGN:0] burst_len_mem[0:AUX_FIFO_SIZE-1];
@ -294,6 +297,16 @@ always @(posedge dest_clk) begin
end
end
always @(posedge dest_clk) begin
if (dest_beat == 1'b1) begin
if (dest_last == 1'b1) begin
dest_mem_data_strb <= {DATA_WIDTH_MEM_DEST/8{1'b1}} >> ~dest_burst_len_data[BYTES_PER_BEAT_WIDTH_DEST-1:0];
end else begin
dest_mem_data_strb <= {DATA_WIDTH_MEM_DEST/8{1'b1}};
end
end
end
assign dest_id_next_inc = inc_id(dest_id_next);
always @(posedge dest_clk) begin
@ -391,11 +404,13 @@ axi_dmac_resize_dest #(
.mem_data_ready (dest_mem_data_ready),
.mem_data (dest_mem_data),
.mem_data_last (dest_mem_data_last),
.mem_data_strb (dest_mem_data_strb),
.dest_data_valid (dest_data_valid),
.dest_data_ready (dest_data_ready),
.dest_data (dest_data),
.dest_data_last (dest_data_last)
.dest_data_last (dest_data_last),
.dest_data_strb (dest_data_strb)
);
sync_bits #(

View File

@ -46,11 +46,13 @@ module axi_dmac_resize_dest #(
output mem_data_ready,
input [DATA_WIDTH_MEM-1:0] mem_data,
input mem_data_last,
input [DATA_WIDTH_MEM/8-1:0] mem_data_strb,
output dest_data_valid,
input dest_data_ready,
output [DATA_WIDTH_DEST-1:0] dest_data,
output dest_data_last
output dest_data_last,
output [DATA_WIDTH_DEST/8-1:0] dest_data_strb
);
/*
@ -62,6 +64,7 @@ generate if (DATA_WIDTH_DEST == DATA_WIDTH_MEM) begin
assign dest_data_valid = mem_data_valid;
assign dest_data = mem_data;
assign dest_data_last = mem_data_last;
assign dest_data_strb = mem_data_strb;
assign mem_data_ready = dest_data_ready;
end else begin
@ -71,10 +74,11 @@ end else begin
reg valid = 1'b0;
reg [RATIO-1:0] last = 'h0;
reg [DATA_WIDTH_MEM-1:0] data = 'h0;
reg [DATA_WIDTH_MEM/8-1:0] strb = {DATA_WIDTH_MEM/8{1'b1}};
wire last_beat;
assign last_beat = count == RATIO - 1;
assign last_beat = (count == RATIO - 1) | last[0];
always @(posedge clk) begin
if (reset == 1'b1) begin
@ -90,24 +94,43 @@ end else begin
if (reset == 1'b1) begin
count <= 'h0;
end else if (dest_data_ready == 1'b1 && dest_data_valid == 1'b1) begin
if (last_beat == 1'b1) begin
count <= 'h0;
end else begin
count <= count + 1;
end
end
end
assign mem_data_ready = ~valid | (dest_data_ready & last_beat);
integer i;
always @(posedge clk) begin
if (mem_data_ready == 1'b1) begin
data <= mem_data;
last <= {mem_data_last,{RATIO-1{1'b0}}};
/*
* Skip those words where strb would be completely zero for the output
* word. We assume that strb is thermometer encoded (i.e. a certain number
* of LSBs are 1'b1 followed by all 1'b0 in the MSBs) and by extension
* that if the first strb bit for a word is zero that means that all strb
* bits for a word will be zero.
*/
for (i = 0; i < RATIO-1; i = i + 1) begin
last[i] <= mem_data_last & ~mem_data_strb[(i+1)*(DATA_WIDTH_MEM/8/RATIO)];
end
last[RATIO-1] <= mem_data_last;
strb <= mem_data_strb;
end else if (dest_data_ready == 1'b1) begin
data[DATA_WIDTH_MEM-DATA_WIDTH_DEST-1:0] <= data[DATA_WIDTH_MEM-1:DATA_WIDTH_DEST];
strb[(DATA_WIDTH_MEM-DATA_WIDTH_DEST)/8-1:0] <= strb[DATA_WIDTH_MEM/8-1:DATA_WIDTH_DEST/8];
last[RATIO-2:0] <= last[RATIO-1:1];
end
end
assign dest_data_valid = valid;
assign dest_data = data[DATA_WIDTH_DEST-1:0];
assign dest_data_strb = strb[DATA_WIDTH_DEST/8-1:0];
assign dest_data_last = last[0];
end endgenerate

View File

@ -82,8 +82,8 @@ end else begin
reg valid = 1'b0;
reg last = 1'b0;
reg [DATA_WIDTH_MEM-1:0] data = 'h0;
reg [BYTES_PER_BEAT_WIDTH_SRC-1:0] valid_bytes;
reg partial_burst;
reg [BYTES_PER_BEAT_WIDTH_SRC-1:0] valid_bytes = 'h00;
reg partial_burst = 1'b0;
reg [RATIO_WIDTH-1:0] num_beats = {RATIO_WIDTH{1'b1}};
always @(posedge clk) begin

View File

@ -77,6 +77,7 @@ module dmac_dest_mm_axi #(
input fifo_valid,
output fifo_ready,
input [DMA_DATA_WIDTH-1:0] fifo_data,
input [DMA_DATA_WIDTH/8-1:0] fifo_strb,
input fifo_last,
input [BYTES_PER_BURST_WIDTH-1:0] dest_burst_info_length,
@ -149,8 +150,7 @@ assign m_axi_wvalid = fifo_valid;
assign fifo_ready = m_axi_wready;
assign m_axi_wlast = fifo_last;
assign m_axi_wdata = fifo_data;
assign m_axi_wstrb = {(DMA_DATA_WIDTH/8){1'b1}};
assign m_axi_wstrb = fifo_strb;
dmac_response_handler #(
.ID_WIDTH(ID_WIDTH)

View File

@ -238,10 +238,12 @@ wire [ID_WIDTH-1:0] dest_response_id;
wire dest_valid;
wire dest_ready;
wire [DMA_DATA_WIDTH_DEST-1:0] dest_data;
wire [DMA_DATA_WIDTH_DEST/8-1:0] dest_strb;
wire dest_last;
wire dest_fifo_valid;
wire dest_fifo_ready;
wire [DMA_DATA_WIDTH_DEST-1:0] dest_fifo_data;
wire [DMA_DATA_WIDTH_DEST/8-1:0] dest_fifo_strb;
wire dest_fifo_last;
wire src_req_valid;
@ -249,6 +251,7 @@ wire src_req_ready;
wire [DMA_ADDRESS_WIDTH_DEST-1:0] src_req_dest_address;
wire [DMA_ADDRESS_WIDTH_SRC-1:0] src_req_src_address;
wire [BEATS_PER_BURST_WIDTH_SRC-1:0] src_req_last_burst_length;
wire [BYTES_PER_BEAT_WIDTH_SRC-1:0] src_req_last_beat_bytes;
wire src_req_sync_transfer_start;
wire src_req_xlast;
@ -269,11 +272,13 @@ wire [ID_WIDTH-1:0] src_response_id;
wire src_valid;
wire [DMA_DATA_WIDTH_SRC-1:0] src_data;
wire [BYTES_PER_BEAT_WIDTH_SRC-1:0] src_valid_bytes;
wire src_last;
wire src_partial_burst;
wire block_descr_to_dst;
wire src_fifo_valid;
wire [DMA_DATA_WIDTH_SRC-1:0] src_fifo_data;
wire [BYTES_PER_BEAT_WIDTH_SRC-1:0] src_fifo_valid_bytes;
wire src_fifo_last;
wire src_fifo_partial_burst;
@ -388,6 +393,7 @@ dmac_dest_mm_axi #(
.fifo_valid(dest_valid),
.fifo_ready(dest_ready),
.fifo_data(dest_data),
.fifo_strb(dest_strb),
.fifo_last(dest_last),
.dest_burst_info_length(dest_burst_info_length),
@ -631,6 +637,7 @@ dmac_src_mm_axi #(
.req_ready(src_req_ready),
.req_address(src_req_src_address),
.req_last_burst_length(src_req_last_burst_length),
.req_last_beat_bytes(src_req_last_beat_bytes),
.bl_valid(src_bl_valid),
.bl_ready(src_bl_ready),
@ -651,6 +658,7 @@ dmac_src_mm_axi #(
.fifo_valid(src_valid),
.fifo_data(src_data),
.fifo_valid_bytes(src_valid_bytes),
.fifo_last(src_last),
.m_axi_arready(m_axi_arready),
@ -746,6 +754,8 @@ dmac_src_axi_stream #(
.s_axis_xfer_req(s_axis_xfer_req)
);
assign src_valid_bytes = {BYTES_PER_BEAT_WIDTH_SRC{1'b1}};
util_axis_fifo #(
.DATA_WIDTH(ID_WIDTH + 3),
.ADDRESS_WIDTH(0),
@ -836,6 +846,8 @@ dmac_src_fifo_inf #(
.xfer_req(fifo_wr_xfer_req)
);
assign src_valid_bytes = {BYTES_PER_BEAT_WIDTH_SRC{1'b1}};
end else begin
assign fifo_wr_overflow = 1'b0;
@ -919,7 +931,7 @@ sync_bits #(
);
axi_register_slice #(
.DATA_WIDTH(DMA_DATA_WIDTH_SRC + 2),
.DATA_WIDTH(DMA_DATA_WIDTH_SRC + BYTES_PER_BEAT_WIDTH_SRC + 2),
.FORWARD_REGISTERED(AXI_SLICE_SRC),
.BACKWARD_REGISTERED(0)
) i_src_slice (
@ -927,10 +939,10 @@ axi_register_slice #(
.resetn(src_resetn),
.s_axi_valid(src_valid),
.s_axi_ready(),
.s_axi_data({src_data,src_last,src_partial_burst}),
.s_axi_data({src_data,src_valid_bytes,src_last,src_partial_burst}),
.m_axi_valid(src_fifo_valid),
.m_axi_ready(1'b1), /* No backpressure */
.m_axi_data({src_fifo_data,src_fifo_last,src_fifo_partial_burst})
.m_axi_data({src_fifo_data,src_fifo_valid_bytes,src_fifo_last,src_fifo_partial_burst})
);
axi_dmac_burst_memory #(
@ -950,7 +962,7 @@ axi_dmac_burst_memory #(
.src_data_valid(src_fifo_valid),
.src_data(src_fifo_data),
.src_data_last(src_fifo_last),
.src_data_valid_bytes({BYTES_PER_BEAT_WIDTH_SRC{1'b1}}),
.src_data_valid_bytes(src_fifo_valid_bytes),
.src_data_partial_burst(src_fifo_partial_burst),
.src_data_request_id(src_data_request_id),
@ -961,6 +973,7 @@ axi_dmac_burst_memory #(
.dest_data_ready(dest_fifo_ready),
.dest_data(dest_fifo_data),
.dest_data_last(dest_fifo_last),
.dest_data_strb(dest_fifo_strb),
.dest_burst_info_length(dest_burst_info_length),
.dest_burst_info_partial(dest_burst_info_partial),
@ -975,7 +988,7 @@ axi_dmac_burst_memory #(
);
axi_register_slice #(
.DATA_WIDTH(DMA_DATA_WIDTH_DEST + 1),
.DATA_WIDTH(DMA_DATA_WIDTH_DEST + DMA_DATA_WIDTH_DEST / 8 + 1),
.FORWARD_REGISTERED(AXI_SLICE_DEST),
.BACKWARD_REGISTERED(AXI_SLICE_DEST)
) i_dest_slice (
@ -985,12 +998,14 @@ axi_register_slice #(
.s_axi_ready(dest_fifo_ready),
.s_axi_data({
dest_fifo_last,
dest_fifo_strb,
dest_fifo_data
}),
.m_axi_valid(dest_valid),
.m_axi_ready(dest_ready),
.m_axi_data({
dest_last,
dest_strb,
dest_data
})
);
@ -1030,7 +1045,7 @@ util_axis_fifo #(
);
util_axis_fifo #(
.DATA_WIDTH(DMA_ADDRESS_WIDTH_DEST + DMA_ADDRESS_WIDTH_SRC + BEATS_PER_BURST_WIDTH_SRC + 2),
.DATA_WIDTH(DMA_ADDRESS_WIDTH_DEST + DMA_ADDRESS_WIDTH_SRC + BYTES_PER_BURST_WIDTH + 2),
.ADDRESS_WIDTH(0),
.ASYNC_CLK(ASYNC_CLK_REQ_SRC)
) i_src_req_fifo (
@ -1042,7 +1057,7 @@ util_axis_fifo #(
.s_axis_data({
req_dest_address,
req_src_address,
req_length[BYTES_PER_BURST_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC],
req_length[BYTES_PER_BURST_WIDTH-1:0],
req_sync_transfer_start,
req_xlast
}),
@ -1056,6 +1071,7 @@ util_axis_fifo #(
src_req_dest_address,
src_req_src_address,
src_req_last_burst_length,
src_req_last_beat_bytes,
src_req_sync_transfer_start,
src_req_xlast
}),

View File

@ -51,6 +51,7 @@ module dmac_src_mm_axi #(
output req_ready,
input [DMA_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH] req_address,
input [BEATS_PER_BURST_WIDTH-1:0] req_last_burst_length,
input [BYTES_PER_BEAT_WIDTH-1:0] req_last_beat_bytes,
input enable,
output reg enabled = 1'b0,
@ -73,6 +74,7 @@ module dmac_src_mm_axi #(
output fifo_valid,
output [DMA_DATA_WIDTH-1:0] fifo_data,
output [BYTES_PER_BEAT_WIDTH-1:0] fifo_valid_bytes,
output fifo_last,
// Read address
@ -108,6 +110,23 @@ assign response_id = id;
assign measured_last_burst_length = req_last_burst_length;
reg [BYTES_PER_BEAT_WIDTH-1:0] last_beat_bytes;
reg [BYTES_PER_BEAT_WIDTH-1:0] last_beat_bytes_mem[0:2**ID_WIDTH-1];
assign fifo_valid_bytes = last_beat_bytes_mem[data_id];
always @(posedge m_axi_aclk) begin
if (bl_ready_ag == 1'b1 && bl_valid_ag == 1'b1) begin
last_beat_bytes <= req_last_beat_bytes;
end
end
always @(posedge m_axi_aclk) begin
last_beat_bytes_mem[address_id] <= address_eot ? last_beat_bytes :
{BYTES_PER_BEAT_WIDTH{1'b1}};
end
splitter #(
.NUM_M(3)
) i_req_splitter (

View File

@ -45,7 +45,6 @@ module dmac_dma_read_tb;
`include "tb_base.v"
localparam TRANSFER_ADDR = 32'h80000000;
localparam WIDTH_MAX = WIDTH_DEST > WIDTH_SRC ? WIDTH_DEST : WIDTH_SRC;
reg req_valid = 1'b1;
wire req_ready;
@ -109,7 +108,7 @@ module dmac_dma_read_tb;
.DMA_TYPE_DEST(2),
.DMA_DATA_WIDTH_SRC(WIDTH_SRC),
.DMA_DATA_WIDTH_DEST(WIDTH_DEST),
.DMA_LENGTH_ALIGN($clog2(WIDTH_MAX/8)),
.DMA_LENGTH_ALIGN($clog2(WIDTH_DEST/8)),
.FIFO_SIZE(8)
) transfer (
.m_src_axi_aclk(clk),

View File

@ -45,7 +45,6 @@ module dmac_dma_write_tb;
`include "tb_base.v"
localparam TRANSFER_ADDR = 32'h80000000;
localparam WIDTH_MAX = WIDTH_DEST > WIDTH_SRC ? WIDTH_DEST : WIDTH_SRC;
reg req_valid = 1'b1;
wire req_ready;
@ -109,7 +108,7 @@ module dmac_dma_write_tb;
axi_dmac_transfer #(
.DMA_DATA_WIDTH_SRC(WIDTH_SRC),
.DMA_DATA_WIDTH_DEST(WIDTH_DEST),
.DMA_LENGTH_ALIGN($clog2(WIDTH_MAX/8))
.DMA_LENGTH_ALIGN($clog2(WIDTH_SRC/8))
) i_transfer (
.m_dest_axi_aclk (clk),
.m_dest_axi_aresetn(resetn),