pluto_hdl_adi/library/data_offload/data_offload.v

413 lines
16 KiB
Verilog

// ***************************************************************************
// ***************************************************************************
// Copyright 2018 (c) Analog Devices, Inc. All rights reserved.
//
// In this HDL repository, there are many different and unique modules, consisting
// of various HDL (Verilog or VHDL) components. The individual modules are
// developed independently, and may be accompanied by separate and unique license
// terms.
//
// The user should read each of these license terms, and understand the
// freedoms and responsibilities that he or she has by using this source/core.
//
// This core is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
// A PARTICULAR PURPOSE.
//
// Redistribution and use of source or resulting binaries, with or without modification
// of this file, are permitted under one of the following two license terms:
//
// 1. The GNU General Public License version 2 as published by the
// Free Software Foundation, which can be found in the top level directory
// of this repository (LICENSE_GPL2), and also online at:
// <https://www.gnu.org/licenses/old-licenses/gpl-2.0.html>
//
// OR
//
// 2. An ADI specific BSD license, which can be found in the top level directory
// of this repository (LICENSE_ADIBSD), and also on-line at:
// https://github.com/analogdevicesinc/hdl/blob/master/LICENSE_ADIBSD
// This will allow to generate bit files and not release the source code,
// as long as it attaches to an ADI device.
//
// ***************************************************************************
// ***************************************************************************
`timescale 1ns / 1ps
// ---------------------------------------------------------------------------
// data_offload -- top level of the data offload core.
//
// This module only wires sub-blocks together; apart from one beat counter and
// one pipeline register it contains no logic of its own:
//   * up_axi + data_offload_regmap : AXI slave configuration/status interface
//   * data_offload_fsm             : write (source) / read (destination)
//                                    control of the external storage FIFO
//   * ad_axis_inf_rx               : output stage of the destination stream
//   * util_axis_fifo_asym          : small asynchronous FIFO used in bypass
//                                    mode instead of the storage unit
//
// Clock domains:
//   up_*  - s_axi_aclk  (register interface)
//   src_* - s_axis_aclk (AXI4 Stream Slave interface)
//   dst_* - m_axis_aclk (AXI4 Stream Master interface)
//
// NOTE(review): the following parameters and ports are part of the interface
// but are not referenced anywhere inside this module: MEMC_UIF_DATA_WIDTH,
// MEMC_UIF_ADDRESS_WIDTH, MEMC_BADDRESS, SRC_RAW_DATA_EN, DST_RAW_DATA_EN,
// DST_CYCLIC_EN, s_axi_awprot, s_axi_arprot, s_axis_aresetn, m_axis_aresetn.
// ---------------------------------------------------------------------------
module data_offload #(

  parameter          ID = 0,
  parameter   [ 0:0] MEM_TYPE = 1'b0,               // 1'b0 - FPGA RAM; 1'b1 - external memory
  parameter   [33:0] MEM_SIZE = 1023,               // memory size in bytes -1 - max 16 GB
  parameter          MEMC_UIF_DATA_WIDTH = 512,
  parameter          MEMC_UIF_ADDRESS_WIDTH = 31,
  parameter   [31:0] MEMC_BADDRESS = 32'h00000000,

  parameter          TX_OR_RXN_PATH = 0,            // if set IP is used in TX path, otherwise in RX path

  parameter          SRC_DATA_WIDTH = 64,
  parameter          SRC_RAW_DATA_EN = 1'b0,
  parameter          SRC_ADDR_WIDTH = 8,

  parameter          DST_ADDR_WIDTH = 7,
  parameter          DST_DATA_WIDTH = 128,
  parameter          DST_RAW_DATA_EN = 1'b0,        // TBD
  parameter          DST_CYCLIC_EN = 1'b0,          // 1'b1 - CYCLIC mode enabled; 1'b0 - CYCLIC mode disabled

  parameter          AUTO_BRINGUP = 1) (

  // AXI4 Slave for configuration

  input                                       s_axi_aclk,
  input                                       s_axi_aresetn,
  input                                       s_axi_awvalid,
  input       [15:0]                          s_axi_awaddr,
  input       [ 2:0]                          s_axi_awprot,
  output                                      s_axi_awready,
  input                                       s_axi_wvalid,
  input       [31:0]                          s_axi_wdata,
  input       [ 3:0]                          s_axi_wstrb,
  output                                      s_axi_wready,
  output                                      s_axi_bvalid,
  output      [ 1:0]                          s_axi_bresp,
  input                                       s_axi_bready,
  input                                       s_axi_arvalid,
  input       [15:0]                          s_axi_araddr,
  input       [ 2:0]                          s_axi_arprot,
  output                                      s_axi_arready,
  output                                      s_axi_rvalid,
  input                                       s_axi_rready,
  output      [ 1:0]                          s_axi_rresp,
  output      [31:0]                          s_axi_rdata,

  // AXI4 stream slave for source stream (TX_DMA or ADC) -- Source interface

  input                                       s_axis_aclk,
  input                                       s_axis_aresetn,
  output                                      s_axis_ready,
  input                                       s_axis_valid,
  input       [SRC_DATA_WIDTH-1:0]            s_axis_data,
  input                                       s_axis_last,
  input       [SRC_DATA_WIDTH/8-1:0]          s_axis_tkeep,

  // AXI4 stream master for destination stream (RX_DMA or DAC) -- Destination
  // interface

  input                                       m_axis_aclk,
  input                                       m_axis_aresetn,
  input                                       m_axis_ready,
  output                                      m_axis_valid,
  output      [DST_DATA_WIDTH-1:0]            m_axis_data,
  output                                      m_axis_last,
  output      [DST_DATA_WIDTH/8-1:0]          m_axis_tkeep,

  // initialization request interface

  input                                       init_req,
  output                                      init_ack,
  input                                       sync_ext,

  // FIFO interface - Memory UI

  output                                      fifo_src_wen,
  output                                      fifo_src_resetn,
  output      [SRC_ADDR_WIDTH-1:0]            fifo_src_waddr,
  output      [SRC_DATA_WIDTH-1:0]            fifo_src_wdata,
  output                                      fifo_src_wlast,

  output                                      fifo_dst_ren,
  input                                       fifo_dst_ready,
  output                                      fifo_dst_resetn,
  output      [DST_ADDR_WIDTH-1:0]            fifo_dst_raddr,
  input       [DST_DATA_WIDTH-1:0]            fifo_dst_rdata,

  // Status and monitor

  input                                       ddr_calib_done
);

  // local parameters -- to make the code more readable

  // NOTE(review): the $clog2 term below is only selected when
  // SRC_DATA_WIDTH <= DST_DATA_WIDTH, where SRC_DATA_WIDTH/DST_DATA_WIDTH is
  // 0 or 1 and $clog2 returns 0 -- so this always evaluates to 3. The ratio
  // operands look swapped; confirm against util_axis_fifo_asym before
  // changing, the expression is intentionally left as it was.
  localparam  SRC_ADDR_WIDTH_BYPASS = (SRC_DATA_WIDTH > DST_DATA_WIDTH) ? 3 : 3 + $clog2(SRC_DATA_WIDTH/DST_DATA_WIDTH);
  // NOTE(review): not referenced anywhere in this module.
  localparam  DST_ADDR_WIDTH_BYPASS = (SRC_DATA_WIDTH <= DST_DATA_WIDTH) ? 3 + $clog2(DST_DATA_WIDTH/SRC_DATA_WIDTH) : 3;
  // log2 of the number of bytes in one source beat; used to convert a byte
  // count into a beat count by dropping the low address bits
  localparam  SRC_BEAT_BYTE = $clog2(SRC_DATA_WIDTH/8);

  // NOTE: Clock domain prefixes
  //    src_*     - AXI4 Stream Slave interface's clock domain
  //    dst_*     - AXI4 Stream Master interface's clock domain

  // internal signals

  // register interface (up_axi <-> regmap)
  wire                                        up_clk;
  wire                                        up_rstn;
  wire                                        up_wreq_s;
  wire  [13:0]                                up_waddr_s;
  wire  [31:0]                                up_wdata_s;
  wire                                        up_rreq_s;
  wire  [13:0]                                up_raddr_s;
  wire                                        up_wack_s;
  wire                                        up_rack_s;
  wire  [31:0]                                up_rdata_s;

  // source (write) side
  wire                                        src_clk;
  wire                                        src_rstn;
  wire                                        src_wr_ready_s;
  wire                                        src_wr_last_s;
  wire  [SRC_DATA_WIDTH/8-1:0]                src_wr_tkeep_s;
  wire  [31:0]                                src_transfer_length_s;
  wire                                        src_wr_last_int_s;
  wire  [31:0]                                src_wr_last_beat_s;

  // destination (read) side
  wire                                        dst_clk;
  wire                                        dst_rstn;
  wire                                        dst_mem_valid_s;
  wire                                        dst_mem_valid_int_s;
  wire                                        m_axis_valid_s;
  wire                                        m_axis_last_s;
  wire  [DST_DATA_WIDTH-1:0]                  m_axis_data_s;
  wire                                        m_axis_reset_int_s;
  wire                                        int_not_full;
  // explicitly declared: this net used to exist only as an implicit wire,
  // which fails to elaborate under `default_nettype none
  wire                                        fifo_dst_ready_int_s;

  // control and status exchanged with the register map
  wire                                        src_bypass_s;
  wire                                        dst_bypass_s;
  wire                                        oneshot_s;
  wire  [63:0]                                sample_count_s;
  wire  [ 1:0]                                sync_config_s;
  wire                                        sync_int_s;
  wire  [ 1:0]                                src_fsm_status_s;
  wire  [ 1:0]                                dst_fsm_status_s;

  // bypass FIFO
  wire                                        valid_bypass_s;
  wire  [DST_DATA_WIDTH-1:0]                  data_bypass_s;
  wire                                        ready_bypass_s;

  // internal registers

  reg   [31:0]                                src_data_counter = 0;
  reg                                         dst_mem_valid_d = 1'b0;

  assign src_clk = s_axis_aclk;
  assign dst_clk = m_axis_aclk;

  // Path dependent signals:
  //   TX path - last and tkeep are taken from the incoming stream; the output
  //             stage is reset by the destination software reset only
  //   RX path - last is generated internally from the beat counter, tkeep is
  //             all ones; the output stage is additionally held in reset
  //             while init_req is low
  generate
    if (TX_OR_RXN_PATH) begin
      assign src_wr_last_s = s_axis_last;
      assign src_wr_tkeep_s = s_axis_tkeep;
      assign m_axis_reset_int_s = ~dst_rstn;
    end else begin
      assign src_wr_last_s = src_wr_last_int_s;
      assign src_wr_tkeep_s = {(SRC_DATA_WIDTH/8){1'b1}};
      assign m_axis_reset_int_s = ~dst_rstn | ~init_req;
    end
  endgenerate

  assign fifo_src_wlast = src_wr_last_s;

  // Offload FSM and control

  data_offload_fsm #(
    .TX_OR_RXN_PATH (TX_OR_RXN_PATH),
    .WR_ADDRESS_WIDTH (SRC_ADDR_WIDTH),
    .WR_DATA_WIDTH (SRC_DATA_WIDTH),
    .RD_ADDRESS_WIDTH (DST_ADDR_WIDTH),
    .RD_DATA_WIDTH (DST_DATA_WIDTH))
  i_data_offload_fsm (
    .wr_clk (src_clk),
    .wr_resetn_in (src_rstn),
    .wr_resetn_out (fifo_src_resetn),
    .wr_valid_in (s_axis_valid),
    .wr_valid_out (fifo_src_wen),
    .wr_ready (src_wr_ready_s),
    .wr_addr (fifo_src_waddr),
    .wr_last (src_wr_last_s),
    .wr_tkeep (src_wr_tkeep_s),
    .rd_clk (dst_clk),
    .rd_resetn_in (dst_rstn),
    .rd_resetn_out (fifo_dst_resetn),
    .rd_ready (fifo_dst_ready_int_s),
    .rd_valid (dst_mem_valid_s),
    .rd_addr (fifo_dst_raddr),
    .rd_last (),
    .rd_tkeep (m_axis_tkeep),
    .rd_oneshot (oneshot_s),
    .init_req (init_req),
    .init_ack (init_ack),
    .sync_config (sync_config_s),
    .sync_external (sync_ext),
    .sync_internal (sync_int_s),
    .wr_fsm_state (src_fsm_status_s),
    .rd_fsm_state (dst_fsm_status_s),
    .sample_count (sample_count_s)
  );

  // In case of external memory, read back can not start right after the write
  // was finished (because of the CDC FIFOs and the latency of the EMIF
  // interface)

  generate
    if (MEM_TYPE == 1'b1) begin
      // external memory: the FSM's read valid is used as is
      assign dst_mem_valid_int_s = dst_mem_valid_s;
    end else begin
      // Compensate the 1 cycle READ latency of the BRAM
      always @(posedge m_axis_aclk) begin
        dst_mem_valid_d <= dst_mem_valid_s;
      end
      assign dst_mem_valid_int_s = dst_mem_valid_d;
    end
  endgenerate

  // The read FSM sees "ready" only when the storage reports ready AND the
  // output stage asserts int_not_full (presumably: it can still accept data
  // -- confirm against ad_axis_inf_rx).
  assign fifo_dst_ready_int_s = fifo_dst_ready & int_not_full;
  assign fifo_src_wdata = s_axis_data;
  assign fifo_dst_ren = dst_mem_valid_s;

  // Output stage of the destination stream; its last input is tied low

  ad_axis_inf_rx #(
    .DATA_WIDTH (DST_DATA_WIDTH))
  i_rx_axis_inf (
    .clk (m_axis_aclk),
    .rst (m_axis_reset_int_s),
    .valid (dst_mem_valid_int_s),
    .data (fifo_dst_rdata),
    .last (1'b0),
    .inf_valid (m_axis_valid_s),
    .inf_last (m_axis_last_s),
    .inf_data (m_axis_data_s),
    .inf_ready (m_axis_ready),
    .int_not_full(int_not_full));

  // Stream multiplexing: in bypass mode the bypass FIFO drives the master
  // stream (with last forced low) and provides the slave side ready

  assign m_axis_valid = (dst_bypass_s) ? valid_bypass_s : m_axis_valid_s;
  assign m_axis_data = (dst_bypass_s) ? data_bypass_s : m_axis_data_s;
  assign m_axis_last = (dst_bypass_s) ? 1'b0 : m_axis_last_s;
  assign s_axis_ready = (src_bypass_s) ? ready_bypass_s : src_wr_ready_s;

  // Bypass module instance -- the same FIFO, just a smaller depth
  // NOTE: Generating an overflow is making sense just in BYPASS mode, and
  // it's supported just with the FIFO interface
  // NOTE(review): s_axis_valid and m_axis_ready are connected ungated, i.e.
  // independently of src_bypass_s/dst_bypass_s.

  util_axis_fifo_asym #(
    .S_DATA_WIDTH (SRC_DATA_WIDTH),
    .S_ADDRESS_WIDTH (SRC_ADDR_WIDTH_BYPASS),
    .M_DATA_WIDTH (DST_DATA_WIDTH),
    .ASYNC_CLK (1))
  i_bypass_fifo (
    .m_axis_aclk (m_axis_aclk),
    .m_axis_aresetn (dst_rstn),
    .m_axis_ready (m_axis_ready),
    .m_axis_valid (valid_bypass_s),
    .m_axis_data (data_bypass_s),
    .m_axis_tlast (),
    .m_axis_empty (),
    .m_axis_almost_empty (),
    .s_axis_aclk (s_axis_aclk),
    .s_axis_aresetn (src_rstn),
    .s_axis_ready (ready_bypass_s),
    .s_axis_valid (s_axis_valid),
    .s_axis_data (s_axis_data),
    .s_axis_tlast (),
    .s_axis_full (),
    .s_axis_almost_full ()
  );

  // register map

  data_offload_regmap #(
    .ID (ID),
    .MEM_TYPE (MEM_TYPE),
    .MEM_SIZE (MEM_SIZE),
    .TX_OR_RXN_PATH (TX_OR_RXN_PATH),
    .AUTO_BRINGUP (AUTO_BRINGUP))
  i_regmap (
    .up_clk (up_clk),
    .up_rstn (up_rstn),
    .up_rreq (up_rreq_s),
    .up_rack (up_rack_s),
    .up_raddr (up_raddr_s),
    .up_rdata (up_rdata_s),
    .up_wreq (up_wreq_s),
    .up_wack (up_wack_s),
    .up_waddr (up_waddr_s),
    .up_wdata (up_wdata_s),
    .src_clk (s_axis_aclk),
    .dst_clk (m_axis_aclk),
    .src_sw_resetn (src_rstn),
    .dst_sw_resetn (dst_rstn),
    .ddr_calib_done (ddr_calib_done),
    .src_bypass (src_bypass_s),
    .dst_bypass (dst_bypass_s),
    .oneshot (oneshot_s),
    .sync (sync_int_s),
    .sync_config (sync_config_s),
    .src_transfer_length (src_transfer_length_s),
    .src_fsm_status (src_fsm_status_s),
    .dst_fsm_status (dst_fsm_status_s),
    .sample_count_msb (sample_count_s[63:32]),
    .sample_count_lsb (sample_count_s[31: 0])
  );

  // axi interface wrapper

  assign up_clk = s_axi_aclk;
  assign up_rstn = s_axi_aresetn;

  up_axi #(
    .AXI_ADDRESS_WIDTH (16))
  i_up_axi (
    .up_rstn (up_rstn),
    .up_clk (up_clk),
    .up_axi_awvalid (s_axi_awvalid),
    .up_axi_awaddr (s_axi_awaddr),
    .up_axi_awready (s_axi_awready),
    .up_axi_wvalid (s_axi_wvalid),
    .up_axi_wdata (s_axi_wdata),
    .up_axi_wstrb (s_axi_wstrb),
    .up_axi_wready (s_axi_wready),
    .up_axi_bvalid (s_axi_bvalid),
    .up_axi_bresp (s_axi_bresp),
    .up_axi_bready (s_axi_bready),
    .up_axi_arvalid (s_axi_arvalid),
    .up_axi_araddr (s_axi_araddr),
    .up_axi_arready (s_axi_arready),
    .up_axi_rvalid (s_axi_rvalid),
    .up_axi_rresp (s_axi_rresp),
    .up_axi_rdata (s_axi_rdata),
    .up_axi_rready (s_axi_rready),
    .up_wreq (up_wreq_s),
    .up_waddr (up_waddr_s),
    .up_wdata (up_wdata_s),
    .up_wack (up_wack_s),
    .up_rreq (up_rreq_s),
    .up_raddr (up_raddr_s),
    .up_rdata (up_rdata_s),
    .up_rack (up_rack_s));

  /* Beat counter on the source interface
   *
   * The storage unit can have a size of a couple of Gbytes, which in case of
   * an RX path would mean filling up all that memory space before pushing
   * the stream over to the RX DMA. (An ADC can not generate a tlast.) To make
   * things more practical, the user can set an arbitrary transfer length
   * using the transfer_length register, which will be used to generate an
   * internal tlast signal for the source FSM. If the register is set to zero,
   * all the memory will be filled up before passing control to the
   * destination FSM.
   *
   */

  always @(posedge s_axis_aclk) begin
    if (fifo_src_resetn == 1'b0) begin    // counter should reset when source FSM resets
      src_data_counter <= 0;
    end else begin
      // count one beat for every write that is both issued and accepted
      if (fifo_src_wen & src_wr_ready_s) begin
        src_data_counter <= src_data_counter + 1'b1;
      end
    end
  end

  // transfer length is in bytes, but counter monitors the source data beats
  // NOTE(review): MEM_SIZE is 34 bits wide, but only bits [31:SRC_BEAT_BYTE]
  // are used here, so for memories above 4 GB the default last beat is
  // derived from a truncated size. Also note that the two branches apply
  // different offsets (-1 on the MEM_SIZE branch only) -- confirm both
  // against the register map definition of transfer_length.
  assign src_wr_last_beat_s = (src_transfer_length_s == 32'h0) ? MEM_SIZE[31:SRC_BEAT_BYTE]-1 : src_transfer_length_s[31:SRC_BEAT_BYTE];
  assign src_wr_last_int_s = (src_data_counter == src_wr_last_beat_s) ? 1'b1 : 1'b0;

endmodule