axi_adc_decimate: Re-implemented FIR filter

The minimum decimation rate of the CIC block is five, which means data
arrives at the FIR filter at most every fifth clock cycle. The decimation
rate of the filter is two, so the filter produces an output at most every
ten clock cycles. This leaves ten clock cycles to compute each result.

The current implementation of the filter uses a fully pipelined
architecture with one multiplier for each coefficient. Each multiplier
does work for one clock cycle and then sits idle for the next nine.

Rework the filter to be sequential, reducing the number of required
multipliers to one. In addition, exploit the symmetric structure of the
filter to make use of the pre-adder, halving the number of required
multiply operations (from six to three).

This significantly reduces the logic utilization of the filter and
moderately reduces its power consumption.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
main
Lars-Peter Clausen 2017-04-03 18:54:00 +02:00
parent 737418a1b0
commit 3e7325b29a
1 changed files with 255 additions and 305 deletions

View File

@ -1,331 +1,281 @@
// -------------------------------------------------------------
// ***************************************************************************
// ***************************************************************************
// Copyright 2017(c) Analog Devices, Inc.
//
// Module: fir_decim
// Generated by MATLAB(R) 9.0 and the Filter Design HDL Coder 3.0.
// Generated on: 2016-07-05 15:45:22
// -------------------------------------------------------------
// -------------------------------------------------------------
// HDL Code Generation Options:
// All rights reserved.
//
// FIRAdderStyle: tree
// OptimizeForHDL: on
// EDAScriptGeneration: off
// AddPipelineRegisters: on
// Name: fir_decim
// TargetLanguage: Verilog
// TestBenchName: fo_copy_tb
// TestBenchStimulus: step ramp chirp noise
// GenerateHDLTestBench: off
// -------------------------------------------------------------
// HDL Implementation : Fully parallel
// Multipliers : 6
// Folding Factor : 1
// -------------------------------------------------------------
// Filter Settings:
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// - Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in
// the documentation and/or other materials provided with the
// distribution.
// - Neither the name of Analog Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
// - The use of this software may or may not infringe the patent rights
// of one or more patent holders. This license does not release you
// from the requirement that you obtain separate licenses from these
// patent holders to use this software.
// - Use of the software either in source or binary form, must be run
// on or directly connected to an Analog Devices Inc. component.
//
// Discrete-Time FIR Multirate Filter (real)
// -----------------------------------------
// Filter Structure : Direct-Form FIR Polyphase Decimator
// Decimation Factor : 2
// Polyphase Length : 3
// Filter Length : 6
// Stable : Yes
// Linear Phase : Yes (Type 2)
// THIS SOFTWARE IS PROVIDED BY ANALOG DEVICES "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
// INCLUDING, BUT NOT LIMITED TO, NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED.
//
// Arithmetic : fixed
// Numerator : s12,11 -> [-1 1)
// -------------------------------------------------------------
// IN NO EVENT SHALL ANALOG DEVICES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, INTELLECTUAL PROPERTY
// RIGHTS, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ***************************************************************************
// ***************************************************************************
`timescale 1 ns / 1 ns
module fir_decim
(
clk,
clk_enable,
reset,
filter_in,
filter_out,
ce_out
);
module fir_decim #(
parameter USE_DSP48E = 1
) (
input clk,
input clk_enable,
input reset,
input signed [11:0] filter_in,
output reg signed [25:0] filter_out,
output reg ce_out
);
input clk;
input clk_enable;
input reset;
input signed [11:0] filter_in; //sfix12_En11
output signed [25:0] filter_out; //sfix26_En22
output ce_out;
localparam signed [11:0] coeffphase1_1 = 12'b000011010101; //sfix12_En11
localparam signed [11:0] coeffphase1_2 = 12'b011011110010; //sfix12_En11
localparam signed [11:0] coeffphase1_3 = 12'b110000111110; //sfix12_En11
////////////////////////////////////////////////////////////////
//Module Architecture: fir_decim
////////////////////////////////////////////////////////////////
// Local Functions
// Type Definitions
// Constants
parameter signed [11:0] coeffphase1_1 = 12'b000011010101; //sfix12_En11
parameter signed [11:0] coeffphase1_2 = 12'b011011110010; //sfix12_En11
parameter signed [11:0] coeffphase1_3 = 12'b110000111110; //sfix12_En11
parameter signed [11:0] coeffphase2_1 = 12'b110000111110; //sfix12_En11
parameter signed [11:0] coeffphase2_2 = 12'b011011110010; //sfix12_En11
parameter signed [11:0] coeffphase2_3 = 12'b000011010101; //sfix12_En11
// We know that clk_enable is asserted at most every 5th clock cycle and the
// output is decimated by two. So we have 10 clock cycles to compute the
// result. That's plenty of time considering that there are only 6
// coefficients.
// Signals
reg [1:0] ring_count; // ufix2
wire phase_0; // boolean
wire phase_1; // boolean
reg ce_out_reg; // boolean
reg signed [11:0] input_register; // sfix12_En11
reg signed [11:0] input_pipeline_phase0 [0:1] ; // sfix12_En11
reg signed [11:0] input_pipeline_phase1 [0:2] ; // sfix12_En11
wire signed [23:0] product_phase0_1; // sfix24_En22
wire signed [23:0] product_phase0_2; // sfix24_En22
wire signed [23:0] product_phase0_3; // sfix24_En22
wire signed [23:0] product_phase1_1; // sfix24_En22
wire signed [23:0] product_phase1_2; // sfix24_En22
wire signed [23:0] product_phase1_3; // sfix24_En22
reg signed [23:0] product_pipeline_phase0_1; // sfix24_En22
reg signed [23:0] product_pipeline_phase0_2; // sfix24_En22
reg signed [23:0] product_pipeline_phase0_3; // sfix24_En22
reg signed [23:0] product_pipeline_phase1_1; // sfix24_En22
reg signed [23:0] product_pipeline_phase1_2; // sfix24_En22
reg signed [23:0] product_pipeline_phase1_3; // sfix24_En22
wire signed [25:0] sumvector1 [0:2] ; // sfix26_En22
wire signed [23:0] add_signext; // sfix24_En22
wire signed [23:0] add_signext_1; // sfix24_En22
wire signed [24:0] add_temp; // sfix25_En22
wire signed [23:0] add_signext_2; // sfix24_En22
wire signed [23:0] add_signext_3; // sfix24_En22
wire signed [24:0] add_temp_1; // sfix25_En22
wire signed [23:0] add_signext_4; // sfix24_En22
wire signed [23:0] add_signext_5; // sfix24_En22
wire signed [24:0] add_temp_2; // sfix25_En22
reg signed [25:0] sumdelay_pipeline1 [0:2] ; // sfix26_En22
wire signed [25:0] sumvector2 [0:1] ; // sfix26_En22
wire signed [25:0] add_signext_6; // sfix26_En22
wire signed [25:0] add_signext_7; // sfix26_En22
wire signed [26:0] add_temp_3; // sfix27_En22
reg signed [25:0] sumdelay_pipeline2 [0:1] ; // sfix26_En22
wire signed [25:0] sum3; // sfix26_En22
wire signed [25:0] add_signext_8; // sfix26_En22
wire signed [25:0] add_signext_9; // sfix26_En22
wire signed [26:0] add_temp_4; // sfix27_En22
reg ce_delayline1; // boolean
reg ce_delayline2; // boolean
reg ce_delayline3; // boolean
reg ce_delayline4; // boolean
reg ce_delayline5; // boolean
reg ce_delayline6; // boolean
reg ce_delayline7; // boolean
reg ce_delayline8; // boolean
wire ce_gated; // boolean
reg signed [25:0] output_register; // sfix26_En22
reg active = 1'b0;
reg active_d1 = 1'b0;
reg active_d2 = 1'b0;
// Block Statements
always @ (posedge clk or posedge reset)
begin: ce_output
if (reset == 1'b1) begin
ring_count <= 1;
reg [1:0] count = 2'b00;
reg phase = 1'b1;
reg ready = 1'b0;
reg [3:0] storage0[0:11];
reg [3:0] storage1[0:11];
reg signed [11:0] data0;
reg signed [11:0] data1;
reg signed [11:0] coeff;
wire signed [25:0] sum;
integer j;
// Power-up initialisation: clear every 4-bit entry of both sample
// storage arrays (12 entries each, one entry per input bit position).
initial begin
  for (j = 0; j < 12; j = j + 1) begin
    storage1[j] <= 4'b0000;
    storage0[j] <= 4'b0000;
  end
end
always @(posedge clk) begin
if (reset == 1'b1) begin
phase <= 1'b1;
end else begin
if (clk_enable == 1'b1) begin
phase <= phase + 1'b1;
end
else begin
if (clk_enable == 1'b1) begin
ring_count <= {ring_count[0], ring_count[1]};
end
end
end // ce_output
end
end
assign phase_0 = ring_count[0] && clk_enable;
// Run flag for the sequential multiply-accumulate.
//   active    - set when a sample is accepted (clk_enable) while phase is 1,
//               cleared again once the step counter reaches 2.
//   active_d1 - active delayed by one clock.
//   active_d2 - active delayed by two clocks.
// The delayed copies are used as enables for the later MAC stages.
always @(posedge clk) begin
  // Two-stage delay chain; non-blocking, so statement order is irrelevant.
  active_d2 <= active_d1;
  active_d1 <= active;

  if (phase && clk_enable) begin
    active <= 1'b1;
  end else if (count == 2'd2) begin
    active <= 1'b0;
  end
end
assign phase_1 = ring_count[1] && clk_enable;
// Step counter of the sequential MAC: while active is high it cycles
// 0 -> 1 -> 2 -> 0; it holds its value while active is low.
always @(posedge clk) begin
  if (active) begin
    count <= (count == 2'h2) ? 2'h0 : count + 1'b1;
  end
end
// ------------------ CE Output Register ------------------
// One-cycle pulse on the falling edge of the delayed run flag
// (active_d1 already low while active_d2 is still high).
always @(posedge clk) begin
  ready <= 1'b0;                     // default: no pulse
  if (!active_d1 && active_d2) begin
    ready <= 1'b1;                   // last non-blocking assignment wins
  end
end
always @ (posedge clk or posedge reset)
begin: ce_output_register
if (reset == 1'b1) begin
ce_out_reg <= 1'b0;
end
else begin
ce_out_reg <= phase_1;
end
end // ce_output_register
always @ (posedge clk or posedge reset)
begin: input_reg_process
if (reset == 1'b1) begin
input_register <= 0;
end
else begin
if (clk_enable == 1'b1) begin
input_register <= filter_in;
generate
genvar i;
for (i = 0; i < 12; i = i + 1) begin
always @(posedge clk) begin
if (clk_enable == 1'b1) begin
if (phase == 1'b0) begin
storage0[i] <= {storage0[i][2:0],filter_in[i]};
end
if (phase == 1'b1) begin
storage1[i] <= {storage1[i][2:0],filter_in[i]};
end
end
end // input_reg_process
end
always @( posedge clk or posedge reset)
begin: Delay_Pipeline_Phase0_process
if (reset == 1'b1) begin
input_pipeline_phase0[0] <= 0;
input_pipeline_phase0[1] <= 0;
// Combinational tap select for bit position i.
// storage0[i] / storage1[i] each hold bit i of the most recently stored
// samples (newest at index 0). For the current step, data0 takes the
// sample at index 2-count and data1 the sample at index count; the two
// operands are added together before being multiplied by coeff.
// Blocking assignments are used because this is purely combinational logic.
always @(*) begin
  data0[i] = storage0[i][2-count];
  data1[i] = storage1[i][count];
end
end
endgenerate
// Combinational coefficient select: picks the coefficient that belongs to
// the current step of the sequential MAC. Only three coefficients are
// needed because each one is applied to the sum of two samples.
// Steps outside 0..2 select a zero coefficient.
// Blocking assignments are used because this is purely combinational logic.
always @(*) begin
  case (count)
    2'h0:    coeff = coeffphase1_1;
    2'h1:    coeff = coeffphase1_2;
    2'h2:    coeff = coeffphase1_3;
    default: coeff = 12'h000;
  endcase
end
generate if (USE_DSP48E) begin
wire [47:0] _sum;
wire [6:0] opmode = {1'b0,active_d2,5'b00101};
// The accumulated result fits in 26 bits, so only P[43:18] is kept.
assign sum = _sum[43:18];
// MAC with pre-adder
DSP48E1 #(
.ACASCREG (0),
.ADREG (1),
.ALUMODEREG (0),
.AREG (0),
.AUTORESET_PATDET ("NO_RESET"),
.A_INPUT ("DIRECT"),
.BCASCREG (1),
.BREG (1),
.B_INPUT ("DIRECT"),
.CARRYINREG (0),
.CARRYINSELREG (0),
.CREG (0),
.DREG (0),
.INMODEREG (0),
.MASK (48'h3fffffffffff),
.MREG (1),
.OPMODEREG (1),
.PATTERN (48'h000000000000),
.PREG (1),
.SEL_MASK ("MASK"),
.SEL_PATTERN ("PATTERN"),
.USE_DPORT ("TRUE"),
.USE_MULT ("MULTIPLY"),
.USE_PATTERN_DETECT ("NO_PATDET"),
.USE_SIMD ("ONE48"))
i_dsp_mac (
.CLK (clk),
.A ({5'h0,data0[11],data0,12'h0}), // MSB aligned to 24-bit, 25th bit signed extended
.B ({coeff,6'b0}),
.C (48'h00),
.D ({data1[11],data1,12'h0}),
.MULTSIGNIN (1'b0),
.CARRYIN (1'b0),
.CARRYCASCIN (1'b0),
.ACIN (30'h0),
.BCIN (18'h0),
.PCIN (48'h0),
.P (_sum),
.MULTSIGNOUT (),
.CARRYOUT (),
.CARRYCASCOUT (),
.ACOUT (),
.BCOUT (),
.PCOUT (),
.ALUMODE (4'b0000),
.CARRYINSEL (3'h0),
.INMODE (5'b00100),
.OPMODE (opmode),
.PATTERNBDETECT (),
.PATTERNDETECT (),
.OVERFLOW (),
.UNDERFLOW (),
.CEA1 (1'b0),
.CEA2 (1'b0),
.CEAD (active),
.CEALUMODE (1'b0),
.CEB1 (1'b0),
.CEB2 (active),
.CEC (1'b0),
.CECARRYIN (1'b0),
.CECTRL (active),
.CED (1'b0),
.CEINMODE (1'b0),
.CEM (active_d1),
.CEP (active_d2),
.RSTA (1'b0),
.RSTALLCARRYIN (1'b0),
.RSTALUMODE (1'b0),
.RSTB (1'b0),
.RSTC (1'b0),
.RSTCTRL (1'b0),
.RSTD (1'b0),
.RSTINMODE (1'b0),
.RSTM (1'b0),
.RSTP (1'b0)
);
end else begin
reg signed [25:0] _sum = 'h00;
reg signed [12:0] pre_adder;
reg signed [11:0] coeff_d1;
reg signed [23:0] product = 'h00;
assign sum = _sum;
always @(posedge clk) begin
if (active == 1'b1) begin
pre_adder <= data0 + data1;
coeff_d1 <= coeff;
end
else begin
if (phase_1 == 1'b1) begin
input_pipeline_phase0[0] <= input_register;
input_pipeline_phase0[1] <= input_pipeline_phase0[0];
end
if (active_d1 == 1'b1) begin
product <= coeff_d1 * pre_adder;
end
end // Delay_Pipeline_Phase0_process
always @( posedge clk or posedge reset)
begin: Delay_Pipeline_Phase1_process
if (reset == 1'b1) begin
input_pipeline_phase1[0] <= 0;
input_pipeline_phase1[1] <= 0;
input_pipeline_phase1[2] <= 0;
if (reset == 1'b1 || ready == 1'b1) begin
_sum <= 'h00;
end else if (active_d2 == 1'b1) begin
_sum <= _sum + product;
end
else begin
if (phase_0 == 1'b1) begin
input_pipeline_phase1[0] <= input_register;
input_pipeline_phase1[1] <= input_pipeline_phase1[0];
input_pipeline_phase1[2] <= input_pipeline_phase1[1];
end
end
end // Delay_Pipeline_Phase1_process
end
end
endgenerate
// Output-valid strobe: ready delayed by one clock, forced low by the
// synchronous reset.
always @(posedge clk) begin
  ce_out <= ready;
  if (reset) begin
    ce_out <= 1'b0;   // reset overrides; last non-blocking assignment wins
  end
end
assign product_phase0_1 = input_register * coeffphase1_1;
// Output register: captures the accumulated sum when ready pulses and
// holds it otherwise, so filter_out updates on the same edge that
// raises ce_out.
always @(posedge clk) begin
  if (ready) begin
    filter_out <= sum;
  end
end
assign product_phase0_2 = input_pipeline_phase0[0] * coeffphase1_2;
assign product_phase0_3 = input_pipeline_phase0[1] * coeffphase1_3;
assign product_phase1_1 = input_pipeline_phase1[0] * coeffphase2_1;
assign product_phase1_2 = input_pipeline_phase1[1] * coeffphase2_2;
assign product_phase1_3 = input_pipeline_phase1[2] * coeffphase2_3;
always @ (posedge clk or posedge reset)
begin: product_pipeline_process1
if (reset == 1'b1) begin
product_pipeline_phase0_1 <= 0;
product_pipeline_phase1_1 <= 0;
product_pipeline_phase0_2 <= 0;
product_pipeline_phase1_2 <= 0;
product_pipeline_phase0_3 <= 0;
product_pipeline_phase1_3 <= 0;
end
else begin
if (phase_1 == 1'b1) begin
product_pipeline_phase0_1 <= product_phase0_1;
product_pipeline_phase1_1 <= product_phase1_1;
product_pipeline_phase0_2 <= product_phase0_2;
product_pipeline_phase1_2 <= product_phase1_2;
product_pipeline_phase0_3 <= product_phase0_3;
product_pipeline_phase1_3 <= product_phase1_3;
end
end
end // product_pipeline_process1
assign add_signext = product_pipeline_phase1_1;
assign add_signext_1 = product_pipeline_phase1_2;
assign add_temp = add_signext + add_signext_1;
assign sumvector1[0] = $signed({{1{add_temp[24]}}, add_temp});
assign add_signext_2 = product_pipeline_phase1_3;
assign add_signext_3 = product_pipeline_phase0_1;
assign add_temp_1 = add_signext_2 + add_signext_3;
assign sumvector1[1] = $signed({{1{add_temp_1[24]}}, add_temp_1});
assign add_signext_4 = product_pipeline_phase0_2;
assign add_signext_5 = product_pipeline_phase0_3;
assign add_temp_2 = add_signext_4 + add_signext_5;
assign sumvector1[2] = $signed({{1{add_temp_2[24]}}, add_temp_2});
always @ (posedge clk or posedge reset)
begin: sumdelay_pipeline_process1
if (reset == 1'b1) begin
sumdelay_pipeline1[0] <= 0;
sumdelay_pipeline1[1] <= 0;
sumdelay_pipeline1[2] <= 0;
end
else begin
if (phase_1 == 1'b1) begin
sumdelay_pipeline1[0] <= sumvector1[0];
sumdelay_pipeline1[1] <= sumvector1[1];
sumdelay_pipeline1[2] <= sumvector1[2];
end
end
end // sumdelay_pipeline_process1
assign add_signext_6 = sumdelay_pipeline1[0];
assign add_signext_7 = sumdelay_pipeline1[1];
assign add_temp_3 = add_signext_6 + add_signext_7;
assign sumvector2[0] = add_temp_3[25:0];
assign sumvector2[1] = sumdelay_pipeline1[2];
always @ (posedge clk or posedge reset)
begin: sumdelay_pipeline_process2
if (reset == 1'b1) begin
sumdelay_pipeline2[0] <= 0;
sumdelay_pipeline2[1] <= 0;
end
else begin
if (phase_1 == 1'b1) begin
sumdelay_pipeline2[0] <= sumvector2[0];
sumdelay_pipeline2[1] <= sumvector2[1];
end
end
end // sumdelay_pipeline_process2
assign add_signext_8 = sumdelay_pipeline2[0];
assign add_signext_9 = sumdelay_pipeline2[1];
assign add_temp_4 = add_signext_8 + add_signext_9;
assign sum3 = add_temp_4[25:0];
always @ (posedge clk or posedge reset)
begin: ce_delay
if (reset == 1'b1) begin
ce_delayline1 <= 1'b0;
ce_delayline2 <= 1'b0;
ce_delayline3 <= 1'b0;
ce_delayline4 <= 1'b0;
ce_delayline5 <= 1'b0;
ce_delayline6 <= 1'b0;
ce_delayline7 <= 1'b0;
ce_delayline8 <= 1'b0;
end
else begin
if (clk_enable == 1'b1) begin
ce_delayline1 <= clk_enable;
ce_delayline2 <= ce_delayline1;
ce_delayline3 <= ce_delayline2;
ce_delayline4 <= ce_delayline3;
ce_delayline5 <= ce_delayline4;
ce_delayline6 <= ce_delayline5;
ce_delayline7 <= ce_delayline6;
ce_delayline8 <= ce_delayline7;
end
end
end // ce_delay
assign ce_gated = ce_delayline8 & ce_out_reg;
always @ (posedge clk or posedge reset)
begin: output_register_process
if (reset == 1'b1) begin
output_register <= 0;
end
else begin
if (phase_1 == 1'b1) begin
output_register <= sum3;
end
end
end // output_register_process
// Assignment Statements
assign ce_out = ce_gated;
assign filter_out = output_register;
endmodule // fir_decim
endmodule