美文网首页
全并行流水线移位相加乘法器

全并行流水线移位相加乘法器

作者: 月见樽 | 来源:发表于2017-08-15 10:50 被阅读0次

    基本算法

    与分时复用的移位相加类似,取消分时复用,使用面积换时间,使用流水线设计,流水线填满后可以一个时钟周期计算出一个结果

    • 分别计算乘数的移位结果,并与被乘数对应位相与
    • 使用加法树将结果相加

    RTL代码

    移位部分

    固定移位单元代码如下,当被乘数第n位为1时,输出乘数向左移位n位的结果,否则输出0

    // Registered partial-product generator for one multiplier bit.
    //
    // On each rising clock edge the output becomes shift_din, zero-extended to
    // 2*WIDTH bits and shifted left by SHIFT_NUM, when both shift_valid and
    // shift_mask are 1. In every other case (including reset) the output is 0.
    //
    // Parameters:
    //   WIDTH     - width of the operand shift_din; the output is 2*WIDTH wide
    //   SHIFT_NUM - constant left-shift amount applied to the operand
    module shift_unit #(
        parameter WIDTH = 4,
        parameter SHIFT_NUM = 0
    )(
        input clk,    // Clock
        input rst_n,  // Asynchronous reset active low
        input shift_valid,
        input shift_mask,
        input [WIDTH - 1:0]shift_din,

        output reg [2 * WIDTH - 1:0]shift_dout
    );

    // Zero-extend the operand to the output width first so that no bits are
    // lost when it is shifted left.
    wire [2 * WIDTH - 1:0]shifted_term = {{WIDTH{1'b0}}, shift_din} << SHIFT_NUM;

    always @ (posedge clk or negedge rst_n) begin
        // Default for reset and for a masked/invalid input: contribute zero.
        shift_dout <= {(2 * WIDTH){1'b0}};
        if (rst_n) begin
            if (shift_valid && shift_mask) begin
                shift_dout <= shifted_term;
            end
        end
    end

    endmodule
    

    移位器代码如下,使用生成语句生成位宽个移位器

    // Bank of WIDTH shift units producing all partial products in parallel.
    //
    // Unit n receives mult1 as its operand, bit n of mult2 as its mask and a
    // fixed shift amount of n. The 2*WIDTH-bit outputs of the units are packed
    // side by side into shift_dout, with unit 0 in the least significant slice.
    module parallel_shifter #(
        parameter WIDTH = 4
    )(
        input clk,    // Clock
        input rst_n,  // Asynchronous reset active low

        input mult_valid,
        input [WIDTH - 1:0]mult1,mult2,

        output [(WIDTH ** 2) * 2 - 1:0]shift_dout
    );

    // Width of one partial product, i.e. of one slice of shift_dout.
    localparam TERM_WIDTH = 2 * WIDTH;

    genvar bit_idx;
    generate
        for (bit_idx = 0; bit_idx < WIDTH; bit_idx = bit_idx + 1) begin : shifter_layer
            wire [TERM_WIDTH - 1:0]term;

            shift_unit #(
                .WIDTH     (WIDTH),
                .SHIFT_NUM (bit_idx)
            ) u_shift_unit (
                .clk         (clk),
                .rst_n       (rst_n),
                .shift_valid (mult_valid),
                .shift_mask  (mult2[bit_idx]),
                .shift_din   (mult1),
                .shift_dout  (term)
            );

            // Place this unit's result into its slice of the packed bus.
            assign shift_dout[bit_idx * TERM_WIDTH +: TERM_WIDTH] = term;
        end
    endgenerate

    endmodule
    

    加法部分

    加法部分使用加法树,可以实现流水线操作,以下为加法树单层代码

    // One registered layer of an adder tree.
    //
    // adder_din carries 2*ADDER_NUM operands of ADDER_WIDTH bits each, packed
    // LSB-first. Adder k sums operands 2k and 2k+1 and registers the
    // (ADDER_WIDTH+1)-bit result, so the carry is kept. The registered sums are
    // packed LSB-first into adder_dout. All registers clear to 0 on reset.
    module adder_layer #(
        parameter ADDER_NUM = 4,
        parameter ADDER_WIDTH = 8
    )(
        input clk,    // Clock
        input rst_n,  // Asynchronous reset active low
        input [ADDER_NUM * ADDER_WIDTH * 2 - 1:0]adder_din,

        output [ADDER_NUM * (ADDER_WIDTH + 1) - 1:0]adder_dout
    );

    // Width of one registered sum (operand width plus the carry bit).
    localparam SUM_WIDTH = ADDER_WIDTH + 1;

    // All sums of this layer, held in a single packed register.
    reg [ADDER_NUM * SUM_WIDTH - 1:0]sum_pipe;
    integer k;

    always @ (posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            sum_pipe <= {(ADDER_NUM * SUM_WIDTH){1'b0}};
        end else begin
            for (k = 0; k < ADDER_NUM; k = k + 1) begin
                // The SUM_WIDTH-bit destination widens the addition, so the
                // carry out of the two ADDER_WIDTH-bit operands is preserved.
                sum_pipe[k * SUM_WIDTH +: SUM_WIDTH] <=
                    adder_din[(2 * k) * ADDER_WIDTH +: ADDER_WIDTH] +
                    adder_din[(2 * k + 1) * ADDER_WIDTH +: ADDER_WIDTH];
            end
        end
    end

    assign adder_dout = sum_pipe;

    endmodule
    

    以下为加法树代码

    // Pipelined binary adder tree built from LAYER_NUM adder_layer instances.
    //
    // adder_din carries 2**LAYER_NUM operands of MIN_ADDER_WIDTH bits each.
    // Layer index lvl counts down from LAYER_NUM (the input layer) to 1 (the
    // final layer). Layer lvl contains 2**(lvl-1) adders whose operands are
    // MIN_ADDER_WIDTH + LAYER_NUM - lvl bits wide; each layer adds one bit to
    // the result width. adder_dout is the single sum produced by layer 1.
    module adder_tree #(
        parameter LAYER_NUM = 4,
        parameter MIN_ADDER_WIDTH = 8
    )(
        input clk,    // Clock
        input rst_n,  // Asynchronous reset active low

        input [(2 ** LAYER_NUM) * MIN_ADDER_WIDTH - 1:0]adder_din,
        output [LAYER_NUM + MIN_ADDER_WIDTH - 1:0]adder_dout
    );

    genvar lvl;
    generate
        for (lvl = LAYER_NUM; lvl >= 1; lvl = lvl - 1) begin : adder_layer_def
            // Packed operands entering this layer: 2**lvl values.
            wire [(2 ** lvl) * (MIN_ADDER_WIDTH + LAYER_NUM - lvl) - 1:0]operands;
            // Packed sums leaving this layer: 2**(lvl-1) values, one bit wider.
            wire [(2 ** (lvl - 1)) * (MIN_ADDER_WIDTH + LAYER_NUM - lvl + 1) - 1:0]sums;

            if (lvl < LAYER_NUM) begin
                // Inner layers consume the sums of the layer above them.
                assign operands = adder_layer_def[lvl + 1].sums;
            end else begin
                // The first layer is fed directly from the module input.
                assign operands = adder_din;
            end

            adder_layer #(
                .ADDER_NUM   (2 ** (lvl - 1)),
                .ADDER_WIDTH (MIN_ADDER_WIDTH + LAYER_NUM - lvl)
            ) u_adder_layer (
                .clk        (clk),
                .rst_n      (rst_n),
                .adder_din  (operands),
                .adder_dout (sums)
            );
        end
    endgenerate

    // The last layer holds exactly one sum, which is the tree result.
    assign adder_dout = adder_layer_def[1].sums;
    endmodule
    

    顶层

    顶层组合了加法器和移位器,代码如下

    // Top level of the fully parallel, pipelined shift-and-add multiplier.
    //
    // The operands are 2**LOG2_WIDTH bits wide. parallel_shifter produces one
    // registered partial product per bit of mult2, and adder_tree sums them in
    // LOG2_WIDTH registered layers. dout is the low 2*WIDTH bits of that sum,
    // which is wide enough for the product of two WIDTH-bit operands.
    //
    // Parameters:
    //   LOG2_WIDTH - base-2 logarithm of the operand width
    // Ports:
    //   mult1, mult2 - operands
    //   din_valid    - when 0, all partial products are forced to 0
    //   dout         - product; no output-valid signal is provided
    module shift_adder #(
        parameter LOG2_WIDTH = 2
    )(
        input clk,    // Clock
        input rst_n,  // Asynchronous reset active low

        input [2 ** LOG2_WIDTH - 1:0]mult1,mult2,
        input din_valid,

        output [(2 ** LOG2_WIDTH) * 2 - 1:0]dout
    );

    // Derived operand width. Declared as localparam because the port widths are
    // computed from LOG2_WIDTH, so WIDTH must never be set independently of it.
    localparam WIDTH = 2 ** LOG2_WIDTH;

    // WIDTH partial products of 2*WIDTH bits each, packed LSB-first.
    wire [(WIDTH ** 2) * 2 - 1:0]shift_dout;
    parallel_shifter #(
        .WIDTH(WIDTH)
    ) u_parallel_shifter (
        .clk(clk),    // Clock
        .rst_n(rst_n),  // Asynchronous reset active low

        .mult_valid(din_valid),
        .mult1(mult1),
        .mult2(mult2),

        .shift_dout(shift_dout)
    );

    // The adder_tree output port is LAYER_NUM + MIN_ADDER_WIDTH bits wide, i.e.
    // LOG2_WIDTH + 2*WIDTH bits here. The net is sized to match exactly so that
    // there is no port-width mismatch and no undriven top bit.
    wire [LOG2_WIDTH + 2 * WIDTH - 1:0]adder_dout;
    adder_tree #(
        .LAYER_NUM(LOG2_WIDTH),
        .MIN_ADDER_WIDTH(2 * WIDTH)
    ) u_adder_tree (
        .clk(clk),    // Clock
        .rst_n(rst_n),  // Asynchronous reset active low

        .adder_din(shift_dout),
        .adder_dout(adder_dout)
    );

    // Drop the growth bits added by the tree; the product fits in 2*WIDTH bits.
    assign dout = adder_dout[WIDTH * 2 - 1:0];

    endmodule
    

    测试

    测试平台使用sv语法完成。因该乘法器完成一次运算的时间固定(移位级1拍加上加法树每层1拍),因此无输出有效信号;将输入按该固定延迟打拍后,与使用*计算出的结果比较即可

    // Self-checking testbench for shift_adder.
    //
    // Random operands are applied on every falling clock edge. Because the DUT
    // has no output-valid signal, the operands are delayed by the DUT latency
    // and their product (computed with *) is compared against the DUT output on
    // every falling edge. "successful" or "fail" is printed for each comparison.
    module mult_tb (
    );

    parameter LOG2_WIDTH = 2;
    parameter WIDTH = 2 ** LOG2_WIDTH;

    // DUT latency in clock cycles: one register stage in the shift units plus
    // one register stage per adder-tree layer (LOG2_WIDTH layers).
    localparam LATENCY = LOG2_WIDTH + 1;

    logic clk,rst_n;
    logic multiplier_valid;
    logic [WIDTH - 1:0]multiplier1;
    logic [WIDTH - 1:0]multiplier2;

    logic [2 * WIDTH - 1:0]product;

    shift_adder #(
        .LOG2_WIDTH(LOG2_WIDTH)
    ) dut (
        .clk(clk),    // Clock
        .rst_n(rst_n),  // Asynchronous reset active low

        .mult1(multiplier1),
        .mult2(multiplier2),
        .din_valid(multiplier_valid),

        .dout(product)
    );

    // Free-running clock with a period of 100 time units.
    initial begin
        clk = 1'b0;
        forever begin
            #50 clk = ~clk;
        end
    end

    // Short asynchronous reset pulse before the first rising clock edge.
    initial begin
        rst_n = 1'b1;
        #5 rst_n = 1'b0;
        #10 rst_n = 1'b1;
    end

    // Stimulus: 100 random operand pairs, changed on the falling edge so they
    // are stable around the rising edge on which the DUT samples them.
    initial begin
        {multiplier_valid,multiplier1,multiplier2} = 'b0;
        repeat(100) begin
            @(negedge clk);
            // $urandom_range bounds are inclusive, so the largest operand
            // value is 2**WIDTH - 1; a bound of 2**WIDTH would wrap to 0 in
            // the cast and over-represent zero.
            multiplier1 = (WIDTH)'($urandom_range(0,2 ** WIDTH - 1));
            multiplier2 = (WIDTH)'($urandom_range(0,2 ** WIDTH - 1));
            multiplier_valid = 1'b1;
        end
        $stop();
    end

    // Delay lines that hold the operands for LATENCY cycles so they line up
    // with the DUT output. Stage 0 is the newest sample.
    logic [WIDTH - 1:0]mult1_dly[LATENCY];
    logic [WIDTH - 1:0]mult2_dly[LATENCY];
    reg [2 * WIDTH - 1:0]exp;

    always @ (posedge clk or negedge rst_n) begin
        if(~rst_n) begin
            for (int s = 0; s < LATENCY; s++) begin
                mult1_dly[s] <= '0;
                mult2_dly[s] <= '0;
            end
        end else begin
            mult1_dly[0] <= multiplier1;
            mult2_dly[0] <= multiplier2;
            for (int s = 1; s < LATENCY; s++) begin
                mult1_dly[s] <= mult1_dly[s - 1];
                mult2_dly[s] <= mult2_dly[s - 1];
            end
        end
    end

    // Checker: compare on the falling edge, when both the delayed operands and
    // the DUT output (all updated on the rising edge) are stable.
    initial begin
        exp = 'b0;
        forever begin
            @(negedge clk);
            exp = mult1_dly[LATENCY - 1] * mult2_dly[LATENCY - 1];
            if(exp == product) begin
                $display("successful");
            end else begin
                $display("fail");
            end
        end
    end
    endmodule
    

    相关文章

      网友评论

          本文标题:全并行流水线移位相加乘法器

          本文链接:https://www.haomeiwen.com/subject/vcemrxtx.html