Basic algorithm of ROM multiplier Single ROM multiplier Time division multiplexing ROM multiplier

Basic algorithm of ROM multiplier Single ROM multiplier Time division multiplexing ROM multiplier

Basic algorithm

The algorithm of the ROM multiplier is relatively simple: a ROM stores the results of the multiplication, and when an operation is needed the result is looked up directly at the corresponding address. For example, to multiply two 4-bit binary numbers a*b, you need a ROM with an 8-bit input and an 8-bit output to store the results. The relationship between the address and the stored data is: address {a,b} (bit concatenation) stores a*b (for example, address 8'b00010010 stores 0001*0010 = 8'b00000010). A ROM built this way becomes relatively large, so when the timing requirements are not strict, time (clock cycles) can be traded for area. For example, for an 8-bit * 8-bit ROM multiplier, we split multiplier 1 into its high 4 bits and low 4 bits and multiplier 2 into its high 4 bits and low 4 bits, and multiply the halves in pairs: a*b = (aH*bH << 8) + (aH*bL << 4) + (aL*bH << 4) + aL*bL. That is, the product of the two high halves is shifted left by 8 bits, each product of a high half and a low half is shifted left by 4 bits, the product of the two low halves is not shifted, and the four partial products are added together (the usual shift-and-add routine of multiplication). The result is obtained after at least four clock cycles, and the ROM can be reduced from a 16-bit-input, 16-bit-output ROM to an 8-bit-input, 8-bit-output ROM (a 4-bit * 4-bit multiplication table).

Single ROM multiplier

Python generator

A single ROM can be modelled with a case statement in Verilog. Writing such a highly repetitive case statement by hand is undoubtedly very inefficient, so here a Python script is used to generate it.

class ROMGenerator(object):
    """Generate a Verilog lookup-table ROM that multiplies two unsigned numbers.

    The generated module ``ROM_<Width>`` takes a ``2 * Width``-bit address
    formed as ``{a, b}`` and drives a ``2 * Width``-bit output with ``a * b``.
    """

    def __init__(self, Width):
        """Remember the operand width.

        Args:
            Width: Bit width of each of the two multiplication operands.
        """
        super(ROMGenerator, self).__init__()
        self.Width = Width

    def GeneratorROM(self, FileName):
        """Build the ROM module, write it to ``./<FileName>.v`` and return it.

        Args:
            FileName: Name of the output file. The ``.v`` extension is
                appended unless the name already ends with it.

        Returns:
            The complete Verilog source of the module as one string.
        """
        bus_width = 2 * self.Width
        operand_count = 2 ** self.Width
        # The trailing backslash keeps "case(addr)" as the last header line so
        # that the "\n".join below does not insert an empty line after it.
        RomContent = ["""
module ROM_%s (
    input [%s:0]addr,
    output reg [%s:0]dout
);

always @(*) begin
    case(addr)\
""" % (self.Width, bus_width - 1, bus_width - 1)]
        for i in range(operand_count):
            for j in range(operand_count):
                # Address is the concatenation {i, j}; the stored word is i * j.
                RomContent.append(
                    "\t\t%s'd%s:dout = %s'd%s;" %
                    (bus_width, i * operand_count + j, bus_width, i * j))
        # Unsized zero literal: valid for any output width.
        RomContent.append("""\t\tdefault:dout = 'b0;
    endcase
end
endmodule
""")
        verilog_text = "\n".join(RomContent)
        if FileName.endswith(".v"):
            out_path = "./%s" % FileName
        else:
            out_path = "./%s.v" % FileName
        with open(out_path, "w") as filepoint:
            filepoint.write(verilog_text)
        return verilog_text

if __name__ == '__main__':
    # Generate the ROM for 4-bit operands, write it under the name "ROM_4"
    # and echo the generated Verilog to stdout.
    rom_generator = ROMGenerator(4)
    generated_verilog = rom_generator.GeneratorROM("ROM_4")
    print(generated_verilog)

The code is very simple: apart from the fixed header and footer of the module, all it has to do is batch-generate lines of the form `\t\t%s'd%s:dout = %s'd%s;`.

Testbench

For testing, a testbench written in SystemVerilog is used; it compares the output of the module under test with the result of the built-in `*` operator.

// Self-checking testbench for the single-ROM multiplier (ROM_4).
// Random operands are applied on the falling clock edge and the ROM output is
// compared with the result of the `*` operator on the following rising edge.
module mult_tb (
);

parameter WIDTH = 4;

logic clk,rst_n;
logic [WIDTH-1:0]multiplier1;
logic [WIDTH-1:0]multiplier2;

logic [2 * WIDTH-1:0]product;

// The ROM address is the concatenation {multiplier1, multiplier2}.
ROM_4 dut(
    .addr({multiplier1,multiplier2}),
    .dout(product)
);

// Free-running clock with a period of 100 time units.
initial begin
    clk = 1'b0;
    forever begin
        #50 clk = ~clk;
    end
end

// One active-low reset pulse; the ROM instance above has no reset port.
initial begin
    rst_n = 1'b1;
    #5 rst_n = 1'b0;
    #10 rst_n = 1'b1;
end

// Stimulus: 100 random operand pairs, changed on the falling clock edge.
initial begin
    {multiplier1,multiplier2} ='b0;
    repeat(100) begin
        @(negedge clk);
        // $urandom_range bounds are inclusive, so the largest legal operand is
        // 2**WIDTH - 1; an upper bound of 2**WIDTH would be truncated to 0 by
        // the cast and make zero operands twice as likely as any other value.
        multiplier1 = (WIDTH)'($urandom_range(0,2 ** WIDTH - 1));
        multiplier2 = (WIDTH)'($urandom_range(0,2 ** WIDTH - 1));
    end
    $stop();
end

// Checker: on every rising clock edge compare the ROM output with the
// product computed by the `*` operator.
logic [2 * WIDTH-1:0]exp;
initial begin
    exp ='b0;
    forever begin
        @(posedge clk);
        exp = multiplier1 * multiplier2;
        if(exp == product) begin
            $display("successful");
        end else begin
            $display("fail");
        end
    end
end
endmodule

Time division multiplexed ROM multiplier

RTL code

core part

// Control core of the time-multiplexed ROM multiplier (header and mode FSM).
// HALF_WIDTH is the width of one operand half; the operands are
// 2 * HALF_WIDTH bits wide and the product is 4 * HALF_WIDTH bits wide.
module serial_multrom_mult_core #(
    parameter HALF_WIDTH = 4
)(
    input clk,   // clock
    input rst_n, // asynchronous reset, active low

    input [2 * HALF_WIDTH-1:0]mult1,
    input [2 * HALF_WIDTH-1:0]mult2,

    input start,
    input [2 * HALF_WIDTH-1:0]rom_dout,
    output reg [2 * HALF_WIDTH-1:0]rom_address,
    output reg [4 * HALF_WIDTH-1:0]dout
);

// Two-state controller: INIT waits for start, WORK runs one multiplication.
parameter INIT = 1'b0,
          WORK = 1'b1;
reg mode;
reg [1:0]counte_4_decay2;

always @ (posedge clk or negedge rst_n) begin
    if(~rst_n) begin
        mode <= INIT;
    end else begin
        case (mode)
            // Leave INIT only when start is asserted; otherwise hold.
            INIT:if(start == 1'b1) mode <= WORK;
            // Leave WORK once the twice-delayed step counter reaches 3.
            WORK:if(counte_4_decay2 == 2'd3) mode <= INIT;
            default:mode <= INIT;
        endcase
    end
end

The code so far is the state register of the state machine. When the start signal is asserted the state becomes WORK, and when the operation ends it returns to INIT.

// Step counter: counts up on every clock while mode is WORK, otherwise 0.
reg [1:0]counte_4;
always @(posedge clk or negedge rst_n) begin: proc_counte_4
    if(~rst_n) begin
        counte_4 <= 2'd0;
    end else if(mode == WORK) begin
        counte_4 <= counte_4 + 1'b1;
    end else begin
        counte_4 <= 2'd0;
    end
end

// Operand latch: captures both operands while start is asserted and holds
// them at all other times.
reg [2 * HALF_WIDTH-1:0]mult1_lock,mult2_lock;
always @(posedge clk or negedge rst_n) begin
    if(~rst_n) begin
        mult1_lock <= 'b0;
        mult2_lock <= 'b0;
    end else if(start == 1'b1) begin
        mult1_lock <= mult1;
        mult2_lock <= mult2;
    end
end

// Low and high halves of the latched operands.
wire [HALF_WIDTH-1:0]m1_lo = mult1_lock[HALF_WIDTH-1:0];
wire [HALF_WIDTH-1:0]m1_hi = mult1_lock[2 * HALF_WIDTH-1:HALF_WIDTH];
wire [HALF_WIDTH-1:0]m2_lo = mult2_lock[HALF_WIDTH-1:0];
wire [HALF_WIDTH-1:0]m2_hi = mult2_lock[2 * HALF_WIDTH-1:HALF_WIDTH];

// ROM address mux: the step counter selects which pair of halves is looked
// up; counte_4_decay is the step counter delayed by one clock.
reg [1:0]counte_4_decay;
always @ (posedge clk or negedge rst_n) begin
    if(~rst_n) begin
        rom_address    <= 'b0;
        counte_4_decay <= 'b0;
    end else if(start == 1'b1) begin
        rom_address    <= 'b0;
        counte_4_decay <= 'b0;
    end else begin
        counte_4_decay <= counte_4;
        case (counte_4)
            2'd0:rom_address <= {m1_lo,m2_lo};
            2'd1:rom_address <= {m1_hi,m2_lo};
            2'd2:rom_address <= {m1_lo,m2_hi};
            2'd3:rom_address <= {m1_hi,m2_hi};
            default:rom_address <= 'b0;
        endcase
    end
end

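The above is the input control part. In each step one four-bit half of multiplier 1 and one four-bit half of multiplier 2 (for example, the high four bits of multiplier 1 and the low four bits of multiplier 2) are concatenated to form the ROM address, and the ROM returns their product.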

// ROM output zero-extended to the full product width. Explicit replication
// and concatenation is used: an assignment pattern ('{...}) is not a zero
// extension and is not legal in plain Verilog.
wire [4 * HALF_WIDTH-1:0]rom_dout_ex = {{(2 * HALF_WIDTH){1'b0}},rom_dout};
reg [4 * HALF_WIDTH-1:0]rom_dout_lock;

// Shift stage: weight each partial product according to which halves were
// multiplied; counte_4_decay2 is counte_4_decay delayed by one more clock.
always @ (posedge clk or negedge rst_n) begin
    if(~rst_n) begin
        {rom_dout_lock,counte_4_decay2} <='b0;
    end else if(start == 1'b1) begin
        {rom_dout_lock,counte_4_decay2} <='b0;
    end else begin
        case (counte_4_decay)
            2'd0:rom_dout_lock <= rom_dout_ex;                      // low  * low
            2'd1:rom_dout_lock <= rom_dout_ex << HALF_WIDTH;        // high * low
            2'd2:rom_dout_lock <= rom_dout_ex << HALF_WIDTH;        // low  * high
            2'd3:rom_dout_lock <= rom_dout_ex << (2 * HALF_WIDTH);  // high * high
            default:rom_dout_lock <='b0;
        endcase
        counte_4_decay2 <= counte_4_decay;
    end
end

// Accumulator: adds the shifted partial product on every clock while mode is
// WORK, is cleared by start outside WORK, and otherwise holds the result.
always @ (posedge clk or negedge rst_n) begin
    if(~rst_n) begin
        dout <='b0;
    end else if(mode == WORK) begin
        dout <= dout + rom_dout_lock;
    end else if(start == 1'b1) begin
        dout <='b0;
    end else begin
        dout <= dout;
    end
end

endmodule

After the data is read from the ROM, it is shifted left by the number of bits that corresponds to the halves of the multipliers that were multiplied, and then accumulated.

Top part

// Top level of the time-multiplexed ROM multiplier: connects the control core
// to the lookup ROM.
// The ROM instance is hard-wired to ROM_4, so HALF_WIDTH defaults to 4 to
// match the width of the rom_address/rom_dout buses connected to it (and the
// default of the core).
// NOTE(review): other HALF_WIDTH values would presumably need a ROM module of
// matching width instead of ROM_4 - confirm before overriding the parameter.
module serial_multrom_mult_top #(
    parameter HALF_WIDTH = 4
)(
    input clk,//Clock
    input rst_n,//Asynchronous reset active low

    input start,
    input [2 * HALF_WIDTH-1:0]mult1,mult2,
    output [4 * HALF_WIDTH-1:0]dout
);

// Lookup interface between the core and the ROM.
wire [2 * HALF_WIDTH-1:0]rom_dout;
wire [2 * HALF_WIDTH-1:0]rom_address;
serial_multrom_mult_core #(
    .HALF_WIDTH(HALF_WIDTH)
) u_serial_multrom_mult_core (
    .clk(clk),//Clock
    .rst_n(rst_n),//Asynchronous reset active low

    .mult1(mult1),
    .mult2(mult2),

    .start(start),
    .rom_dout(rom_dout),
    .rom_address(rom_address),
    .dout(dout)
);

ROM_4 u_ROM_4(
    .addr(rom_address),
    .dout(rom_dout)
);
endmodule

Testbench

This testbench extends the testbench of the single ROM multiplier: it drives the clock and the start signal of the design and waits several clock cycles before checking each result.

`timescale 1ns/1ps
// Self-checking testbench for the time-multiplexed ROM multiplier.
// For each of 100 random operand pairs it asserts start for one clock period,
// waits 12 falling clock edges in total and compares dout with the result of
// the `*` operator.
module mult_tb (
);

parameter HALF_WIDTH = 4;
parameter WIDTH = HALF_WIDTH * 2;

logic clk,rst_n;
logic start;
logic [WIDTH-1:0]multiplier1;
logic [WIDTH-1:0]multiplier2;

logic [2 * WIDTH-1:0]product;

serial_multrom_mult_top #(
    .HALF_WIDTH(HALF_WIDTH)
) dut (
    .clk(clk),//Clock
    .rst_n(rst_n),//Asynchronous reset active low

    .start(start),
    .mult1(multiplier1),
    .mult2(multiplier2),
    .dout(product)
);

// Free-running clock with a period of 100 ns.
initial begin
    clk = 1'b0;
    forever begin
        #50 clk = ~clk;
    end
end

// One active-low reset pulse before the first rising clock edge.
initial begin
    rst_n = 1'b1;
    #5 rst_n = 1'b0;
    #10 rst_n = 1'b1;
end

logic [2 * WIDTH-1:0]exp;
initial begin
    // Drive start low from time 0 so the design never samples an X on it
    // before the first stimulus is applied.
    start = 1'b0;
    {multiplier1,multiplier2} ='b0;
    repeat(100) begin
        @(negedge clk);
        start = 1'b1;
        // $urandom_range bounds are inclusive, so the largest legal operand is
        // 2**WIDTH - 1; an upper bound of 2**WIDTH would be truncated to 0 by
        // the cast and make zero operands twice as likely as any other value.
        multiplier1 = (WIDTH)'($urandom_range(0,2 ** WIDTH - 1));
        multiplier2 = (WIDTH)'($urandom_range(0,2 ** WIDTH - 1));
        exp = multiplier1 * multiplier2;
        // start is deasserted at the first falling edge, i.e. it is high for
        // exactly one clock period; the remaining edges wait for the result.
        repeat(12) begin
            @(negedge clk);
            start ='b0;
        end
        if(product == exp) begin
            $display("successful");
        end else begin
            $display("fail");
        end
    end
    $stop();
end

endmodule

It should be noted that error code 211 may appear when simulating with ModelSim. In that case, turn off the waveform optimization function so that the simulation can run normally.

Reference: https://cloud.tencent.com/developer/article/1110628 ROM multiplier basic algorithm single ROM multiplier time-sharing multiplexing ROM multiplier-Cloud + Community-Tencent Cloud