# Basic algorithm

The algorithm of the ROM multiplier is relatively simple: a ROM stores the precomputed results of the multiplication, and when a product is needed it is looked up directly at the corresponding address. For example, to calculate the product a*b of two 4-bit binary numbers, you need a ROM with an 8-bit address input and an 8-bit data output to store the results. The relationship between the address and the stored data is: the address {a,b} (bit concatenation) stores a*b (for example, address 8'b00010010 stores 0001*0010 = 8'b00000010). The ROM needed in this case is relatively large, so when the timing requirements are not strict, clock cycles can be traded for area. For example, for an 8-bit * 8-bit ROM multiplier, we split multiplier 1 into its high 4 bits and low 4 bits, do the same for multiplier 2, and multiply the halves in pairs. The product of the two high halves is shifted left by 8 bits, each product of a high half and a low half is shifted left by 4 bits, and the product of the two low halves is not shifted; the four partial products are then added together (the usual routine for multiplication). The result is available after at least four clock cycles, and the ROM only has to hold a 4-bit * 4-bit multiplication table (8-bit address, 8-bit data) instead of an 8-bit * 8-bit one (16-bit address, 16-bit data).

# Single ROM multiplier

## Python generator

A single ROM can be modeled with a case statement in Verilog. Writing such a highly repetitive case statement by hand is very inefficient, so here a Python script is used to generate it.

class ROMGenerator(object):
    """Generate a Verilog case-statement ROM holding a multiplication table.

    The generated module ``ROM_<Width>`` maps the address ``{a, b}`` (two
    ``Width``-bit operands concatenated, ``a`` in the upper half) to the
    ``2 * Width``-bit product ``a * b``.
    """

    def __init__(self, Width):
        """Store the operand width (in bits) of the multiplication table."""
        super(ROMGenerator, self).__init__()
        self.Width = Width

    def GeneratorROM(self, FileName):
        """Write the ROM module to ``./<FileName>.v`` and return its text.

        Args:
            FileName: Output file name without the ``.v`` extension.

        Returns:
            The complete Verilog source of the generated module as a string.
        """
        bus_msb = self.Width * 2 - 1  # address and data are both 2*Width bits
        # Header: one placeholder per argument (module name, addr MSB, dout MSB).
        RomContent = ["""
module ROM_%s (
\tinput [%s:0]addr,
\toutput reg [%s:0]dout
);

always @(*) begin
\tcase (addr)
""" % (self.Width, bus_msb, bus_msb)]
        # One case arm per operand pair: address {i, j} stores i * j.
        for i in range(2 ** self.Width):
            for j in range(2 ** self.Width):
                RomContent.append(
                    "\t\t%s'd%s:dout = %s'd%s;" %
                    (2 * self.Width, i * (2 ** self.Width) + j,
                     2 * self.Width, i * j))
        RomContent.append("""\t\tdefault:dout = 'b0;
\tendcase
end
endmodule
""")
        rom_text = "\n".join(RomContent)
        with open("./%s.v" % FileName, "w") as filepoint:
            filepoint.write(rom_text)
        return rom_text

if __name__ == '__main__':
    # Build the 4-bit x 4-bit multiplication ROM and echo the generated Verilog.
    rom_builder = ROMGenerator(4)
    generated_verilog = rom_builder.GeneratorROM("ROM_4")
    print(generated_verilog)

The code is very simple: apart from the module header at the beginning and the footer at the end, all it needs to do is batch-generate lines of the form `\t\t%s'd%s:dout = %s'd%s;`.

## testing platform

The testbench is written in SystemVerilog; it compares the output of the module under test with the result of the built-in `*` operator.

module mult_tb (
);

parameter WIDTH = 4;

logic clk,rst_n;
logic [WIDTH-1:0]multiplier1;
logic [WIDTH-1:0]multiplier2;

logic [2 * WIDTH-1:0]product;

// The ROM is addressed with {a,b}, so drive it with the two operands.
ROM_4 dut(
    .addr({multiplier1,multiplier2}),
    .dout(product)
);

// Free-running clock, toggled every 50 time units.
initial begin
    clk = 1'b0;
    forever begin
        #50 clk = ~clk;
    end
end

initial begin
    rst_n = 1'b1;
    #5 rst_n = 1'b0;
    #10 rst_n = 1'b1;
end

// Stimulus: new random operands on every falling clock edge.
initial begin
    {multiplier1,multiplier2} = 'b0;
    repeat(100) begin
        @(negedge clk);
        // $urandom_range bounds are inclusive, so the maximum is 2**WIDTH-1.
        multiplier1 = (WIDTH)'($urandom_range(0,2 ** WIDTH - 1));
        multiplier2 = (WIDTH)'($urandom_range(0,2 ** WIDTH - 1));
    end
    $stop();
end

// Checker: on every rising edge compare the ROM output with the * operator.
logic [2 * WIDTH-1:0]exp;
initial begin
    exp = 'b0;
    forever begin
        @(posedge clk);
        exp = multiplier1 * multiplier2;
        if(exp == product) begin
            $display("successful");
        end else begin
            $display("fail");
        end
    end
end

endmodule

# Time division multiplexed ROM multiplier

## RTL code

### core part

module serial_multrom_mult_core #(
    parameter HALF_WIDTH = 4
)(
    input clk,    // Clock
    input rst_n,  // Asynchronous reset active low

    input [2 * HALF_WIDTH-1:0]mult1,mult2,
    input start,

    input [2 * HALF_WIDTH-1:0]rom_dout,
    output reg [2 * HALF_WIDTH-1:0]rom_address,

    output reg [4 * HALF_WIDTH-1:0]dout
);

parameter INIT = 1'b0,
          WORK = 1'b1;
reg mode;
reg [1:0]counte_4_decay2;
// State register: INIT -> WORK on start, WORK -> INIT once the twice-delayed
// counter (counte_4_decay2) has reached 3.
always @ (posedge clk or negedge rst_n) begin
    if(~rst_n) begin
        mode <= 1'b0;
    end else begin
        case (mode)
            INIT:begin
                if(start == 1'b1) begin
                    mode <= WORK;
                end else begin
                    mode <= INIT;
                end
            end
            WORK:begin
                if(counte_4_decay2 == 2'd3) begin
                    mode <= INIT;
                end else begin
                    mode <= WORK;
                end
            end
            default:mode <= INIT;
        endcase
    end
end

So far this is the state part of a state machine. When the start signal is asserted the state becomes WORK, and when the operation ends it returns to INIT.

// 2-bit counter that runs while in WORK and is cleared otherwise.
reg [1:0]counte_4;
always @(posedge clk or negedge rst_n) begin : proc_counte_4
    if(~rst_n) begin
        counte_4 <= 'b0;
    end else if(mode == WORK)begin
        counte_4 <= counte_4 + 1'b1;
    end else begin
        counte_4 <= 'b0;
    end
end

// Latch both operands when start is asserted; hold them otherwise.
reg [2 * HALF_WIDTH-1:0]mult1_lock,mult2_lock;
always @(posedge clk or negedge rst_n) begin
    if(~rst_n) begin
        {mult1_lock,mult2_lock} <= 'b0;
    end else if(start == 1'b1)begin
        {mult1_lock,mult2_lock} <= {mult1,mult2};
    end else begin
        {mult1_lock,mult2_lock} <= {mult1_lock,mult2_lock};
    end
end

// Address stage: counte_4 selects which pair of operand halves addresses the
// ROM; counte_4_decay is counte_4 delayed by one clock.
reg [1:0]counte_4_decay;
always @ (posedge clk or negedge rst_n) begin
    if(~rst_n) begin
        {rom_address,counte_4_decay} <= 'b0;
    end else if(start == 1'b1) begin
        {rom_address,counte_4_decay} <= 'b0;
    end else begin
        case (counte_4)
            2'd0:rom_address <= {mult1_lock[HALF_WIDTH-1:0],mult2_lock[HALF_WIDTH-1:0]};
            2'd1:rom_address <= {mult1_lock[2 * HALF_WIDTH-1:HALF_WIDTH],mult2_lock[HALF_WIDTH-1:0]};
            2'd2:rom_address <= {mult1_lock[HALF_WIDTH-1:0],mult2_lock[2 * HALF_WIDTH-1:HALF_WIDTH]};
            2'd3:rom_address <= {mult1_lock[2 * HALF_WIDTH-1:HALF_WIDTH],mult2_lock[2 * HALF_WIDTH-1:HALF_WIDTH]};
            default:rom_address <= 'b0;
        endcase
        counte_4_decay <= counte_4;
    end
end

The above is the input control part. For example, the high four bits of multiplier 1 and the low four bits of multiplier 2 are concatenated into a ROM address to obtain their product.

// Zero-extend the ROM output to the full product width before shifting.
wire [4 * HALF_WIDTH-1:0]rom_dout_ex = {{(2 * HALF_WIDTH){1'b0}},rom_dout};
// Shift stage: counte_4_decay tells which partial product is on rom_dout and
// therefore how far it is shifted; counte_4_decay2 is one more clock of delay.
reg [4 * HALF_WIDTH-1:0]rom_dout_lock;
always @ (posedge clk or negedge rst_n) begin
    if(~rst_n) begin
        {rom_dout_lock,counte_4_decay2} <= 'b0;
    end else if(start == 1'b1) begin
        {rom_dout_lock,counte_4_decay2} <= 'b0;
    end else begin
        case (counte_4_decay)
            2'd0:rom_dout_lock <= rom_dout_ex;
            2'd1:rom_dout_lock <= rom_dout_ex << HALF_WIDTH;
            2'd2:rom_dout_lock <= rom_dout_ex << HALF_WIDTH;
            2'd3:rom_dout_lock <= rom_dout_ex << (2 * HALF_WIDTH);
            default:rom_dout_lock <= 'b0;
        endcase
        counte_4_decay2 <= counte_4_decay;
    end
end

// Accumulator: adds the shifted partial product on every clock spent in WORK;
// cleared when start is asserted outside WORK.
always @ (posedge clk or negedge rst_n) begin
    if(~rst_n) begin
        dout <= 'b0;
    end else if(mode == WORK) begin
        dout <= dout + rom_dout_lock;
    end else if(start == 1'b1) begin
        dout <= 'b0;
    end else begin
        dout <= dout;
    end
end

endmodule

After the data is read from the ROM, it is shifted by the number of bits that corresponds to the operand halves that were multiplied, and then accumulated.
### Top part

module serial_multrom_mult_top #(
    // Must be 4 while the ROM below is the fixed ROM_4 (8-bit address/data).
    parameter HALF_WIDTH = 4
)(
    input clk,    // Clock
    input rst_n,  // Asynchronous reset active low

    input start,
    input [2 * HALF_WIDTH-1:0]mult1,mult2,

    output [4 * HALF_WIDTH-1:0]dout
);

wire [2 * HALF_WIDTH-1:0]rom_dout;
wire [2 * HALF_WIDTH-1:0]rom_address;

// Sequencer: issues the four ROM addresses and accumulates the partial products.
serial_multrom_mult_core #(
    .HALF_WIDTH(HALF_WIDTH)
) u_serial_multrom_mult_core (
    .clk(clk),      // Clock
    .rst_n(rst_n),  // Asynchronous reset active low

    .mult1(mult1),
    .mult2(mult2),
    .start(start),

    .rom_dout(rom_dout),
    .rom_address(rom_address),

    .dout(dout)
);

// Look-up table for the products of two operand halves.
ROM_4 u_ROM_4(
    .addr(rom_address),
    .dout(rom_dout)
);

endmodule

## Testbench

The testbench is adapted from the single-ROM testbench by adding the clock and start signals to the DUT connections.

`timescale 1ns/1ps
module mult_tb (
);

parameter HALF_WIDTH = 4;
parameter WIDTH = HALF_WIDTH * 2;

logic clk,rst_n;
logic start;
logic [WIDTH-1:0]multiplier1;
logic [WIDTH-1:0]multiplier2;

logic [2 * WIDTH-1:0]product;

serial_multrom_mult_top #(
    .HALF_WIDTH(HALF_WIDTH)
) dut (
    .clk(clk),      // Clock
    .rst_n(rst_n),  // Asynchronous reset active low

    .start(start),
    .mult1(multiplier1),
    .mult2(multiplier2),

    .dout(product)
);

// Free-running clock, toggled every 50 time units.
initial begin
    clk = 1'b0;
    forever begin
        #50 clk = ~clk;
    end
end

initial begin
    rst_n = 1'b1;
    #5 rst_n = 1'b0;
    #10 rst_n = 1'b1;
end

logic [2 * WIDTH-1:0]exp;
initial begin
    {multiplier1,multiplier2} = 'b0;
    start = 1'b0;  // keep start at a known value until the first stimulus
    repeat(100) begin
        @(negedge clk);
        start = 1'b1;
        // $urandom_range bounds are inclusive, so the maximum is 2**WIDTH-1.
        multiplier1 = (WIDTH)'($urandom_range(0,2 ** WIDTH - 1));
        multiplier2 = (WIDTH)'($urandom_range(0,2 ** WIDTH - 1));
        exp = multiplier1 * multiplier2;
        // Deassert start after one clock and wait 12 clocks before checking.
        repeat(12) begin
            @(negedge clk);
            start = 'b0;
        end
        if(product == exp) begin
            $display("successful");
        end else begin
            $display("fail");
        end
    end
    $stop();
end

endmodule

Note that ModelSim may report error code 211 when running this simulation. In that case, turn off the optimization option so that the simulation can run normally.

Reference: https://cloud.tencent.com/developer/article/1110628 ROM multiplier basic algorithm single ROM multiplier time-sharing multiplexing ROM multiplier-Cloud + Community-Tencent Cloud