分布式ram的问题
时间:10-02
整理:3721RD
点击:
帮忙看一段程序,后面是testbench。我要实现的是将数据存入一个128x16的矩阵,每次输入32位,输出地时候每次输出128位(也就是输出16行)。但是做仿真的时候,只输出了前15行的数据,没有输出第十六行的。
请高手指点一二。
module shifter(
input [31:0] a,
input clk,
input rst_n,
input [2:0] mb_partition,
input in_valid,
input [31:0] data_in,
input sde,
output[127:0] candata
);
reg [1:0] cnt1;
reg [7:0] cnt2;
reg [7:0] dram_addr_in,dram_addr_out;
reg dram_wr;
reg [127:0]din;
reg [31:0] temp_data1,temp_data2,temp_data3;
wire [127:0] dpo;
reg flag,flag1,flag2;
always@(posedge clk)
begin
if(!rst_n)begin
cnt1<=0;
dram_addr_in <= 0;
dram_addr_out <= 0;
flag <= 0;
end
begin
if(in_valid||flag) begin
flag<=1;
cnt1<=cnt1+1;
dram_wr<=1;
case(cnt1)
0:begin
temp_data1<= data_in;
end
1:begin
temp_data2 <= data_in;
end
2:begin
temp_data3<= data_in;
end
3:begin
dram_addr_in <= dram_addr_in + 1;
din <= {data_in,temp_data3,temp_data2,temp_data1};
end
endcase
end
if(sde)begin
dram_addr_out<=dram_addr_out+1;
//candata<=dpo;
//candata <= dpo;
end
end
end
dpram64 uut(.clk(clk),.addr(dram_addr_in),.dpra(dram_addr_out),.wr(in_valid),.din(din),.dpo(candata));
endmodule
module dpram64 // 64-Deep dual port ram
#( parameter WIDTH= 128,
parameter O_ORDER = "BLK_STD" //4x4块输出顺序
)//BLK_LRTB 4x4光栅顺序, BLK_STD 4x4标准顺序
(
input clk,
input [5:0] addr,
input [5:0] dpra,
inputwr,
input [WIDTH-1:0] din,
output[WIDTH-1:0] spo,
output[WIDTH-1:0] dpo
);
localparam U_CNT= (WIDTH+1)/2;
localparam RE_WIDTH = U_CNT*2;
wire [RE_WIDTH-1:0] _di ;
wire [RE_WIDTH-1:0] _spo;
wire [RE_WIDTH-1:0] _dpo;
wire [5:0] _dpa;
assign _di = { {(RE_WIDTH-WIDTH){1'b0}}, din};
assign dpo = _dpo;
assign spo = _spo;
generate
case(O_ORDER)
"BLK_STD" : assign _dpa = dpra;
"BLK_LRTB": assign _dpa = {dpra[5], dpra[3], dpra[4], dpra[2:0]};
default: assign _dpa = dpra;
endcase
endgenerate
generate
genvar ii;
for(ii = 0; ii < RE_WIDTH; ii = ii+1) begin : ram_unit
// RAM64X1D: Static Dual Port Synchronous RAM 64-Deep by 1-Wide
RAM64X1D inst(
.WCLK (clk),
.WE(wr),
.DPRA5(_dpa[5] ),
.DPRA4(_dpa[4] ),
.DPRA3(_dpa[3] ),
.DPRA2(_dpa[2] ),
.DPRA1(_dpa[1] ),
.DPRA0(_dpa[0] ),
.D(_di[ii] ),
.A5(addr[5] ),
.A4(addr[4] ),
.A3(addr[3] ),
.A2(addr[2] ),
.A1(addr[1] ),
.A0(addr[0] ),
.SPO(_spo[ii]),
.DPO(_dpo[ii])
);
end
endgenerate
endmodule
//testbench:
module tb;
// Inputs
reg [31:0] a;
reg clk;
reg rst_n;
reg [2:0] mb_partition;
reg in_valid;
reg [31:0] data_in;
reg sde;
// Outputs
wire [127:0] candata;
// Instantiate the Unit Under Test (UUT)
shifter uut (
.a(a),
.clk(clk),
.rst_n(rst_n),
.mb_partition(mb_partition),
.in_valid(in_valid),
.data_in(data_in),
.sde(sde),
.candata(candata)
);
always#50 clk = ~clk;
initial begin
// Initialize Inputs
a = 0;
clk = 0;
rst_n = 1;
mb_partition = 0;
in_valid = 0;
data_in = 0;
// Wait 100 ns for global reset to finish
#100;
rst_n = 0;
#100;
rst_n = 1;
in_valid = 1;
data_in = 32'h11_11_11_11;
#100;
data_in = 32'h21_11_11_11;
#100;
data_in = 32'h31_11_11_11;
#100;
data_in = 32'h41_11_11_11;
#100;
data_in = 32'h51_11_11_11;
#100;
data_in = 32'h61_11_11_11;
#100;
data_in = 32'h71_11_11_11;
#100;
data_in = 32'h81_11_11_11;
#100;
data_in = 32'h91_11_11_11;
#100;
data_in = 32'ha1_11_11_11;
#100;
data_in = 32'hb1_11_11_11;
#100;
data_in = 32'hc1_11_11_11;
#100;
data_in = 32'hd1_11_11_11;
#100;
data_in = 32'he1_11_11_11;
#100;
data_in = 32'hf1_11_11_11;
#100;
data_in = 32'h01_11_11_11;
#100;
data_in = 32'h11_11_11_11;
#100;
data_in = 32'h21_11_11_11;
#100;
data_in = 32'h31_11_11_11;
#100;
data_in = 32'h41_11_11_11;
#100;
data_in = 32'h51_11_11_11;
#100;
data_in = 32'h61_11_11_11;
#100;
data_in = 32'h71_11_11_11;
#100;
data_in = 32'h81_11_11_11;
#100;
data_in = 32'h91_11_11_11;
#100;
data_in = 32'ha1_11_11_11;
#100;
data_in = 32'hb1_11_11_11;
#100;
data_in = 32'hc1_11_11_11;
#100;
data_in = 32'hd1_11_11_11;
#100;
data_in = 32'he1_11_11_11;
#100;
data_in = 32'hf1_11_11_11;
#100;
data_in = 32'h01_11_11_11;
#100;
data_in = 32'h11_11_11_11;
#100;
data_in = 32'h21_11_11_11;
#100;
data_in = 32'h31_11_11_11;
#100;
data_in = 32'h41_11_11_11;
#100;
data_in = 32'h51_11_11_11;
#100;
data_in = 32'h61_11_11_11;
#100;
data_in = 32'h71_11_11_11;
#100;
data_in = 32'h81_11_11_11;
#100;
data_in = 32'h91_11_11_11;
#100;
data_in = 32'ha1_11_11_11;
#100;
data_in = 32'hb1_11_11_11;
#100;
data_in = 32'hc1_11_11_11;
#100;
data_in = 32'hd1_11_11_11;
#100;
data_in = 32'he1_11_11_11;
#100;
data_in = 32'hf1_11_11_11;
#100;
data_in = 32'h01_11_11_11;
#100;
data_in = 32'h11_11_11_11;
#100;
data_in = 32'h21_11_11_11;
#100;
data_in = 32'h31_11_11_11;
#100;
data_in = 32'h41_11_11_11;
#100;
data_in = 32'h51_11_11_11;
#100;
data_in = 32'h61_11_11_11;
#100;
data_in = 32'h71_11_11_11;
#100;
data_in = 32'h81_11_11_11;
#100;
data_in = 32'h91_11_11_11;
#100;
data_in = 32'ha1_11_11_11;
#100;
data_in = 32'hb1_11_11_11;
#100;
data_in = 32'hcc_11_11_11;
#100;
data_in = 32'hd1_11_11_11;
#100;
data_in = 32'he1_11_11_11;
#100;
data_in = 32'hf1_11_11_11;
#100;
data_in = 32'h01_11_11_11;
#100;
in_valid = 0;
// Add stimulus here
#500;
sde = 1;
#2000;
sde = 0;
end
endmodule
请高手指点一二。
module shifter(
input [31:0] a,
input clk,
input rst_n,
input [2:0] mb_partition,
input in_valid,
input [31:0] data_in,
input sde,
output[127:0] candata
);
reg [1:0] cnt1;
reg [7:0] cnt2;
reg [7:0] dram_addr_in,dram_addr_out;
reg dram_wr;
reg [127:0]din;
reg [31:0] temp_data1,temp_data2,temp_data3;
wire [127:0] dpo;
reg flag,flag1,flag2;
always@(posedge clk)
begin
if(!rst_n)begin
cnt1<=0;
dram_addr_in <= 0;
dram_addr_out <= 0;
flag <= 0;
end
begin
if(in_valid||flag) begin
flag<=1;
cnt1<=cnt1+1;
dram_wr<=1;
case(cnt1)
0:begin
temp_data1<= data_in;
end
1:begin
temp_data2 <= data_in;
end
2:begin
temp_data3<= data_in;
end
3:begin
dram_addr_in <= dram_addr_in + 1;
din <= {data_in,temp_data3,temp_data2,temp_data1};
end
endcase
end
if(sde)begin
dram_addr_out<=dram_addr_out+1;
//candata<=dpo;
//candata <= dpo;
end
end
end
dpram64 uut(.clk(clk),.addr(dram_addr_in),.dpra(dram_addr_out),.wr(in_valid),.din(din),.dpo(candata));
endmodule
module dpram64 // 64-Deep dual port ram
#( parameter WIDTH= 128,
parameter O_ORDER = "BLK_STD" //4x4块输出顺序
)//BLK_LRTB 4x4光栅顺序, BLK_STD 4x4标准顺序
(
input clk,
input [5:0] addr,
input [5:0] dpra,
inputwr,
input [WIDTH-1:0] din,
output[WIDTH-1:0] spo,
output[WIDTH-1:0] dpo
);
localparam U_CNT= (WIDTH+1)/2;
localparam RE_WIDTH = U_CNT*2;
wire [RE_WIDTH-1:0] _di ;
wire [RE_WIDTH-1:0] _spo;
wire [RE_WIDTH-1:0] _dpo;
wire [5:0] _dpa;
assign _di = { {(RE_WIDTH-WIDTH){1'b0}}, din};
assign dpo = _dpo;
assign spo = _spo;
generate
case(O_ORDER)
"BLK_STD" : assign _dpa = dpra;
"BLK_LRTB": assign _dpa = {dpra[5], dpra[3], dpra[4], dpra[2:0]};
default: assign _dpa = dpra;
endcase
endgenerate
generate
genvar ii;
for(ii = 0; ii < RE_WIDTH; ii = ii+1) begin : ram_unit
// RAM64X1D: Static Dual Port Synchronous RAM 64-Deep by 1-Wide
RAM64X1D inst(
.WCLK (clk),
.WE(wr),
.DPRA5(_dpa[5] ),
.DPRA4(_dpa[4] ),
.DPRA3(_dpa[3] ),
.DPRA2(_dpa[2] ),
.DPRA1(_dpa[1] ),
.DPRA0(_dpa[0] ),
.D(_di[ii] ),
.A5(addr[5] ),
.A4(addr[4] ),
.A3(addr[3] ),
.A2(addr[2] ),
.A1(addr[1] ),
.A0(addr[0] ),
.SPO(_spo[ii]),
.DPO(_dpo[ii])
);
end
endgenerate
endmodule
//testbench:
module tb;
// Inputs
reg [31:0] a;
reg clk;
reg rst_n;
reg [2:0] mb_partition;
reg in_valid;
reg [31:0] data_in;
reg sde;
// Outputs
wire [127:0] candata;
// Instantiate the Unit Under Test (UUT)
shifter uut (
.a(a),
.clk(clk),
.rst_n(rst_n),
.mb_partition(mb_partition),
.in_valid(in_valid),
.data_in(data_in),
.sde(sde),
.candata(candata)
);
always#50 clk = ~clk;
initial begin
// Initialize Inputs
a = 0;
clk = 0;
rst_n = 1;
mb_partition = 0;
in_valid = 0;
data_in = 0;
// Wait 100 ns for global reset to finish
#100;
rst_n = 0;
#100;
rst_n = 1;
in_valid = 1;
data_in = 32'h11_11_11_11;
#100;
data_in = 32'h21_11_11_11;
#100;
data_in = 32'h31_11_11_11;
#100;
data_in = 32'h41_11_11_11;
#100;
data_in = 32'h51_11_11_11;
#100;
data_in = 32'h61_11_11_11;
#100;
data_in = 32'h71_11_11_11;
#100;
data_in = 32'h81_11_11_11;
#100;
data_in = 32'h91_11_11_11;
#100;
data_in = 32'ha1_11_11_11;
#100;
data_in = 32'hb1_11_11_11;
#100;
data_in = 32'hc1_11_11_11;
#100;
data_in = 32'hd1_11_11_11;
#100;
data_in = 32'he1_11_11_11;
#100;
data_in = 32'hf1_11_11_11;
#100;
data_in = 32'h01_11_11_11;
#100;
data_in = 32'h11_11_11_11;
#100;
data_in = 32'h21_11_11_11;
#100;
data_in = 32'h31_11_11_11;
#100;
data_in = 32'h41_11_11_11;
#100;
data_in = 32'h51_11_11_11;
#100;
data_in = 32'h61_11_11_11;
#100;
data_in = 32'h71_11_11_11;
#100;
data_in = 32'h81_11_11_11;
#100;
data_in = 32'h91_11_11_11;
#100;
data_in = 32'ha1_11_11_11;
#100;
data_in = 32'hb1_11_11_11;
#100;
data_in = 32'hc1_11_11_11;
#100;
data_in = 32'hd1_11_11_11;
#100;
data_in = 32'he1_11_11_11;
#100;
data_in = 32'hf1_11_11_11;
#100;
data_in = 32'h01_11_11_11;
#100;
data_in = 32'h11_11_11_11;
#100;
data_in = 32'h21_11_11_11;
#100;
data_in = 32'h31_11_11_11;
#100;
data_in = 32'h41_11_11_11;
#100;
data_in = 32'h51_11_11_11;
#100;
data_in = 32'h61_11_11_11;
#100;
data_in = 32'h71_11_11_11;
#100;
data_in = 32'h81_11_11_11;
#100;
data_in = 32'h91_11_11_11;
#100;
data_in = 32'ha1_11_11_11;
#100;
data_in = 32'hb1_11_11_11;
#100;
data_in = 32'hc1_11_11_11;
#100;
data_in = 32'hd1_11_11_11;
#100;
data_in = 32'he1_11_11_11;
#100;
data_in = 32'hf1_11_11_11;
#100;
data_in = 32'h01_11_11_11;
#100;
data_in = 32'h11_11_11_11;
#100;
data_in = 32'h21_11_11_11;
#100;
data_in = 32'h31_11_11_11;
#100;
data_in = 32'h41_11_11_11;
#100;
data_in = 32'h51_11_11_11;
#100;
data_in = 32'h61_11_11_11;
#100;
data_in = 32'h71_11_11_11;
#100;
data_in = 32'h81_11_11_11;
#100;
data_in = 32'h91_11_11_11;
#100;
data_in = 32'ha1_11_11_11;
#100;
data_in = 32'hb1_11_11_11;
#100;
data_in = 32'hcc_11_11_11;
#100;
data_in = 32'hd1_11_11_11;
#100;
data_in = 32'he1_11_11_11;
#100;
data_in = 32'hf1_11_11_11;
#100;
data_in = 32'h01_11_11_11;
#100;
in_valid = 0;
// Add stimulus here
#500;
sde = 1;
#2000;
sde = 0;
end
endmodule
代码太多,还是弄个框图比较好,把方案说清楚,看代码费事!
谢谢楼上关心,问题已解决,问题是我的分布式ram的使能端没有及时赋零。