core-v-verif系列之cva6 verilator Model之Variane_testharness.sv相关模块sram-CSDN博客

本文链接：https://blog.csdn.net/lincolnjunior_lj/article/details/146554054

commitID：21506e4

sram


  // Instantiates the sram module as i_sram, sized from the AXI_* parameters and NUM_WORDS.
  sram #(
    .DATA_WIDTH ( AXI_DATA_WIDTH ),
    .USER_WIDTH ( AXI_USER_WIDTH ),
    .USER_EN    ( AXI_USER_EN    ),
    // SIM_INIT is "none" when VERILATOR is defined and "zeros" otherwise.
`ifdef VERILATOR
    .SIM_INIT   ( "none"         ),
`else
    .SIM_INIT   ( "zeros"        ),
`endif
    .NUM_WORDS  ( NUM_WORDS      )
  ) i_sram (
    .clk_i      ( clk_i                                                                       ),
    .rst_ni     ( rst_ni                                                                      ),
    .req_i      ( req                                                                         ),
    .we_i       ( we                                                                          ),
    // Address slice: skips the low $clog2(AXI_DATA_WIDTH/8) bits of addr and keeps the
    // next $clog2(NUM_WORDS) bits, i.e. a word index (assuming addr is a byte address).
    .addr_i     ( addr[$clog2(NUM_WORDS)-1+$clog2(AXI_DATA_WIDTH/8):$clog2(AXI_DATA_WIDTH/8)] ),
    .wuser_i    ( wuser                                                                       ),
    .wdata_i    ( wdata                                                                       ),
    .be_i       ( be                                                                          ),
    .ruser_o    ( ruser                                                                       ),
    .rdata_o    ( rdata                                                                       )
  );

参数说明

DATA_WIDTH: 指定数据总线的宽度，这里绑定到 AXI_DATA_WIDTH。
USER_WIDTH: 指定用户信号的宽度，绑定到 AXI_USER_WIDTH。
USER_EN: 启用用户信号，绑定到 AXI_USER_EN。
SIM_INIT: 仿真初始化选项。如果使用 Verilator 仿真工具，初始化为 “none”；否则初始化为 “zeros”。
NUM_WORDS: 指定 SRAM 的存储单元数量，绑定到 NUM_WORDS。

信号连接

clk_i: 时钟输入信号，连接到 clk_i。
rst_ni: 复位输入信号（低电平有效），连接到 rst_ni。
req_i: 请求信号，连接到 req。
we_i: 写使能信号，连接到 we。
addr_i: 地址输入信号。这里对 addr 做了位切片：丢弃最低的 $clog2(AXI_DATA_WIDTH/8) 位（一个数据字内的字节偏移），再取其上的 $clog2(NUM_WORDS) 位作为字地址，位宽正好与 sram 的 addr_i 端口一致。
wuser_i: 写用户信号，连接到 wuser。
wdata_i: 写数据输入信号，连接到 wdata。
be_i: 字节使能信号，连接到 be。
ruser_o: 读用户信号输出，连接到 ruser。
rdata_o: 读数据输出，连接到 rdata。

代码特点

参数化设计: 通过参数化配置，使得 SRAM 模块可以灵活适应不同的数据宽度、用户信号宽度和存储容量需求。
条件编译: 使用 ifdef VERILATOR 进行条件编译，根据是否使用 Verilator 仿真工具来设置不同的初始化选项。
地址切片: 对地址信号进行切片操作，确保其适用于 SRAM 的地址范围，这在处理不同宽度的地址总线时非常有用。

module sram定义


// Single-port memory built from 64-bit wide tc_sram_wrapper slices.
//
// The write data, user bits and byte enables are padded up to the slice
// granularity (64 data bits / 8 byte-enable bits), one tc_sram_wrapper is
// generated per 64-bit slice, and the read data is cut back to DATA_WIDTH /
// USER_WIDTH on the way out. When USER_EN > 0 a second memory of the same
// shape stores the user bits; otherwise the user read data is tied to zero.
//
// Parameters:
//   DATA_WIDTH - width of wdata_i / rdata_o in bits
//   USER_WIDTH - width of wuser_i / ruser_o in bits
//   USER_EN    - > 0 instantiates the user-bit memories
//   NUM_WORDS  - number of words; addr_i is $clog2(NUM_WORDS) bits wide
//   SIM_INIT   - forwarded unchanged as SimInit to every tc_sram_wrapper
//   OUT_REGS   - NOTE(review): not referenced anywhere in this module body
module sram #(
    parameter DATA_WIDTH = 64,
    parameter USER_WIDTH = 1,
    parameter USER_EN    = 0,
    parameter NUM_WORDS  = 1024,
    parameter SIM_INIT   = "none",
    parameter OUT_REGS   = 0     // enables output registers in FPGA macro (read lat = 2)
)(
   input  logic                          clk_i,
   input  logic                          rst_ni,
   input  logic                          req_i,
   input  logic                          we_i,
   input  logic [$clog2(NUM_WORDS)-1:0]  addr_i,
   input  logic [USER_WIDTH-1:0]         wuser_i,
   input  logic [DATA_WIDTH-1:0]         wdata_i,
   input  logic [(DATA_WIDTH+7)/8-1:0]   be_i,
   output logic [USER_WIDTH-1:0]         ruser_o,
   output logic [DATA_WIDTH-1:0]         rdata_o
);

// DATA_WIDTH rounded up to the next multiple of 64 bits.
localparam DATA_WIDTH_ALIGNED = ((DATA_WIDTH+63)/64)*64;
localparam USER_WIDTH_ALIGNED = DATA_WIDTH_ALIGNED; // To be fine tuned to reduce memory size
// Number of byte enables, ceil(DATA_WIDTH/8), rounded up to the next multiple of 8
// so that every 64-bit slice gets a full 8-bit byte-enable group.
localparam BE_WIDTH_ALIGNED   = (((DATA_WIDTH+7)/8+7)/8)*8;

// Padded copies of the write-side inputs and the raw (padded) read-side outputs.
logic [DATA_WIDTH_ALIGNED-1:0]  wdata_aligned;
logic [USER_WIDTH_ALIGNED-1:0]  wuser_aligned;
logic [BE_WIDTH_ALIGNED-1:0]    be_aligned;
logic [DATA_WIDTH_ALIGNED-1:0]  rdata_aligned;
logic [USER_WIDTH_ALIGNED-1:0]  ruser_aligned;


// align to 64 bits for inferrable macro below
always_comb begin : p_align
    // Default everything to zero so the padding bits above the real width are 0.
    wdata_aligned                    ='0;
    wuser_aligned                    ='0;
    be_aligned                       ='0;
    // Place the real inputs in the low bits of the padded vectors.
    wdata_aligned[DATA_WIDTH-1:0]    = wdata_i;
    wuser_aligned[USER_WIDTH-1:0]    = wuser_i;
    // The whole of be_aligned is assigned here; be_i may be narrower, in which case
    // the assignment zero-extends it.
    be_aligned[BE_WIDTH_ALIGNED-1:0] = be_i;

    // Drop the padding again on the read side.
    rdata_o = rdata_aligned[DATA_WIDTH-1:0];
    ruser_o = ruser_aligned[USER_WIDTH-1:0];
end

  // One iteration per 64-bit slice of the padded data word. All slices share
  // clk_i, rst_ni, req_i, we_i and addr_i.
  for (genvar k = 0; k<(DATA_WIDTH+63)/64; k++) begin : gen_cut
      // unused byte-enable segments (8bits) are culled by the tool
      // Data memory for slice k.
      tc_sram_wrapper #(
        .NumWords(NUM_WORDS),           // Number of Words in data array
        .DataWidth(64),                 // Data signal width
        .ByteWidth(32'd8),              // Width of a data byte
        .NumPorts(32'd1),               // Number of read and write ports
        .Latency(32'd1),                // Latency when the read data is available
        .SimInit(SIM_INIT),             // Simulation initialization
        .PrintSimCfg(1'b0)              // Print configuration
      ) i_tc_sram_wrapper (
          .clk_i    ( clk_i                     ),
          .rst_ni   ( rst_ni                    ),
          .req_i    ( req_i                     ),
          .we_i     ( we_i                      ),
          .be_i     ( be_aligned[k*8 +: 8]      ),
          .wdata_i  ( wdata_aligned[k*64 +: 64] ),
          .addr_i   ( addr_i                    ),
          .rdata_o  ( rdata_aligned[k*64 +: 64] )
      );
      if (USER_EN > 0) begin : gen_mem_user
        // User-bit memory for slice k. It is written with the same byte-enable
        // slice as the data memory above.
        tc_sram_wrapper #(
          .NumWords(NUM_WORDS),           // Number of Words in data array
          .DataWidth(64),                 // Data signal width
          .ByteWidth(32'd8),              // Width of a data byte
          .NumPorts(32'd1),               // Number of read and write ports
          .Latency(32'd1),                // Latency when the read data is available
          .SimInit(SIM_INIT),             // Simulation initialization
          .PrintSimCfg(1'b0)              // Print configuration
        ) i_tc_sram_wrapper_user (
            .clk_i    ( clk_i                     ),
            .rst_ni   ( rst_ni                    ),
            .req_i    ( req_i                     ),
            .we_i     ( we_i                      ),
            .be_i     ( be_aligned[k*8 +: 8]      ),
            .wdata_i  ( wuser_aligned[k*64 +: 64] ),
            .addr_i   ( addr_i                    ),
            .rdata_o  ( ruser_aligned[k*64 +: 64] )
        );
      end else begin : gen_mem_user
          // No user memory: the user read data for this slice is constant zero.
          assign ruser_aligned[k*64 +: 64] = '0;
          // Simulation-only placeholder: named blocks that reuse the instance names
          // i_tc_sram_wrapper_user / i_tc_sram and declare init_val and sram arrays
          // that nothing in this module drives.
          // NOTE(review): presumably this keeps hierarchical references to
          // ...gen_mem_user.i_tc_sram_wrapper_user.i_tc_sram.sram resolvable when
          // USER_EN is 0 -- confirm against the testbench that accesses it.
          // synthesis translate_off
          begin: i_tc_sram_wrapper_user
            begin: i_tc_sram
              localparam type data_t = logic [63:0];
              data_t init_val [0:0];
              data_t sram [NUM_WORDS-1:0] /* verilator public_flat */;
            end
          end
          // synthesis translate_on
      end
  end
endmodule : sram

该 sram 模块实现了一个可配置的同步 SRAM（静态随机存取存储器），适用于 FPGA 或 ASIC 设计。该模块封装了一层 SRAM 访问逻辑，并对输入/输出数据进行 64 位对齐，以便推断出 可综合（synthesizable） 的存储器结构。

1. 模块参数

parameter DATA_WIDTH = 64,       // 数据宽度，默认 64 位
parameter USER_WIDTH = 1,        // 用户定义宽度（存储额外信息）
parameter USER_EN    = 0,        // 是否启用用户数据存储
parameter NUM_WORDS  = 1024,     // 存储器中的字数
parameter SIM_INIT   = "none",   // 仿真时的初始化方式
parameter OUT_REGS   = 0         // 是否启用输出寄存器（影响读延迟）

参数说明：

DATA_WIDTH：决定存储的数据宽度，默认 64 位。
USER_WIDTH：额外的用户数据宽度，默认 1 位（可选）。
USER_EN：如果启用，则存储并返回 USER_WIDTH 用户数据。
NUM_WORDS：SRAM 的存储单元数量，默认 1024（1K）。
SIM_INIT：用于仿真初始化方式，可能用于调试时加载预设值。
OUT_REGS：按注释的说法，用于在 FPGA 宏单元上启用输出寄存器（读延迟变为 2）；但在本文给出的 sram 模块代码中，该参数并未被引用，tc_sram_wrapper 的 Latency 固定为 1。

2. 端口信号

input  logic                          clk_i,   // 时钟信号
input  logic                          rst_ni,  // 低电平复位
input  logic                          req_i,   // 读写请求信号
input  logic                          we_i,    // 写使能信号（1 = 写入，0 = 读取）
input  logic [$clog2(NUM_WORDS)-1:0]  addr_i,  // 访问地址
input  logic [USER_WIDTH-1:0]         wuser_i, // 用户数据输入
input  logic [DATA_WIDTH-1:0]         wdata_i, // 数据输入
input  logic [(DATA_WIDTH+7)/8-1:0]   be_i,    // 字节使能信号（决定哪些字节被写入）
output logic [USER_WIDTH-1:0]         ruser_o, // 用户数据输出
output logic [DATA_WIDTH-1:0]         rdata_o  // 数据输出

信号说明：

clk_i：主时钟信号。
rst_ni：低电平复位，0 时复位模块。
req_i：SRAM 访问请求信号。
we_i：写使能信号，决定 读/写 操作。
addr_i：存储器的地址，位宽为 log2(NUM_WORDS)。
wdata_i / rdata_o：写/读 数据通道。
wuser_i / ruser_o：用户数据存取接口（可选）。
be_i：字节使能信号，决定哪些字节需要写入。

3. 变量定义

localparam DATA_WIDTH_ALIGNED = ((DATA_WIDTH+63)/64)*64;
localparam USER_WIDTH_ALIGNED = DATA_WIDTH_ALIGNED; // 待优化，减少内存占用
localparam BE_WIDTH_ALIGNED   = (((DATA_WIDTH+7)/8+7)/8)*8;

对齐参数计算：

DATA_WIDTH_ALIGNED：将 DATA_WIDTH 向上取整到 64 的倍数，保证 DATA_WIDTH_ALIGNED 为 64 的整数倍且不小于 DATA_WIDTH。
USER_WIDTH_ALIGNED：用户数据对齐，目前等于 DATA_WIDTH_ALIGNED（可能浪费存储空间）。
BE_WIDTH_ALIGNED：字节使能位对齐，对 8 位 进行字节对齐，确保存储器支持部分写。

4. 64 位对齐数据处理

always_comb begin : p_align
    wdata_aligned = '0;
    wuser_aligned = '0;
    be_aligned    = '0;

    wdata_aligned[DATA_WIDTH-1:0] = wdata_i;
    wuser_aligned[USER_WIDTH-1:0] = wuser_i;
    be_aligned[BE_WIDTH_ALIGNED-1:0] = be_i;

    rdata_o = rdata_aligned[DATA_WIDTH-1:0];
    ruser_o = ruser_aligned[USER_WIDTH-1:0];
end

作用：

wdata_aligned、wuser_aligned 和 be_aligned 先清零，然后只在低位填入有效的输入（wdata_i、wuser_i、be_i），保证补齐出来的高位恒为 0，防止无效数据影响存储。
rdata_o 和 ruser_o 只返回有效的 DATA_WIDTH 和 USER_WIDTH 数据。

why?

这段代码的目的是将输入信号 wdata_i、wuser_i 和 be_i 对齐到 64 位边界，以适配后续可推断的宏（如 tc_sram_wrapper）。下面详细解释代码是如何实现 64 位对齐的：

参数定义与对齐宽度计算

localparam DATA_WIDTH_ALIGNED = ((DATA_WIDTH+63)/64)*64;
localparam USER_WIDTH_ALIGNED = DATA_WIDTH_ALIGNED; // To be fine tuned to reduce memory size
localparam BE_WIDTH_ALIGNED   = (((DATA_WIDTH+7)/8+7)/8)*8;

DATA_WIDTH_ALIGNED：通过 ((DATA_WIDTH + 63) / 64) * 64 计算得出，确保 DATA_WIDTH_ALIGNED 是 64 的整数倍且不小于 DATA_WIDTH。例如，如果 DATA_WIDTH 是 80，那么 (80 + 63) / 64 结果为 2，2 * 64 得到 128，即 DATA_WIDTH_ALIGNED 为 128。
USER_WIDTH_ALIGNED：这里直接赋值为 DATA_WIDTH_ALIGNED，后续可根据实际需求调整以减少内存使用。
BE_WIDTH_ALIGNED：同样通过计算确保其为 8 的整数倍，以满足字节使能信号的对齐要求。

信号声明

logic [DATA_WIDTH_ALIGNED-1:0]  wdata_aligned;
logic [USER_WIDTH_ALIGNED-1:0]  wuser_aligned;
logic [BE_WIDTH_ALIGNED-1:0]    be_aligned;
logic [DATA_WIDTH_ALIGNED-1:0]  rdata_aligned;
logic [USER_WIDTH_ALIGNED-1:0]  ruser_aligned;

声明了用于对齐的中间信号：wdata_aligned、wuser_aligned、rdata_aligned、ruser_aligned 的宽度是 64 位的整数倍，be_aligned 的宽度是 8 位的整数倍（每个 64 位分块对应 8 位字节使能）。

组合逻辑块 p_align

// align to 64 bits for inferrable macro below
always_comb begin : p_align
    wdata_aligned                    ='0;
    wuser_aligned                    ='0;
    be_aligned                       ='0;
    wdata_aligned[DATA_WIDTH-1:0]    = wdata_i;
    wuser_aligned[USER_WIDTH-1:0]    = wuser_i;
    be_aligned[BE_WIDTH_ALIGNED-1:0] = be_i;

    rdata_o = rdata_aligned[DATA_WIDTH-1:0];
    ruser_o = ruser_aligned[USER_WIDTH-1:0];
end

清零操作：
```
wdata_aligned ='0;
wuser_aligned ='0;
be_aligned    ='0;
```
首先将对齐信号全部清零，确保高位部分为 0。
数据赋值：
```
wdata_aligned[DATA_WIDTH-1:0]    = wdata_i;
wuser_aligned[USER_WIDTH-1:0]    = wuser_i;
be_aligned[BE_WIDTH_ALIGNED-1:0] = be_i;
```
将输入信号 wdata_i、wuser_i 赋值给对齐信号的低位部分，高位保持前一步清零得到的 0；be_i 则赋给 be_aligned 的整个范围，当 be_i 位宽较窄时，高位由赋值时的零扩展补 0。由于 wdata_aligned、wuser_aligned 的宽度是 64 位的整数倍（be_aligned 为 8 位的整数倍），这样就实现了 64 位对齐。
输出赋值：
```
rdata_o = rdata_aligned[DATA_WIDTH-1:0];
ruser_o = ruser_aligned[USER_WIDTH-1:0];
```
将对齐后的读取数据信号 rdata_aligned 和 ruser_aligned 的低位部分赋值给输出信号 rdata_o 和 ruser_o，去除了多余的高位部分。

综上所述，通过计算对齐宽度、声明对齐信号、清零操作和数据赋值，代码实现了将输入信号对齐到 64 位边界的功能。

5. SRAM 访问逻辑

for (genvar k = 0; k<(DATA_WIDTH+63)/64; k++) begin : gen_cut
    tc_sram_wrapper #(
        .NumWords(NUM_WORDS),
        .DataWidth(64),
        .ByteWidth(32'd8),
        .NumPorts(32'd1),
        .Latency(32'd1),
        .SimInit(SIM_INIT),
        .PrintSimCfg(1'b0)
    ) i_tc_sram_wrapper (
        .clk_i    ( clk_i                     ),
        .rst_ni   ( rst_ni                    ),
        .req_i    ( req_i                     ),
        .we_i     ( we_i                      ),
        .be_i     ( be_aligned[k*8 +: 8]      ),
        .wdata_i  ( wdata_aligned[k*64 +: 64] ),
        .addr_i   ( addr_i                    ),
        .rdata_o  ( rdata_aligned[k*64 +: 64] )
    );

作用：

基于 64 位分块存储数据，如果 DATA_WIDTH > 64，则把数据按 64 位切分，并行存入多个 tc_sram_wrapper 实例（所有实例共用同一组 req_i、we_i 和 addr_i，在同一周期访问同一地址）。
通过 genvar k 生成多个存储实例，用于 分块存取数据。
be_aligned[k*8 +: 8] 提取当前 64 位的字节使能信号，确保部分写入正确性。
wdata_aligned[k*64 +: 64] 将数据拆分，分别存入多个存储块。

6. 用户数据存储

if (USER_EN > 0) begin : gen_mem_user
    tc_sram_wrapper #(
        .NumWords(NUM_WORDS),
        .DataWidth(64),
        .ByteWidth(32'd8),
        .NumPorts(32'd1),
        .Latency(32'd1),
        .SimInit(SIM_INIT),
        .PrintSimCfg(1'b0)
    ) i_tc_sram_wrapper_user (
        .clk_i    ( clk_i                     ),
        .rst_ni   ( rst_ni                    ),
        .req_i    ( req_i                     ),
        .we_i     ( we_i                      ),
        .be_i     ( be_aligned[k*8 +: 8]      ),
        .wdata_i  ( wuser_aligned[k*64 +: 64] ),
        .addr_i   ( addr_i                    ),
        .rdata_o  ( ruser_aligned[k*64 +: 64] )
    );
    end else begin : gen_mem_user
          assign ruser_aligned[k*64 +: 64] = '0;
          // synthesis translate_off
          begin: i_tc_sram_wrapper_user
            begin: i_tc_sram
              localparam type data_t = logic [63:0];
              data_t init_val [0:0];
              data_t sram [NUM_WORDS-1:0] /* verilator public_flat */;
            end
          end
          // synthesis translate_on
      end
  end

作用：

只有 USER_EN > 0 时，才 额外存储用户数据。
如果 USER_EN = 0，则 ruser_aligned[k*64 +: 64] = '0;，即不存储用户数据。

总结

该模块支持可变数据宽度，但内部以 64 位 为单位进行存取。
支持部分写入（使用 be_i 字节使能）。
支持用户数据存储（USER_EN 控制）。
数据对齐逻辑确保 综合优化，保证 tc_sram_wrapper 能够正确推断出 SRAM 资源。

这是一种 高效的 FPGA/ASIC SRAM 设计，兼顾 灵活性 和 性能优化。

module tc_sram_wrapper 定义


// Wrapper around tc_sram: every parameter and every port is forwarded
// unchanged to a single tc_sram instance named i_tc_sram.
//
// The instance is enclosed in a synthesis pragma pair, so tools that honor
// those pragmas see an empty module body; only simulation elaborates tc_sram.
module tc_sram_wrapper #(
  parameter int unsigned NumWords     = 32'd1024, // Number of Words in data array
  parameter int unsigned DataWidth    = 32'd128,  // Data signal width
  parameter int unsigned ByteWidth    = 32'd8,    // Width of a data byte
  parameter int unsigned NumPorts     = 32'd2,    // Number of read and write ports
  parameter int unsigned Latency      = 32'd1,    // Latency when the read data is available
  parameter              SimInit      = "none",   // Simulation initialization
  parameter bit          PrintSimCfg  = 1'b0,     // Print configuration
  // DEPENDENT PARAMETERS, DO NOT OVERWRITE!
  // AddrWidth is $clog2(NumWords), but never less than 1 bit.
  parameter int unsigned AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1,
  parameter int unsigned BeWidth   = (DataWidth + ByteWidth - 32'd1) / ByteWidth, // ceil_div
  parameter type         addr_t    = logic [AddrWidth-1:0],
  parameter type         data_t    = logic [DataWidth-1:0],
  parameter type         be_t      = logic [BeWidth-1:0]
) (
  input  logic                 clk_i,      // Clock
  input  logic                 rst_ni,     // Asynchronous reset active low
  // input ports (one element per port, NumPorts elements each)
  input  logic  [NumPorts-1:0] req_i,      // request
  input  logic  [NumPorts-1:0] we_i,       // write enable
  input  addr_t [NumPorts-1:0] addr_i,     // request address
  input  data_t [NumPorts-1:0] wdata_i,    // write data
  input  be_t   [NumPorts-1:0] be_i,       // write byte enable
  // output ports
  output data_t [NumPorts-1:0] rdata_o     // read data
);

// Simulation-only section: the pragma pair below hides the tc_sram instance
// from synthesis tools that honor it.
// synthesis translate_off

  tc_sram #(
    .NumWords(NumWords),
    .DataWidth(DataWidth),
    .ByteWidth(ByteWidth),
    .NumPorts(NumPorts),
    .Latency(Latency),
    .SimInit(SimInit),
    .PrintSimCfg(PrintSimCfg)
  ) i_tc_sram (
      .clk_i    ( clk_i   ),
      .rst_ni   ( rst_ni  ),
      .req_i    ( req_i   ),
      .we_i     ( we_i    ),
      .be_i     ( be_i    ),
      .wdata_i  ( wdata_i ),
      .addr_i   ( addr_i  ),
      .rdata_o  ( rdata_o )
    );

// synthesis translate_on

endmodule

tc_sram_wrapper 是一个 SRAM 存储器封装模块，主要用于 FPGA 或 ASIC 设计中，并包含以下关键特性：

参数化设计：支持可变的存储深度、数据宽度、端口数量等。
多端口支持：可同时进行多个读写操作。
可变延迟：可通过 Latency 配置读数据的延迟，以匹配不同的存储架构。
仿真支持：tc_sram 实例位于 synthesis translate_off/on 之间，只在仿真时被实例化；综合时这部分代码被忽略，可以由真实的 SRAM 宏单元来替代。

1. 主要参数解析

parameter int unsigned NumWords     = 32'd1024, // 存储单元的总数量（即 SRAM 深度）
parameter int unsigned DataWidth    = 32'd128,  // 数据总宽度
parameter int unsigned ByteWidth    = 32'd8,    // 单字节宽度，通常为 8
parameter int unsigned NumPorts     = 32'd2,    // 端口数量，支持多端口读写
parameter int unsigned Latency      = 32'd1,    // 读取数据的延迟
parameter              SimInit      = "none",   // 用于仿真的初始化方法
parameter bit          PrintSimCfg  = 1'b0,     // 是否打印仿真配置信息

NumWords 指定了存储器的 总字数，即存储的 数据块数量。
DataWidth 定义了 每个数据块的位宽。
ByteWidth 表示 每个字节的位宽，通常为 8（一个标准字节）。
NumPorts 允许多个读写端口，支持 多端口 SRAM 设计。
Latency 设定了 读取数据的延迟，决定了数据返回时钟周期数。
SimInit 用于指定 仿真时存储阵列的初始化方式（本文的实例中使用的取值是 "none" 和 "zeros"）。
PrintSimCfg 用于仿真时是否 打印配置信息。

2. 计算参数

parameter int unsigned AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1,

AddrWidth 计算 地址位宽，$clog2(NumWords) 计算能索引 NumWords 个存储单元所需的地址位数。例如：
- NumWords = 1024 → AddrWidth = 10（因为 2^10 = 1024）。
- NumWords = 2048 → AddrWidth = 11（因为 2^11 = 2048）。

parameter int unsigned BeWidth   = (DataWidth + ByteWidth - 32'd1) / ByteWidth, // ceil_div

BeWidth 计算 Byte Enable（BE）的宽度，它表示 多少个字节可以单独写入。
BeWidth = (DataWidth + ByteWidth - 1) / ByteWidth 等效于 向上取整：
- DataWidth = 64 → BeWidth = 8（8 个字节）。
- DataWidth = 128 → BeWidth = 16（16 个字节）。

parameter type         addr_t    = logic [AddrWidth-1:0],
parameter type         data_t    = logic [DataWidth-1:0],
parameter type         be_t      = logic [BeWidth-1:0]

addr_t 定义 地址类型，确保 addr_i 信号的位宽符合 AddrWidth。
data_t 定义 数据类型，确保 wdata_i、rdata_o 具有 DataWidth 位宽。
be_t 定义 Byte Enable 类型，确保 be_i 具有 BeWidth 位宽。

3. 端口信号解析

input  logic                 clk_i,      // 时钟信号
input  logic                 rst_ni,     // 低电平有效复位信号
// 读写控制信号
input  logic  [NumPorts-1:0] req_i,      // 读/写请求信号
input  logic  [NumPorts-1:0] we_i,       // 写使能信号
input  addr_t [NumPorts-1:0] addr_i,     // 访问地址
input  data_t [NumPorts-1:0] wdata_i,    // 写入数据
input  be_t   [NumPorts-1:0] be_i,       // 写字节使能
// 读取数据输出
output data_t [NumPorts-1:0] rdata_o     // 读取数据

req_i：控制是否 执行读/写操作，每个端口都有独立的请求信号。
we_i：表示 当前请求是否为写操作，如果 we_i = 1，则 wdata_i 被写入 addr_i 处。
addr_i：指向 存储单元的地址，宽度由 AddrWidth 决定。
wdata_i：要写入的数据，宽度为 DataWidth。
be_i：字节写使能，用于 选择性写入 部分字节。
rdata_o：读取数据，宽度等于 DataWidth。

4. 仿真部分

// synthesis translate_off

synthesis translate_off 和 synthesis translate_on 之间的代码 仅在仿真时生效，不会被综合工具（如 FPGA 设计）使用。

tc_sram #(
    .NumWords(NumWords),
    .DataWidth(DataWidth),
    .ByteWidth(ByteWidth),
    .NumPorts(NumPorts),
    .Latency(Latency),
    .SimInit(SimInit),
    .PrintSimCfg(PrintSimCfg)
) i_tc_sram (
    .clk_i    ( clk_i   ),
    .rst_ni   ( rst_ni  ),
    .req_i    ( req_i   ),
    .we_i     ( we_i    ),
    .be_i     ( be_i    ),
    .wdata_i  ( wdata_i ),
    .addr_i   ( addr_i  ),
    .rdata_o  ( rdata_o )
);

tc_sram 是 底层 SRAM 组件，用于 存储数据。
这个模块 仅在仿真时 实例化 tc_sram 进行 存储仿真，确保 req_i、we_i、addr_i、wdata_i、be_i 等信号正确工作。

5. 关键特性

模块化设计：
- tc_sram_wrapper 作为 封装层，使 tc_sram 可复用，方便 不同参数化配置的存储单元复用。
多端口支持：
- NumPorts 允许同时 多端口访问，适用于 多处理器架构 或 高并发存储器访问。
可配置性：
- Latency 控制 读取数据的延迟，可用于 匹配不同的 SRAM 设计。
- SimInit 支持 不同的初始化模式，在仿真时可指定 存储阵列的初始内容（例如本文实例中使用的 "zeros" 或 "none"）。
仿真与综合分离：
- synthesis translate_off/on 确保 仿真和综合代码的分离，不会影响 FPGA/ASIC 设计。

总结

tc_sram_wrapper 是 参数化 SRAM 访问封装模块，支持：
- 灵活的存储深度 (NumWords)
- 可变数据宽度 (DataWidth)
- 可选多端口支持 (NumPorts)
- 可变字节使能 (BeWidth)
- 支持仿真 (SimInit)
它在仿真时实例化 tc_sram 进行存储操作，但在综合时可以使用 真实的 SRAM 宏单元，适用于 FPGA/ASIC 存储架构。