In this post we build a transaction-level model of the following scenario: a GPU writes a 32-bit value to video memory (DDR).
Following the approach from the previous posts, we could have the GPU initiate a transaction for every access and let the DDR receive and process it. Writes to video memory, however, tend to be frequent and carry large amounts of data, and initiating a transaction for every write drags down simulation performance. Here we introduce a more efficient mechanism:
DMI, the Direct Memory Interface, which lets one module hold a raw pointer into another module's memory and access it directly.
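For reference, the DMI handshake in TLM-2.0 consists of a forward call with which the initiator requests the pointer and a backward call with which the target can later revoke it; simplified from the tlm_fw_direct_mem_if / tlm_bw_direct_mem_if interfaces, the two calls look roughly like this:
// Forward path: the initiator asks the target for a DMI descriptor.
// Returns true if the pointer placed in dmi_data may be used.
bool get_direct_mem_ptr(tlm::tlm_generic_payload &trans, tlm::tlm_dmi &dmi_data);
// Backward path: the target tells the initiator that a previously granted
// pointer covering [start_range, end_range] is no longer valid.
void invalidate_direct_mem_ptr(sc_dt::uint64 start_range, sc_dt::uint64 end_range);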
Let's start modeling.
Creating the DDR and GPU modules
The GPU is the transaction initiator, so it gets an initiator socket; the DDR is the transaction target, so it gets a target socket:
#include <systemc.h>
#include <tlm.h>
#include <tlm_utils/simple_initiator_socket.h>
#include <tlm_utils/simple_target_socket.h>

class Gpu : public sc_module {
public:
    SC_HAS_PROCESS(Gpu);
    Gpu(sc_module_name name) : sc_module(name) {
    }

public:
    tlm_utils::simple_initiator_socket<Gpu> initiator_socket_ddr;
};

class Ddr : public sc_module {
public:
    SC_HAS_PROCESS(Ddr);
    Ddr(sc_module_name name) : sc_module(name) {
    }

public:
    tlm_utils::simple_target_socket<Ddr> target_socket_gpu;

private:
    static constexpr uint32_t DDR_MEM_SIZE_1M = 1024 * 1024;
    static constexpr uint32_t DDR_MEM_SIZE = DDR_MEM_SIZE_1M;
    uint8_t mem[DDR_MEM_SIZE]{0};
};
Connecting the two modules
int sc_main(int argc, char *argv[]) {
    Gpu gpu{"Gpu"};
    Ddr ddr{"Ddr"};
    gpu.initiator_socket_ddr(ddr.target_socket_gpu);
    sc_start();
    return 0;
}
DDR provides the memory pointer
Register a callback on the target socket that hands out the memory pointer:
Ddr(sc_module_name name) : sc_module(name) {
    target_socket_gpu.register_get_direct_mem_ptr(this, &Ddr::get_direct_mem_ptr_from_gpu);
}
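As an aside, simple_target_socket can also register an ordinary b_transport callback for accesses that go through the normal (non-DMI) path. A minimal sketch, assuming a handler named b_transport_from_gpu registered in the constructor with target_socket_gpu.register_b_transport(this, &Ddr::b_transport_from_gpu):
void b_transport_from_gpu(tlm::tlm_generic_payload &payload, sc_time &delay) {
    uint64_t addr = payload.get_address();
    unsigned char *data = payload.get_data_ptr();
    unsigned int len = payload.get_data_length();
    if (payload.is_write() && addr + len <= DDR_MEM_SIZE) {
        memcpy(&mem[addr], data, len);  // write into the backing storage
        payload.set_response_status(tlm::TLM_OK_RESPONSE);
    } else {
        payload.set_response_status(tlm::TLM_GENERIC_ERROR_RESPONSE);
    }
}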
The registered callback, get_direct_mem_ptr_from_gpu, returns the memory pointer:
bool get_direct_mem_ptr_from_gpu(tlm::tlm_generic_payload &payload, tlm::tlm_dmi &dmi) {
    dmi.allow_write();                      // grant write-only access
    dmi.set_dmi_ptr(&mem[0]);               // pointer to the backing storage
    dmi.set_start_address(0);               // lowest address of the writable range
    dmi.set_end_address(DDR_MEM_SIZE - 1);  // highest address of the writable range
    dmi.set_read_latency(SC_ZERO_TIME);     // read latency
    dmi.set_write_latency(SC_ZERO_TIME);    // write latency
    return true;                            // a valid memory pointer has been returned
}
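If the DDR ever needs to take the pointer back, for example when the memory is remapped or taken offline, it should revoke the grant through the backward DMI path. A hedged sketch, where invalidate_dmi is a hypothetical helper inside Ddr:
void invalidate_dmi() {
    // Tell every initiator holding a DMI pointer into [0, DDR_MEM_SIZE - 1]
    // that the pointer must no longer be used; it has to be requested again.
    target_socket_gpu->invalidate_direct_mem_ptr(0, DDR_MEM_SIZE - 1);
}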
GPU accesses the DDR
Register a process that performs the access:
Gpu(sc_module_name name) : sc_module(name) {
    SC_METHOD(write_ddr);
}
In that process, access the DDR directly through the DMI pointer:
void write_ddr() {
    tlm::tlm_generic_payload payload;  // our Ddr model ignores the payload contents
    tlm::tlm_dmi dmi;
    // Ask the target for a DMI descriptor; valid is true if the pointer may be used.
    bool valid = initiator_socket_ddr->get_direct_mem_ptr(payload, dmi);
    if (valid) {
        uint32_t addr = 0xf;
        uint32_t data = 0x12345678;
        // Copy the 32-bit value straight into the target's memory, no transaction needed.
        memcpy(dmi.get_dmi_ptr() + addr, &data, sizeof(uint32_t));
    }
}
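In a more realistic model the GPU would request the descriptor once, cache it, check the granted access type and address range before each access, and drop the cached pointer when the target invalidates it. A sketch of that pattern; the member names and the callback invalidate_dmi_from_ddr are assumptions, and the callback would be registered in the Gpu constructor with initiator_socket_ddr.register_invalidate_direct_mem_ptr(this, &Gpu::invalidate_dmi_from_ddr):
void write_ddr_checked(uint64_t addr, uint32_t data) {
    if (!dmi_valid_) {  // (re)acquire the descriptor only when we do not hold a usable one
        tlm::tlm_generic_payload payload;
        dmi_valid_ = initiator_socket_ddr->get_direct_mem_ptr(payload, dmi_);
    }
    if (dmi_valid_ && dmi_.is_write_allowed() &&
        addr >= dmi_.get_start_address() &&
        addr + sizeof(data) - 1 <= dmi_.get_end_address()) {
        // The DMI pointer points at dmi_.get_start_address(), so offset relative to it.
        memcpy(dmi_.get_dmi_ptr() + (addr - dmi_.get_start_address()), &data, sizeof(data));
    }
}

// Backward-path callback: the target revoked the pointer, so stop using it.
void invalidate_dmi_from_ddr(sc_dt::uint64 start, sc_dt::uint64 end) {
    dmi_valid_ = false;
}

tlm::tlm_dmi dmi_;       // cached DMI descriptor
bool dmi_valid_{false};  // whether dmi_ currently holds a usable pointer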
In write_ddr above, a 32-bit value, 0x12345678, is written into video memory starting at address 0xf.
Complete program
#include <cassert>   // assert, used in Ddr::read
#include <cstring>   // memcpy
#include <systemc.h>
#include <tlm.h>
#include <tlm_utils/simple_initiator_socket.h>
#include <tlm_utils/simple_target_socket.h>
class Gpu : public sc_module {
public:
    SC_HAS_PROCESS(Gpu);
    Gpu(sc_module_name name) : sc_module(name) {
        SC_METHOD(write_ddr);
    }

    void write_ddr() {
        tlm::tlm_generic_payload payload;
        tlm::tlm_dmi dmi;
        bool valid = initiator_socket_ddr->get_direct_mem_ptr(payload, dmi);
        if (valid) {
            uint32_t addr = 0xf;
            uint32_t data = 0x12345678;
            memcpy(dmi.get_dmi_ptr() + addr, &data, sizeof(uint32_t));
        }
    }

public:
    tlm_utils::simple_initiator_socket<Gpu> initiator_socket_ddr;
};
class Ddr : public sc_module {
public:
    SC_HAS_PROCESS(Ddr);
    Ddr(sc_module_name name) : sc_module(name) {
        target_socket_gpu.register_get_direct_mem_ptr(this, &Ddr::get_direct_mem_ptr_from_gpu);
    }

    bool get_direct_mem_ptr_from_gpu(tlm::tlm_generic_payload &payload, tlm::tlm_dmi &dmi) {
        dmi.allow_write();
        dmi.set_dmi_ptr(&mem[0]);
        dmi.set_start_address(0);
        dmi.set_end_address(DDR_MEM_SIZE - 1);
        dmi.set_read_latency(SC_ZERO_TIME);
        dmi.set_write_latency(SC_ZERO_TIME);
        return true;
    }

    uint8_t read(uint32_t addr) {
        assert(addr < DDR_MEM_SIZE);
        return mem[addr];
    }

public:
    tlm_utils::simple_target_socket<Ddr> target_socket_gpu;

private:
    static constexpr uint32_t DDR_MEM_SIZE_1M = 1024 * 1024;
    static constexpr uint32_t DDR_MEM_SIZE = DDR_MEM_SIZE_1M;
    uint8_t mem[DDR_MEM_SIZE]{0};
};
int sc_main(int argc, char *argv[]) {
    Gpu gpu{"Gpu"};
    Ddr ddr{"Ddr"};
    gpu.initiator_socket_ddr(ddr.target_socket_gpu);
    sc_start();
    std::cout << std::hex << (uint32_t)ddr.read(0xf + 0x0) << std::endl;
    std::cout << std::hex << (uint32_t)ddr.read(0xf + 0x1) << std::endl;
    std::cout << std::hex << (uint32_t)ddr.read(0xf + 0x2) << std::endl;
    std::cout << std::hex << (uint32_t)ddr.read(0xf + 0x3) << std::endl;
    return 0;
}
Run result (the four bytes read back print least-significant byte first, because the host stores the 32-bit value in little-endian order):
SystemC 2.3.3-Accellera --- Mar 10 2021 20:59:57
Copyright (c) 1996-2018 by all Contributors,
ALL RIGHTS RESERVED
78
56
34
12
Process finished with exit code 0