Skip to content

Commit ac18a5a

Browse files
committed
Merge remote-tracking branch 'upstream/tensor_unit' into simx-vpu
2 parents b2ad2e5 + 3e7f9cf commit ac18a5a

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

60 files changed

+2022
-3550
lines changed

ci/regression.sh.in

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,6 @@ regression()
104104
# test temp driver mode for
105105
./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3
106106

107-
# test for matmul
108-
CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"
109-
110107
echo "regression tests done!"
111108
}
112109

@@ -301,11 +298,11 @@ config2()
301298

302299
# test single-bank memory
303300
if [ "$XLEN" == "64" ]; then
304-
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=opae --app=mstress
305-
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=xrt --app=mstress
301+
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
302+
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=xrt --app=mstress
306303
else
307-
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=opae --app=mstress
308-
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=xrt --app=mstress
304+
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
305+
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=xrt --app=mstress
309306
fi
310307

311308
# test larger memory address
@@ -322,10 +319,10 @@ config2()
322319
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress
323320

324321
# test memory ports
325-
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress
326-
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
327-
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress
328-
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
322+
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress
323+
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
324+
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress
325+
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
329326
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=opae --app=mstress --threads=8
330327
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=xrt --app=mstress --threads=8
331328

hw/rtl/VX_config.vh

Lines changed: 41 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
`endif
3232

3333
///////////////////////////////////////////////////////////////////////////////
34+
3435
`ifndef EXT_M_DISABLE
3536
`define EXT_M_ENABLE
3637
`endif
@@ -113,24 +114,6 @@
113114
`define SOCKET_SIZE `MIN(4, `NUM_CORES)
114115
`endif
115116

116-
// Size of Tensor Core
117-
`ifndef TC_SIZE
118-
`define TC_SIZE 8
119-
`endif
120-
121-
// Number of TCs per Warp
122-
`ifndef TC_NUM
123-
`define TC_NUM 4
124-
`endif
125-
126-
`ifndef NUM_TCU_LANES
127-
`define NUM_TCU_LANES `TC_NUM
128-
`endif
129-
130-
`ifndef NUM_TCU_BLOCKS
131-
`define NUM_TCU_BLOCKS `ISSUE_WIDTH
132-
`endif
133-
134117
`ifdef L2_ENABLE
135118
`define L2_ENABLED 1
136119
`else
@@ -172,8 +155,26 @@
172155
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
173156
`endif
174157

175-
`ifndef PLATFORM_MEMORY_BANKS
176-
`define PLATFORM_MEMORY_BANKS 2
158+
// Platform memory parameters
159+
160+
`ifndef PLATFORM_MEMORY_NUM_BANKS
161+
`define PLATFORM_MEMORY_NUM_BANKS 2
162+
`endif
163+
164+
`ifndef PLATFORM_MEMORY_ADDR_WIDTH
165+
`ifdef XLEN_64
166+
`define PLATFORM_MEMORY_ADDR_WIDTH 48
167+
`else
168+
`define PLATFORM_MEMORY_ADDR_WIDTH 32
169+
`endif
170+
`endif
171+
172+
`ifndef PLATFORM_MEMORY_DATA_SIZE
173+
`define PLATFORM_MEMORY_DATA_SIZE 64
174+
`endif
175+
176+
`ifndef PLATFORM_MEMORY_INTERLEAVE
177+
`define PLATFORM_MEMORY_INTERLEAVE 1
177178
`endif
178179

179180
`ifdef XLEN_64
@@ -299,7 +300,8 @@
299300
`define MEM_PAGE_LOG2_SIZE (12)
300301
`endif
301302

302-
// Virtual Memory Configuration ///////////////////////////////////////////////////////
303+
// Virtual Memory Configuration ///////////////////////////////////////////////
304+
303305
`ifdef VM_ENABLE
304306
`ifdef XLEN_32
305307
`ifndef VM_ADDR_MODE
@@ -527,6 +529,12 @@
527529
`define FNCP_PE_RATIO 2
528530
`endif
529531

532+
// Tensor Units ///////////////////////////////////////////////////////////////
533+
534+
`ifndef NUM_TENSOR_CORES
535+
`define NUM_TENSOR_CORES `ISSUE_WIDTH
536+
`endif
537+
530538
// Icache Configurable Knobs //////////////////////////////////////////////////
531539

532540
// Cache Enable
@@ -656,9 +664,9 @@
656664
// Number of Memory Ports
657665
`ifndef L1_MEM_PORTS
658666
`ifdef L1_DISABLE
659-
`define L1_MEM_PORTS `MIN(DCACHE_NUM_REQS, `PLATFORM_MEMORY_BANKS)
667+
`define L1_MEM_PORTS `MIN(DCACHE_NUM_REQS, `PLATFORM_MEMORY_NUM_BANKS)
660668
`else
661-
`define L1_MEM_PORTS `MIN(`DCACHE_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
669+
`define L1_MEM_PORTS `MIN(`DCACHE_NUM_BANKS, `PLATFORM_MEMORY_NUM_BANKS)
662670
`endif
663671
`endif
664672

@@ -735,9 +743,9 @@
735743
// Number of Memory Ports
736744
`ifndef L2_MEM_PORTS
737745
`ifdef L2_ENABLE
738-
`define L2_MEM_PORTS `MIN(`L2_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
746+
`define L2_MEM_PORTS `MIN(`L2_NUM_BANKS, `PLATFORM_MEMORY_NUM_BANKS)
739747
`else
740-
`define L2_MEM_PORTS `MIN(L2_NUM_REQS, `PLATFORM_MEMORY_BANKS)
748+
`define L2_MEM_PORTS `MIN(L2_NUM_REQS, `PLATFORM_MEMORY_NUM_BANKS)
741749
`endif
742750
`endif
743751

@@ -796,9 +804,9 @@
796804
// Number of Memory Ports
797805
`ifndef L3_MEM_PORTS
798806
`ifdef L3_ENABLE
799-
`define L3_MEM_PORTS `MIN(`L3_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
807+
`define L3_MEM_PORTS `MIN(`L3_NUM_BANKS, `PLATFORM_MEMORY_NUM_BANKS)
800808
`else
801-
`define L3_MEM_PORTS `MIN(L3_NUM_REQS, `PLATFORM_MEMORY_BANKS)
809+
`define L3_MEM_PORTS `MIN(L3_NUM_REQS, `PLATFORM_MEMORY_NUM_BANKS)
802810
`endif
803811
`endif
804812

@@ -846,6 +854,12 @@
846854
`define EXT_ZICOND_ENABLED 0
847855
`endif
848856

857+
`ifdef EXT_TPU_ENABLE
858+
`define EXT_TPU_ENABLED 1
859+
`else
860+
`define EXT_TPU_ENABLED 0
861+
`endif
862+
849863
`define ISA_STD_A 0
850864
`define ISA_STD_C 2
851865
`define ISA_STD_D 3

hw/rtl/VX_types.vh

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -229,10 +229,4 @@
229229
`define VX_CSR_NUM_CORES 12'hFC2
230230
`define VX_CSR_LOCAL_MEM_BASE 12'hFC3
231231

232-
`define VX_MAT_MUL_SIZE 12'hFC4 // VX_MAT_MUL_SIZE = Matrix Size / TC Size
233-
`define VX_TC_NUM 12'hFC5
234-
`define VX_TC_SIZE 12'hFC6
235-
236-
237-
238232
`endif // VX_TYPES_VH

hw/rtl/Vortex_axi.sv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ module Vortex_axi import VX_gpu_pkg::*; #(
193193
.TAG_WIDTH_OUT (AXI_TID_WIDTH),
194194
.NUM_PORTS_IN (`VX_MEM_PORTS),
195195
.NUM_BANKS_OUT (AXI_NUM_BANKS),
196-
.INTERLEAVE (0),
196+
.INTERLEAVE (`PLATFORM_MEMORY_INTERLEAVE),
197197
.REQ_OUT_BUF ((`VX_MEM_PORTS > 1) ? 2 : 0),
198198
.RSP_OUT_BUF ((`VX_MEM_PORTS > 1 || AXI_NUM_BANKS > 1) ? 2 : 0)
199199
) axi_adapter (

hw/rtl/afu/opae/local_mem_cfg_pkg.sv

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,18 +28,18 @@
2828
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2929
// POSSIBILITY OF SUCH DAMAGE.
3030

31-
//`include "platform_afu_top_config.vh"
31+
`include "VX_define.vh"
3232

3333
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH
34-
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH (`PLATFORM_MEMORY_ADDR_WIDTH - $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8))
34+
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH ((`PLATFORM_MEMORY_ADDR_WIDTH - $clog2(`PLATFORM_MEMORY_NUM_BANKS)) - $clog2(`PLATFORM_MEMORY_DATA_SIZE))
3535
`endif
3636

3737
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH
38-
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH `PLATFORM_MEMORY_DATA_WIDTH
38+
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH (`PLATFORM_MEMORY_DATA_SIZE * 8)
3939
`endif
4040

4141
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH
42-
`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH `PLATFORM_MEMORY_BURST_CNT_WIDTH
42+
`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH 4
4343
`endif
4444

4545
package local_mem_cfg_pkg;

hw/rtl/afu/opae/vortex_afu.sv

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,14 @@
1111
// See the License for the specific language governing permissions and
1212
// limitations under the License.
1313

14+
`include "VX_define.vh"
15+
1416
`ifndef NOPAE
1517
`include "afu_json_info.vh"
1618
`else
1719
`include "vortex_afu.vh"
1820
`endif
1921

20-
`include "VX_define.vh"
21-
22-
`ifndef PLATFORM_MEMORY_INTERLEAVE
23-
`define PLATFORM_MEMORY_INTERLEAVE 1
24-
`endif
25-
2622
module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_gpu_pkg::*; #(
2723
parameter NUM_LOCAL_MEM_BANKS = 2
2824
) (

hw/rtl/afu/xrt/VX_afu_ctrl.sv

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,10 +134,12 @@ module VX_afu_ctrl #(
134134
RSTATE_RESP = 2'd2,
135135
RSTATE_WIDTH = 2;
136136

137+
localparam MEMORY_BANK_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH - `CLOG2(`PLATFORM_MEMORY_NUM_BANKS);
138+
137139
// device caps
138140
wire [63:0] dev_caps = {8'b0,
139-
5'(`PLATFORM_MEMORY_ADDR_WIDTH-20),
140-
3'(`CLOG2(`PLATFORM_MEMORY_BANKS)),
141+
5'(MEMORY_BANK_ADDR_WIDTH-20),
142+
3'(`CLOG2(`PLATFORM_MEMORY_NUM_BANKS)),
141143
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
142144
16'(`NUM_CORES * `NUM_CLUSTERS),
143145
8'(`NUM_WARPS),

hw/rtl/afu/xrt/VX_afu_wrap.sv

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,16 @@
1616
`include "vortex_afu.vh"
1717

1818
module VX_afu_wrap #(
19-
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
20-
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
21-
parameter C_M_AXI_MEM_ID_WIDTH = 32,
22-
parameter C_M_AXI_MEM_DATA_WIDTH = 512,
23-
parameter C_M_AXI_MEM_ADDR_WIDTH = 25,
24-
parameter C_M_AXI_MEM_NUM_BANKS = 2
19+
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
20+
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
21+
parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
22+
parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_SIZE * 8,
23+
parameter C_M_AXI_MEM_ADDR_WIDTH = 64,
24+
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
25+
parameter C_M_AXI_MEM_NUM_BANKS = 1
26+
`else
27+
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_NUM_BANKS
28+
`endif
2529
) (
2630
// System signals
2731
input wire clk,
@@ -31,7 +35,7 @@ module VX_afu_wrap #(
3135
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
3236
`REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA),
3337
`else
34-
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
38+
`REPEAT (`PLATFORM_MEMORY_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
3539
`endif
3640
// AXI4-Lite slave interface
3741
input wire s_axi_ctrl_awvalid,
@@ -58,11 +62,7 @@ module VX_afu_wrap #(
5862

5963
output wire interrupt
6064
);
61-
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
62-
localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_BANKS);
63-
`else
6465
localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH;
65-
`endif
6666

6767
typedef enum logic [1:0] {
6868
STATE_IDLE = 0,
@@ -71,8 +71,8 @@ module VX_afu_wrap #(
7171
STATE_DONE = 3
7272
} state_e;
7373

74-
localparam PENDING_SIZEW = 12; // max outstanding requests size
75-
localparam C_M_AXI_MEM_NUM_BANKS_SW = `CLOG2(C_M_AXI_MEM_NUM_BANKS+1);
74+
localparam PENDING_WR_SIZEW = 12; // max outstanding requests size
75+
localparam NUM_MEM_BANKS_SIZEW = `CLOG2(C_M_AXI_MEM_NUM_BANKS+1);
7676

7777
wire m_axi_mem_awvalid_a [C_M_AXI_MEM_NUM_BANKS];
7878
wire m_axi_mem_awready_a [C_M_AXI_MEM_NUM_BANKS];
@@ -108,11 +108,11 @@ module VX_afu_wrap #(
108108
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
109109
`REPEAT (1, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
110110
`else
111-
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
111+
`REPEAT (`PLATFORM_MEMORY_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
112112
`endif
113113

114114
reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr;
115-
reg [PENDING_SIZEW-1:0] vx_pending_writes;
115+
reg [PENDING_WR_SIZEW-1:0] vx_pending_writes;
116116
reg vx_reset = 1; // asserted at initialization
117117
wire vx_busy;
118118

@@ -200,7 +200,7 @@ module VX_afu_wrap #(
200200
end
201201

202202
wire [C_M_AXI_MEM_NUM_BANKS-1:0] m_axi_wr_req_fire, m_axi_wr_rsp_fire;
203-
wire [C_M_AXI_MEM_NUM_BANKS_SW-1:0] cur_wr_reqs, cur_wr_rsps;
203+
wire [NUM_MEM_BANKS_SIZEW-1:0] cur_wr_reqs, cur_wr_rsps;
204204

205205
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_m_axi_wr_req_fire
206206
VX_axi_write_ack axi_write_ack (
@@ -224,14 +224,14 @@ module VX_afu_wrap #(
224224
`POP_COUNT(cur_wr_reqs, m_axi_wr_req_fire);
225225
`POP_COUNT(cur_wr_rsps, m_axi_wr_rsp_fire);
226226

227-
wire signed [C_M_AXI_MEM_NUM_BANKS_SW:0] reqs_sub = (C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_reqs) -
228-
(C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_rsps);
227+
wire signed [NUM_MEM_BANKS_SIZEW:0] reqs_sub = (NUM_MEM_BANKS_SIZEW+1)'(cur_wr_reqs) -
228+
(NUM_MEM_BANKS_SIZEW+1)'(cur_wr_rsps);
229229

230230
always @(posedge clk) begin
231231
if (reset) begin
232232
vx_pending_writes <= '0;
233233
end else begin
234-
vx_pending_writes <= vx_pending_writes + PENDING_SIZEW'(reqs_sub);
234+
vx_pending_writes <= vx_pending_writes + PENDING_WR_SIZEW'(reqs_sub);
235235
end
236236
end
237237

@@ -270,7 +270,7 @@ module VX_afu_wrap #(
270270
.ap_ready (ap_ready),
271271
.ap_idle (ap_idle),
272272
.interrupt (interrupt),
273-
273+
274274
.ap_ctrl_read (ap_ctrl_read),
275275

276276
`ifdef SCOPE
@@ -287,9 +287,8 @@ module VX_afu_wrap #(
287287
wire [M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS];
288288

289289
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing
290-
localparam [C_M_AXI_MEM_ADDR_WIDTH-1:0] BANK_OFFSET = C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET) + C_M_AXI_MEM_ADDR_WIDTH'(i) << M_AXI_MEM_ADDR_WIDTH;
291-
assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + BANK_OFFSET;
292-
assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + BANK_OFFSET;
290+
assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET);
291+
assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET);
293292
end
294293

295294
`SCOPE_IO_SWITCH (2);

0 commit comments

Comments
 (0)