@@ -105,6 +105,8 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
105105 wire mem_rsp_sop_pkt, mem_rsp_eop_pkt;
106106 wire no_rsp_buf_valid, no_rsp_buf_ready;
107107
108+ wire [LSUQ_SIZEW - 1 : 0 ] pkt_waddr, pkt_raddr;
109+
108110 // fence handling
109111
110112 reg fence_lock;
@@ -208,71 +210,53 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
208210 end
209211 end
210212
211- // track SOP/EOP for out-of-order memory responses
212-
213- wire [LSUQ_SIZEW - 1 : 0 ] pkt_waddr, pkt_raddr;
213+ // multi-packet load responses could return out-of-order.
214+ // we should track and return eop packet response last.
214215
215216 if (PID_BITS != 0 ) begin : g_pids
216217 reg [`LSUQ_IN_SIZE - 1 : 0 ][PID_BITS : 0 ] pkt_ctr;
217- reg [`LSUQ_IN_SIZE - 1 : 0 ] pkt_sop, pkt_eop;
218+ reg [`LSUQ_IN_SIZE - 1 : 0 ] pkt_sop;
219+ reg [`LSUQ_IN_SIZE - 1 : 0 ] pkt_eop;
218220
219- wire mem_req_rd_fire = mem_req_fire && ~ mem_req_rw;
220- wire mem_req_rd_sop_fire = mem_req_rd_fire && execute_if.data.sop;
221+ wire mem_req_rd_fire = mem_req_fire && ~ mem_req_rw;
221222 wire mem_req_rd_eop_fire = mem_req_rd_fire && execute_if.data.eop;
222- wire mem_rsp_eop_fire = mem_rsp_fire && mem_rsp_eop;
223- wire full;
224-
225- VX_allocator # (
226- .SIZE (`LSUQ_IN_SIZE )
227- ) pkt_allocator (
228- .clk (clk),
229- .reset (reset),
230- .acquire_en (mem_req_rd_eop_fire),
231- .acquire_addr (pkt_waddr),
232- .release_en (mem_rsp_eop_pkt),
233- .release_addr (pkt_raddr),
234- `UNUSED_PIN (empty),
235- .full (full)
236- );
237-
238- wire rd_during_wr = mem_req_rd_fire && mem_rsp_eop_fire && (pkt_raddr == pkt_waddr);
223+ wire mem_rsp_eop_fire = mem_rsp_fire && mem_rsp_eop;
224+
225+ assign mem_rsp_sop_pkt = pkt_sop[pkt_raddr];
226+ assign mem_rsp_eop_pkt = mem_rsp_eop && pkt_eop[pkt_raddr] && (pkt_ctr[pkt_raddr] == 1 );
239227
240228 always @ (posedge clk) begin
241229 if (reset) begin
242- pkt_ctr <= '0 ;
243- pkt_sop <= '0 ;
244- pkt_eop <= '0 ;
245- end else begin
246- if (mem_req_rd_sop_fire) begin
247- pkt_sop[pkt_waddr] <= 1 ;
230+ for (integer i = 0 ; i < `LSUQ_IN_SIZE ; ++ i) begin
231+ pkt_ctr[i] <= '0 ;
232+ pkt_sop[i] <= 1 ;
233+ pkt_eop[i] <= 0 ;
248234 end
235+ end else begin
249236 if (mem_req_rd_eop_fire) begin
250237 pkt_eop[pkt_waddr] <= 1 ;
251238 end
252- if (mem_rsp_fire) begin
253- pkt_sop[pkt_raddr] <= 0 ;
254- end
255- if (mem_rsp_eop_pkt) begin
256- pkt_eop[pkt_raddr] <= 0 ;
257- end
258- if (~ rd_during_wr) begin
239+ if (~ (mem_req_rd_fire && mem_rsp_eop_fire && (pkt_raddr == pkt_waddr))) begin
259240 if (mem_req_rd_fire) begin
260241 pkt_ctr[pkt_waddr] <= pkt_ctr[pkt_waddr] + PID_BITS ' (1 );
261242 end
262243 if (mem_rsp_eop_fire) begin
263244 pkt_ctr[pkt_raddr] <= pkt_ctr[pkt_raddr] - PID_BITS ' (1 );
264245 end
265246 end
247+ if (mem_rsp_fire) begin
248+ pkt_sop[pkt_raddr] <= 0 ;
249+ end
250+ if (mem_rsp_eop_fire && mem_rsp_eop_pkt) begin
251+ pkt_sop[pkt_raddr] <= 1 ;
252+ pkt_eop[pkt_raddr] <= 0 ;
253+ end
266254 end
267255 end
268-
269- assign mem_rsp_sop_pkt = pkt_sop[pkt_raddr];
270- assign mem_rsp_eop_pkt = mem_rsp_eop_fire && pkt_eop[pkt_raddr] && (pkt_ctr[pkt_raddr] == 1 );
271- `RUNTIME_ASSERT (~ (mem_req_rd_fire && full), (" %t : allocator full!" , $time ))
272- `RUNTIME_ASSERT (~ mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], (" %t : oops! broken sop request!" , $time ))
273- `UNUSED_VAR (mem_rsp_sop)
256+ `RUNTIME_ASSERT (~ (mem_req_rd_fire && pkt_eop[pkt_waddr]), (" %t : oops! broken eop request! (#%0d )" , $time , execute_if.data.uuid))
257+ `RUNTIME_ASSERT (~ (mem_req_rd_fire && (2 ** PID_BITS - 1 ) == pkt_ctr[pkt_waddr]), (" %t : oops! broken ctr request! (#%0d )" , $time , execute_if.data.uuid))
258+ `RUNTIME_ASSERT (~ (mem_rsp_fire && 0 == pkt_ctr[pkt_raddr]), (" %t : oops! broken ctr response! (#%0d )" , $time , rsp_uuid))
274259 end else begin : g_no_pids
275- assign pkt_waddr = 0 ;
276260 assign mem_rsp_sop_pkt = mem_rsp_sop;
277261 assign mem_rsp_eop_pkt = mem_rsp_eop;
278262 `UNUSED_VAR (pkt_raddr)
@@ -337,8 +321,12 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
337321 .core_req_data (mem_req_data),
338322 .core_req_tag (mem_req_tag),
339323 .core_req_ready (mem_req_ready),
340- `UNUSED_PIN (core_req_empty),
341- `UNUSED_PIN (core_req_wr_notify),
324+ .core_req_queue_id (pkt_waddr),
325+
326+ // request queue info
327+ `UNUSED_PIN (req_queue_empty),
328+ `UNUSED_PIN (req_queue_pop),
329+ `UNUSED_PIN (req_queue_id),
342330
343331 // Output response
344332 .core_rsp_valid (mem_rsp_valid),
0 commit comments