chengshan / rpms / kernel

Forked from rpms/kernel 2 years ago
Clone
Pablo Greco d6c4c4
From 90de578c81e983b3d992ca3e1a7e5910c803abba Mon Sep 17 00:00:00 2001
Pablo Greco d6c4c4
From: Chris Wilson <chris@chris-wilson.co.uk>
Pablo Greco d6c4c4
Date: Mon, 30 Dec 2019 11:15:30 +0000
Pablo Greco d6c4c4
Subject: [PATCH] drm/i915/gt: Detect if we miss WaIdleLiteRestore
Pablo Greco d6c4c4
Pablo Greco d6c4c4
In order to avoid confusing the HW, we must never submit an empty ring
Pablo Greco d6c4c4
during lite-restore, that is we should always advance the RING_TAIL
Pablo Greco d6c4c4
before submitting to stay ahead of the RING_HEAD.
Pablo Greco d6c4c4
Pablo Greco d6c4c4
Normally this is prevented by keeping a couple of spare NOPs in the
Pablo Greco d6c4c4
request->wa_tail so that on resubmission we can advance the tail. This
Pablo Greco d6c4c4
relies on the request only being resubmitted once, which is the normal
Pablo Greco d6c4c4
condition as it is seen once for ELSP[1] and then later in ELSP[0]. On
Pablo Greco d6c4c4
preemption, the requests are unwound and the tail reset back to the
Pablo Greco d6c4c4
normal end point (as we know the request is incomplete and therefore its
Pablo Greco d6c4c4
RING_HEAD is even earlier).
Pablo Greco d6c4c4
Pablo Greco d6c4c4
However, if this w/a should fail we would try and resubmit the request
Pablo Greco d6c4c4
with the RING_TAIL already set to the location of this request's wa_tail
Pablo Greco d6c4c4
potentially causing a GPU hang. We can spot when we do try and
Pablo Greco d6c4c4
incorrectly resubmit without advancing the RING_TAIL and spare any
Pablo Greco d6c4c4
embarrassment by forcing the context restore.
Pablo Greco d6c4c4
Pablo Greco d6c4c4
In the case of preempt-to-busy, we leave the requests running on the HW
Pablo Greco d6c4c4
while we unwind. As the ring is still live, we cannot rewind our
Pablo Greco d6c4c4
rq->tail without forcing a reload so leave it set to rq->wa_tail and
Pablo Greco d6c4c4
only force a reload if we resubmit after a lite-restore. (Normally, the
Pablo Greco d6c4c4
forced reload will be a part of the preemption event.)
Pablo Greco d6c4c4
Pablo Greco d6c4c4
Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy")
Pablo Greco d6c4c4
Closes: https://gitlab.freedesktop.org/drm/intel/issues/673
Pablo Greco d6c4c4
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Pablo Greco d6c4c4
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Pablo Greco d6c4c4
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Pablo Greco d6c4c4
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Pablo Greco d6c4c4
Cc: stable@vger.kernel.org
Pablo Greco d6c4c4
Link: https://patchwork.freedesktop.org/patch/msgid/20191209023215.3519970-1-chris@chris-wilson.co.uk
Pablo Greco d6c4c4
(cherry picked from commit 82c69bf58650e644c61aa2bf5100b63a1070fd2f)
Pablo Greco d6c4c4
---
Pablo Greco d6c4c4
 drivers/gpu/drm/i915/gt/intel_lrc.c | 42 ++++++++++++++---------------
Pablo Greco d6c4c4
 1 file changed, 20 insertions(+), 22 deletions(-)
Pablo Greco d6c4c4
Pablo Greco d6c4c4
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
Pablo Greco d6c4c4
index d564bfcab6a3..49ce15553e7b 100644
Pablo Greco d6c4c4
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
Pablo Greco d6c4c4
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
Pablo Greco d6c4c4
@@ -471,12 +471,6 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
Pablo Greco d6c4c4
 	return desc;
Pablo Greco d6c4c4
 }
Pablo Greco d6c4c4
 
Pablo Greco d6c4c4
-static void unwind_wa_tail(struct i915_request *rq)
Pablo Greco d6c4c4
-{
Pablo Greco d6c4c4
-	rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
Pablo Greco d6c4c4
-	assert_ring_tail_valid(rq->ring, rq->tail);
Pablo Greco d6c4c4
-}
Pablo Greco d6c4c4
-
Pablo Greco d6c4c4
 static struct i915_request *
Pablo Greco d6c4c4
 __unwind_incomplete_requests(struct intel_engine_cs *engine)
Pablo Greco d6c4c4
 {
Pablo Greco d6c4c4
@@ -495,7 +489,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
Pablo Greco d6c4c4
 			continue; /* XXX */
Pablo Greco d6c4c4
 
Pablo Greco d6c4c4
 		__i915_request_unsubmit(rq);
Pablo Greco d6c4c4
-		unwind_wa_tail(rq);
Pablo Greco d6c4c4
 
Pablo Greco d6c4c4
 		/*
Pablo Greco d6c4c4
 		 * Push the request back into the queue for later resubmission.
Pablo Greco d6c4c4
@@ -650,13 +643,29 @@ execlists_schedule_out(struct i915_request *rq)
Pablo Greco d6c4c4
 	i915_request_put(rq);
Pablo Greco d6c4c4
 }
Pablo Greco d6c4c4
 
Pablo Greco d6c4c4
-static u64 execlists_update_context(const struct i915_request *rq)
Pablo Greco d6c4c4
+static u64 execlists_update_context(struct i915_request *rq)
Pablo Greco d6c4c4
 {
Pablo Greco d6c4c4
 	struct intel_context *ce = rq->hw_context;
Pablo Greco d6c4c4
-	u64 desc;
Pablo Greco d6c4c4
+	u64 desc = ce->lrc_desc;
Pablo Greco d6c4c4
+	u32 tail;
Pablo Greco d6c4c4
 
Pablo Greco d6c4c4
-	ce->lrc_reg_state[CTX_RING_TAIL + 1] =
Pablo Greco d6c4c4
-		intel_ring_set_tail(rq->ring, rq->tail);
Pablo Greco d6c4c4
+	/*
Pablo Greco d6c4c4
+	 * WaIdleLiteRestore:bdw,skl
Pablo Greco d6c4c4
+	 *
Pablo Greco d6c4c4
+	 * We should never submit the context with the same RING_TAIL twice
Pablo Greco d6c4c4
+	 * just in case we submit an empty ring, which confuses the HW.
Pablo Greco d6c4c4
+	 *
Pablo Greco d6c4c4
+	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
Pablo Greco d6c4c4
+	 * the normal request to be able to always advance the RING_TAIL on
Pablo Greco d6c4c4
+	 * subsequent resubmissions (for lite restore). Should that fail us,
Pablo Greco d6c4c4
+	 * and we try and submit the same tail again, force the context
Pablo Greco d6c4c4
+	 * reload.
Pablo Greco d6c4c4
+	 */
Pablo Greco d6c4c4
+	tail = intel_ring_set_tail(rq->ring, rq->tail);
Pablo Greco d6c4c4
+	if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL + 1] == tail))
Pablo Greco d6c4c4
+		desc |= CTX_DESC_FORCE_RESTORE;
Pablo Greco d6c4c4
+	ce->lrc_reg_state[CTX_RING_TAIL + 1] = tail;
Pablo Greco d6c4c4
+	rq->tail = rq->wa_tail;
Pablo Greco d6c4c4
 
Pablo Greco d6c4c4
 	/*
Pablo Greco d6c4c4
 	 * Make sure the context image is complete before we submit it to HW.
Pablo Greco d6c4c4
@@ -675,7 +684,6 @@ static u64 execlists_update_context(const struct i915_request *rq)
Pablo Greco d6c4c4
 	 */
Pablo Greco d6c4c4
 	mb();
Pablo Greco d6c4c4
 
Pablo Greco d6c4c4
-	desc = ce->lrc_desc;
Pablo Greco d6c4c4
 	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
Pablo Greco d6c4c4
 
Pablo Greco d6c4c4
 	return desc;
Pablo Greco d6c4c4
@@ -1150,16 +1158,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
Pablo Greco d6c4c4
 			if (!list_is_last(&last->sched.link,
Pablo Greco d6c4c4
 					  &engine->active.requests))
Pablo Greco d6c4c4
 				return;
Pablo Greco d6c4c4
-
Pablo Greco d6c4c4
-			/*
Pablo Greco d6c4c4
-			 * WaIdleLiteRestore:bdw,skl
Pablo Greco d6c4c4
-			 * Apply the wa NOOPs to prevent
Pablo Greco d6c4c4
-			 * ring:HEAD == rq:TAIL as we resubmit the
Pablo Greco d6c4c4
-			 * request. See gen8_emit_fini_breadcrumb() for
Pablo Greco d6c4c4
-			 * where we prepare the padding after the
Pablo Greco d6c4c4
-			 * end of the request.
Pablo Greco d6c4c4
-			 */
Pablo Greco d6c4c4
-			last->tail = last->wa_tail;
Pablo Greco d6c4c4
 		}
Pablo Greco d6c4c4
 	}
Pablo Greco d6c4c4
 
Pablo Greco d6c4c4
-- 
Pablo Greco d6c4c4
2.24.1
Pablo Greco d6c4c4