f2c60e
From patchwork Thu Aug 17 12:37:06 2017
f2c60e
Content-Type: text/plain; charset="utf-8"
f2c60e
MIME-Version: 1.0
f2c60e
Content-Transfer-Encoding: 8bit
f2c60e
Subject: drm/i915: Boost GPU clocks if we miss the pageflip's vblank
f2c60e
From: Chris Wilson <chris@chris-wilson.co.uk>
f2c60e
X-Patchwork-Id: 172204
f2c60e
Message-Id: <20170817123706.6777-1-chris@chris-wilson.co.uk>
f2c60e
To: intel-gfx@lists.freedesktop.org
f2c60e
Cc: Daniel Vetter <daniel.vetter@intel.com>
f2c60e
Date: Thu, 17 Aug 2017 13:37:06 +0100
f2c60e
f2c60e
If we miss the current vblank because the GPU was busy, that may cause
f2c60e
jitter as the frame rate temporarily drops. We try to limit the impact
f2c60e
of this by then boosting the GPU clock to deliver the frame as quickly
f2c60e
as possible. This was originally done in commit 6ad790c0f5ac ("drm/i915:
f2c60e
Boost GPU frequency if we detect outstanding pageflips") but was never
f2c60e
forward-ported to atomic and was finally dropped in commit fd3a40242e87
f2c60e
("drm/i915: Rip out legacy page_flip completion/irq handling").
f2c60e
f2c60e
References: https://bugs.freedesktop.org/show_bug.cgi?id=102199
f2c60e
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
f2c60e
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
f2c60e
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
f2c60e
Cc: Daniel Vetter <daniel.vetter@intel.com>
f2c60e
Tested-by: Lyude Paul <lyude@redhat.com>
f2c60e
Reviewed-by: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
f2c60e
---
f2c60e
 drivers/gpu/drm/i915/intel_display.c | 59 ++++++++++++++++++++++++++++++++++++
f2c60e
 drivers/gpu/drm/i915/intel_drv.h     |  1 -
f2c60e
 drivers/gpu/drm/i915/intel_pm.c      | 42 ++-----------------------
f2c60e
 3 files changed, 62 insertions(+), 40 deletions(-)
f2c60e
f2c60e
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
f2c60e
index 0e93ec201fe3..7d5b19553637 100644
f2c60e
--- a/drivers/gpu/drm/i915/intel_display.c
f2c60e
+++ b/drivers/gpu/drm/i915/intel_display.c
f2c60e
@@ -12636,6 +12636,55 @@ static const struct drm_crtc_funcs intel_crtc_funcs = {
f2c60e
 	.set_crc_source = intel_crtc_set_crc_source,
f2c60e
 };
f2c60e
 
f2c60e
+struct wait_rps_boost {
f2c60e
+	struct wait_queue_entry wait;
f2c60e
+
f2c60e
+	struct drm_crtc *crtc;
f2c60e
+	struct drm_i915_gem_request *request;
f2c60e
+};
f2c60e
+
f2c60e
+static int do_rps_boost(struct wait_queue_entry *_wait,
f2c60e
+			unsigned mode, int sync, void *key)
f2c60e
+{
f2c60e
+	struct wait_rps_boost *wait = container_of(_wait, typeof(*wait), wait);
f2c60e
+	struct drm_i915_gem_request *rq = wait->request;
f2c60e
+
f2c60e
+	gen6_rps_boost(rq, NULL);
f2c60e
+	i915_gem_request_put(rq);
f2c60e
+
f2c60e
+	drm_crtc_vblank_put(wait->crtc);
f2c60e
+
f2c60e
+	list_del(&wait->wait.entry);
f2c60e
+	kfree(wait);
f2c60e
+	return 1;
f2c60e
+}
f2c60e
+
f2c60e
+static void add_rps_boost_after_vblank(struct drm_crtc *crtc,
f2c60e
+				       struct dma_fence *fence)
f2c60e
+{
f2c60e
+	struct wait_rps_boost *wait;
f2c60e
+
f2c60e
+	if (!dma_fence_is_i915(fence))
f2c60e
+		return;
f2c60e
+
f2c60e
+	if (drm_crtc_vblank_get(crtc))
f2c60e
+		return;
f2c60e
+
f2c60e
+	wait = kmalloc(sizeof(*wait), GFP_KERNEL);
f2c60e
+	if (!wait) {
f2c60e
+		drm_crtc_vblank_put(crtc);
f2c60e
+		return;
f2c60e
+	}
f2c60e
+
f2c60e
+	wait->request = to_request(dma_fence_get(fence));
f2c60e
+	wait->crtc = crtc;
f2c60e
+
f2c60e
+	wait->wait.func = do_rps_boost;
f2c60e
+	wait->wait.flags = 0;
f2c60e
+
f2c60e
+	add_wait_queue(drm_crtc_vblank_waitqueue(crtc), &wait->wait);
f2c60e
+}
f2c60e
+
f2c60e
 /**
f2c60e
  * intel_prepare_plane_fb - Prepare fb for usage on plane
f2c60e
  * @plane: drm plane to prepare for
f2c60e
@@ -12733,12 +12782,22 @@ intel_prepare_plane_fb(struct drm_plane *plane,
f2c60e
 		return ret;
f2c60e
 
f2c60e
 	if (!new_state->fence) { /* implicit fencing */
f2c60e
+		struct dma_fence *fence;
f2c60e
+
f2c60e
 		ret = i915_sw_fence_await_reservation(&intel_state->commit_ready,
f2c60e
 						      obj->resv, NULL,
f2c60e
 						      false, I915_FENCE_TIMEOUT,
f2c60e
 						      GFP_KERNEL);
f2c60e
 		if (ret < 0)
f2c60e
 			return ret;
f2c60e
+
f2c60e
+		fence = reservation_object_get_excl_rcu(obj->resv);
f2c60e
+		if (fence) {
f2c60e
+			add_rps_boost_after_vblank(new_state->crtc, fence);
f2c60e
+			dma_fence_put(fence);
f2c60e
+		}
f2c60e
+	} else {
f2c60e
+		add_rps_boost_after_vblank(new_state->crtc, new_state->fence);
f2c60e
 	}
f2c60e
 
f2c60e
 	return 0;
f2c60e
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
f2c60e
index fa47285918f4..e092354b4d63 100644
f2c60e
--- a/drivers/gpu/drm/i915/intel_drv.h
f2c60e
+++ b/drivers/gpu/drm/i915/intel_drv.h
f2c60e
@@ -1844,7 +1844,6 @@ void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
f2c60e
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
f2c60e
 void gen6_rps_boost(struct drm_i915_gem_request *rq,
f2c60e
 		    struct intel_rps_client *rps);
f2c60e
-void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req);
f2c60e
 void g4x_wm_get_hw_state(struct drm_device *dev);
f2c60e
 void vlv_wm_get_hw_state(struct drm_device *dev);
f2c60e
 void ilk_wm_get_hw_state(struct drm_device *dev);
f2c60e
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
f2c60e
index ed662937ec3c..c9fa2eb1903c 100644
f2c60e
--- a/drivers/gpu/drm/i915/intel_pm.c
f2c60e
+++ b/drivers/gpu/drm/i915/intel_pm.c
f2c60e
@@ -6169,6 +6169,7 @@ void gen6_rps_boost(struct drm_i915_gem_request *rq,
f2c60e
 		    struct intel_rps_client *rps)
f2c60e
 {
f2c60e
 	struct drm_i915_private *i915 = rq->i915;
f2c60e
+	unsigned long flags;
f2c60e
 	bool boost;
f2c60e
 
f2c60e
 	/* This is intentionally racy! We peek at the state here, then
f2c60e
@@ -6178,13 +6179,13 @@ void gen6_rps_boost(struct drm_i915_gem_request *rq,
f2c60e
 		return;
f2c60e
 
f2c60e
 	boost = false;
f2c60e
-	spin_lock_irq(&rq->lock);
f2c60e
+	spin_lock_irqsave(&rq->lock, flags);
f2c60e
 	if (!rq->waitboost && !i915_gem_request_completed(rq)) {
f2c60e
 		atomic_inc(&i915->rps.num_waiters);
f2c60e
 		rq->waitboost = true;
f2c60e
 		boost = true;
f2c60e
 	}
f2c60e
-	spin_unlock_irq(&rq->lock);
f2c60e
+	spin_unlock_irqrestore(&rq->lock, flags);
f2c60e
 	if (!boost)
f2c60e
 		return;
f2c60e
 
f2c60e
@@ -9132,43 +9133,6 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
f2c60e
 		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
f2c60e
 }
f2c60e
 
f2c60e
-struct request_boost {
f2c60e
-	struct work_struct work;
f2c60e
-	struct drm_i915_gem_request *req;
f2c60e
-};
f2c60e
-
f2c60e
-static void __intel_rps_boost_work(struct work_struct *work)
f2c60e
-{
f2c60e
-	struct request_boost *boost = container_of(work, struct request_boost, work);
f2c60e
-	struct drm_i915_gem_request *req = boost->req;
f2c60e
-
f2c60e
-	if (!i915_gem_request_completed(req))
f2c60e
-		gen6_rps_boost(req, NULL);
f2c60e
-
f2c60e
-	i915_gem_request_put(req);
f2c60e
-	kfree(boost);
f2c60e
-}
f2c60e
-
f2c60e
-void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req)
f2c60e
-{
f2c60e
-	struct request_boost *boost;
f2c60e
-
f2c60e
-	if (req == NULL || INTEL_GEN(req->i915) < 6)
f2c60e
-		return;
f2c60e
-
f2c60e
-	if (i915_gem_request_completed(req))
f2c60e
-		return;
f2c60e
-
f2c60e
-	boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
f2c60e
-	if (boost == NULL)
f2c60e
-		return;
f2c60e
-
f2c60e
-	boost->req = i915_gem_request_get(req);
f2c60e
-
f2c60e
-	INIT_WORK(&boost->work, __intel_rps_boost_work);
f2c60e
-	queue_work(req->i915->wq, &boost->work);
f2c60e
-}
f2c60e
-
f2c60e
 void intel_pm_setup(struct drm_i915_private *dev_priv)
f2c60e
 {
f2c60e
 	mutex_init(&dev_priv->rps.hw_lock);