Blame SOURCES/0001-efa-Flush-write-combining-writes-before-writing-to-t.patch

0e3d1a
From 4b7203f835727e9314ef42db682b578730783d7d Mon Sep 17 00:00:00 2001
0e3d1a
From: Gal Pressman <galpress@amazon.com>
0e3d1a
Date: Wed, 11 Nov 2020 14:21:13 +0200
0e3d1a
Subject: [PATCH] efa: Flush write combining writes before writing to the LLQ
0e3d1a
0e3d1a
[ Upstream commit 9a0d3830da11a187fb6bffe4f6f361560a0b2f40 ]
0e3d1a
0e3d1a
An mmio_wc_start() is needed before writing to the LLQ memory in order
0e3d1a
to prevent the WQEs copy (WC memory) from being reordered relative to
0e3d1a
other mmio writes, such as tx doorbells (NC memory).
0e3d1a
0e3d1a
This prevents the provider from issuing more than max_tx_batch LLQ writes
0e3d1a
between two doorbells. This is especially relevant when the user calls
0e3d1a
the _post API with more WQEs than max_tx_batch.
0e3d1a
0e3d1a
Fixes: 7aad28d11981 ("efa: Respect maximum TX doorbell batch")
0e3d1a
Signed-off-by: Shadi Ammouri <sammouri@amazon.com>
0e3d1a
Signed-off-by: Gal Pressman <galpress@amazon.com>
0e3d1a
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
0e3d1a
---
0e3d1a
 providers/efa/verbs.c | 14 +++++++++++---
0e3d1a
 1 file changed, 11 insertions(+), 3 deletions(-)
0e3d1a
0e3d1a
diff --git a/providers/efa/verbs.c b/providers/efa/verbs.c
0e3d1a
index e179ff24e911..e80660d1907f 100644
0e3d1a
--- a/providers/efa/verbs.c
0e3d1a
+++ b/providers/efa/verbs.c
0e3d1a
@@ -1389,7 +1389,6 @@ static inline void efa_rq_ring_doorbell(struct efa_rq *rq, uint16_t pc)
0e3d1a
 
0e3d1a
 static inline void efa_sq_ring_doorbell(struct efa_sq *sq, uint16_t pc)
0e3d1a
 {
0e3d1a
-	mmio_flush_writes();
0e3d1a
 	mmio_write32(sq->wq.db, pc);
0e3d1a
 }
0e3d1a
 
0e3d1a
@@ -1510,15 +1509,19 @@ int efa_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
0e3d1a
 
0e3d1a
 		if (curbatch == qp->sq.max_batch_wr) {
0e3d1a
 			curbatch = 0;
0e3d1a
+			mmio_flush_writes();
0e3d1a
 			efa_sq_ring_doorbell(&qp->sq, qp->sq.wq.pc);
0e3d1a
+			mmio_wc_start();
0e3d1a
 		}
0e3d1a
 
0e3d1a
 		wr = wr->next;
0e3d1a
 	}
0e3d1a
 
0e3d1a
 ring_db:
0e3d1a
-	if (curbatch)
0e3d1a
+	if (curbatch) {
0e3d1a
+		mmio_flush_writes();
0e3d1a
 		efa_sq_ring_doorbell(&qp->sq, qp->sq.wq.pc);
0e3d1a
+	}
0e3d1a
 
0e3d1a
 	/*
0e3d1a
 	 * Not using mmio_wc_spinunlock as the doorbell write should be done
0e3d1a
@@ -1774,6 +1777,7 @@ static int efa_send_wr_complete(struct ibv_qp_ex *ibvqpx)
0e3d1a
 	pc = qp->sq.wq.pc - qp->sq.num_wqe_pending;
0e3d1a
 	sq_desc_idx = pc & qp->sq.wq.desc_mask;
0e3d1a
 
0e3d1a
+	/* mmio_wc_start() comes from efa_send_wr_start() */
0e3d1a
 	while (qp->sq.num_wqe_pending) {
0e3d1a
 		num_wqe_to_copy = min3(qp->sq.num_wqe_pending,
0e3d1a
 				       qp->sq.wq.wqe_cnt - sq_desc_idx,
0e3d1a
@@ -1792,13 +1796,17 @@ static int efa_send_wr_complete(struct ibv_qp_ex *ibvqpx)
0e3d1a
 			      qp->sq.wq.desc_mask;
0e3d1a
 
0e3d1a
 		if (curbatch == max_txbatch) {
0e3d1a
+			mmio_flush_writes();
0e3d1a
 			efa_sq_ring_doorbell(&qp->sq, pc);
0e3d1a
 			curbatch = 0;
0e3d1a
+			mmio_wc_start();
0e3d1a
 		}
0e3d1a
 	}
0e3d1a
 
0e3d1a
-	if (curbatch)
0e3d1a
+	if (curbatch) {
0e3d1a
+		mmio_flush_writes();
0e3d1a
 		efa_sq_ring_doorbell(&qp->sq, qp->sq.wq.pc);
0e3d1a
+	}
0e3d1a
 out:
0e3d1a
 	/*
0e3d1a
 	 * Not using mmio_wc_spinunlock as the doorbell write should be done
0e3d1a
-- 
0e3d1a
2.25.4
0e3d1a