From 76fae3a0458b958e4478ba485e9d71b2ee6a5667 Mon Sep 17 00:00:00 2001
From: Yan Markman <ymarkman@marvell.com>
Date: Tue, 27 Mar 2018 16:49:05 +0200
Subject: [PATCH 41/46] net: mvpp2: Use relaxed I/O in data path
 
Use relaxed I/O on the hot path. This achieves significant performance
improvements. On a 10G link, this makes a basic iperf TCP test go from
an average of 4.5 Gbits/sec to about 9.40 Gbits/sec.
 
Signed-off-by: Yan Markman <ymarkman@marvell.com>
[Maxime: Commit message, cosmetic changes]
Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
 
(cherry picked from commit cdcfeb0fb473e34e012b9a78b5cb377a6ad1434d)
Signed-off-by: Marcin Wojtas <mw@semihalf.com>
---
 drivers/net/ethernet/marvell/mvpp2.c | 43 +++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 13 deletions(-)
 
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 7075e5a..7fc1bbf 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -1359,6 +1359,10 @@ static u32 mvpp2_read(struct mvpp2 *priv, u32 offset)
     return readl(priv->swth_base[0] + offset);
 }
 
+static u32 mvpp2_read_relaxed(struct mvpp2 *priv, u32 offset)
+{
+    return readl_relaxed(priv->swth_base[0] + offset);
+}
 /* These accessors should be used to access:
  *
  * - per-CPU registers, where each CPU has its own copy of the
@@ -1407,6 +1411,18 @@ static u32 mvpp2_percpu_read(struct mvpp2 *priv, int cpu,
     return readl(priv->swth_base[cpu] + offset);
 }
 
+static void mvpp2_percpu_write_relaxed(struct mvpp2 *priv, int cpu,
+                       u32 offset, u32 data)
+{
+    writel_relaxed(data, priv->swth_base[cpu] + offset);
+}
+
+static u32 mvpp2_percpu_read_relaxed(struct mvpp2 *priv, int cpu,
+                     u32 offset)
+{
+    return readl_relaxed(priv->swth_base[cpu] + offset);
+}
+
 static dma_addr_t mvpp2_txdesc_dma_addr_get(struct mvpp2_port *port,
                         struct mvpp2_tx_desc *tx_desc)
 {
@@ -4442,8 +4458,8 @@ static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
                 << MVPP22_BM_ADDR_HIGH_VIRT_RLS_SHIFT) &
                 MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK;
 
-        mvpp2_percpu_write(port->priv, cpu,
-                   MVPP22_BM_ADDR_HIGH_RLS_REG, val);
+        mvpp2_percpu_write_relaxed(port->priv, cpu,
+                       MVPP22_BM_ADDR_HIGH_RLS_REG, val);
     }
 
     /* MVPP2_BM_VIRT_RLS_REG is not interpreted by HW, and simply
@@ -4451,10 +4467,10 @@ static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
      * descriptor. Instead of storing the virtual address, we
      * store the physical address
      */
-    mvpp2_percpu_write(port->priv, cpu,
-               MVPP2_BM_VIRT_RLS_REG, buf_phys_addr);
-    mvpp2_percpu_write(port->priv, cpu,
-               MVPP2_BM_PHY_RLS_REG(pool), buf_dma_addr);
+    mvpp2_percpu_write_relaxed(port->priv, cpu,
+                   MVPP2_BM_VIRT_RLS_REG, buf_phys_addr);
+    mvpp2_percpu_write_relaxed(port->priv, cpu,
+                   MVPP2_BM_PHY_RLS_REG(pool), buf_dma_addr);
 
     put_cpu();
 }
@@ -5546,7 +5562,8 @@ static int mvpp2_aggr_desc_num_check(struct mvpp2 *priv,
     if ((aggr_txq->count + num) > MVPP2_AGGR_TXQ_SIZE) {
         /* Update number of occupied aggregated Tx descriptors */
         int cpu = smp_processor_id();
-        u32 val = mvpp2_read(priv, MVPP2_AGGR_TXQ_STATUS_REG(cpu));
+        u32 val = mvpp2_read_relaxed(priv,
+                         MVPP2_AGGR_TXQ_STATUS_REG(cpu));
 
         aggr_txq->count = val & MVPP2_AGGR_TXQ_PENDING_MASK;
     }
@@ -5570,9 +5587,9 @@ static int mvpp2_txq_alloc_reserved_desc(struct mvpp2 *priv,
     int cpu = smp_processor_id();
 
     val = (txq->id << MVPP2_TXQ_RSVD_REQ_Q_OFFSET) | num;
-    mvpp2_percpu_write(priv, cpu, MVPP2_TXQ_RSVD_REQ_REG, val);
+    mvpp2_percpu_write_relaxed(priv, cpu, MVPP2_TXQ_RSVD_REQ_REG, val);
 
-    val = mvpp2_percpu_read(priv, cpu, MVPP2_TXQ_RSVD_RSLT_REG);
+    val = mvpp2_percpu_read_relaxed(priv, cpu, MVPP2_TXQ_RSVD_RSLT_REG);
 
     return val & MVPP2_TXQ_RSVD_RSLT_MASK;
 }
@@ -5677,8 +5694,8 @@ static inline int mvpp2_txq_sent_desc_proc(struct mvpp2_port *port,
     u32 val;
 
     /* Reading status reg resets transmitted descriptor counter */
-    val = mvpp2_percpu_read(port->priv, smp_processor_id(),
-                MVPP2_TXQ_SENT_REG(txq->id));
+    val = mvpp2_percpu_read_relaxed(port->priv, smp_processor_id(),
+                    MVPP2_TXQ_SENT_REG(txq->id));
 
     return (val & MVPP2_TRANSMITTED_COUNT_MASK) >>
         MVPP2_TRANSMITTED_COUNT_OFFSET;
@@ -7044,8 +7061,8 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
      *
      * Each CPU has its own Rx/Tx cause register
      */
-    cause_rx_tx = mvpp2_percpu_read(port->priv, qv->sw_thread_id,
-                    MVPP2_ISR_RX_TX_CAUSE_REG(port->id));
+    cause_rx_tx = mvpp2_percpu_read_relaxed(port->priv, qv->sw_thread_id,
+                        MVPP2_ISR_RX_TX_CAUSE_REG(port->id));
 
     cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK;
     if (cause_misc) {
-- 
2.7.4