Blame SOURCES/net-packet-make-tp_drops-atomic.patch

54c744
From 21d5e92b26cc9b90b522ef7dd03e5cf09167f1cc Mon Sep 17 00:00:00 2001
54c744
From: Artem Savkov <asavkov@redhat.com>
54c744
Date: Tue, 22 Sep 2020 15:48:56 +0200
54c744
Subject: [RHEL8.2 KPATCH v2] [net] packet: make tp_drops atomic
54c744
54c744
Kernels:
54c744
4.18.0-193.el8
54c744
4.18.0-193.1.2.el8_2
54c744
4.18.0-193.6.3.el8_2
54c744
4.18.0-193.13.2.el8_2
54c744
4.18.0-193.14.3.el8_2
54c744
4.18.0-193.19.1.el8_2
54c744
54c744
Changes since last build:
54c744
[x86_64]:
54c744
af_packet.o: changed function: packet_create
54c744
af_packet.o: changed function: packet_getsockopt
54c744
af_packet.o: changed function: packet_rcv
54c744
af_packet.o: changed function: packet_sock_destruct
54c744
af_packet.o: changed function: prb_retire_current_block
54c744
af_packet.o: changed function: tpacket_rcv
54c744
54c744
[ppc64le]:
54c744
af_packet.o: changed function: packet_create
54c744
af_packet.o: changed function: packet_getsockopt
54c744
af_packet.o: changed function: packet_rcv
54c744
af_packet.o: changed function: packet_sock_destruct
54c744
af_packet.o: changed function: prb_retire_current_block
54c744
af_packet.o: changed function: run_filter
54c744
af_packet.o: changed function: tpacket_rcv
54c744
54c744
---------------------------
54c744
54c744
Modifications:
54c744
 - bpf calls altered to avoid issues with jump labels
54c744
 - tp_drops as shadow variable
54c744
54c744
Testing: reproducer from bz
54c744
54c744
commit 1513be1efa2a836cb0f4309fcf1956df3faad34c
54c744
Author: Hangbin Liu <haliu@redhat.com>
54c744
Date:   Fri Sep 11 04:19:13 2020 -0400
54c744
54c744
    [net] packet: fix overflow in tpacket_rcv
54c744
54c744
    Message-id: <20200911041913.2808606-3-haliu@redhat.com>
54c744
    Patchwork-id: 326146
54c744
    Patchwork-instance: patchwork
54c744
    O-Subject: [CVE-2020-14386 RHEL8.3 net PATCH 2/2] net/packet: fix overflow in tpacket_rcv
54c744
    Bugzilla: 1876224
54c744
    Z-Bugzilla: 1876223
54c744
    CVE: CVE-2020-14386
54c744
    RH-Acked-by: Davide Caratti <dcaratti@redhat.com>
54c744
    RH-Acked-by: Marcelo Leitner <mleitner@redhat.com>
54c744
    RH-Acked-by: Jarod Wilson <jarod@redhat.com>
54c744
    RH-Acked-by: Paolo Abeni <pabeni@redhat.com>
54c744
    RH-Acked-by: Ivan Vecera <ivecera@redhat.com>
54c744
54c744
    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1876224
54c744
    Brew: https://brewweb.devel.redhat.com/taskinfo?taskID=31276277
54c744
    Upstream Status: net.git commit acf69c946233
54c744
    CVE: CVE-2020-14386
54c744
54c744
    commit acf69c946233259ab4d64f8869d4037a198c7f06
54c744
    Author: Or Cohen <orcohen@paloaltonetworks.com>
54c744
    Date:   Thu Sep 3 21:05:28 2020 -0700
54c744
54c744
        net/packet: fix overflow in tpacket_rcv
54c744
54c744
        Using tp_reserve to calculate netoff can overflow as
54c744
        tp_reserve is unsigned int and netoff is unsigned short.
54c744
54c744
        This may lead to macoff receiving a smaller value than
54c744
        sizeof(struct virtio_net_hdr), and if po->has_vnet_hdr
54c744
        is set, an out-of-bounds write will occur when
54c744
        calling virtio_net_hdr_from_skb.
54c744
54c744
        The bug is fixed by converting netoff to unsigned int
54c744
        and checking if it exceeds USHRT_MAX.
54c744
54c744
        This addresses CVE-2020-14386
54c744
54c744
        Fixes: 8913336a7e8d ("packet: add PACKET_RESERVE sockopt")
54c744
        Signed-off-by: Or Cohen <orcohen@paloaltonetworks.com>
54c744
        Signed-off-by: Eric Dumazet <edumazet@google.com>
54c744
        Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
54c744
54c744
    Signed-off-by: Hangbin Liu <haliu@redhat.com>
54c744
    Signed-off-by: Timothy Redaelli <tredaelli@redhat.com>
54c744
    Signed-off-by: Bruno Meneguele <bmeneg@redhat.com>
54c744
54c744
commit 5d07c2093eec0b75b60f6087a6c1b1f79c46e20c
54c744
Author: Hangbin Liu <haliu@redhat.com>
54c744
Date:   Fri Sep 11 04:19:12 2020 -0400
54c744
54c744
    [net] packet: make tp_drops atomic
54c744
54c744
    Message-id: <20200911041913.2808606-2-haliu@redhat.com>
54c744
    Patchwork-id: 326145
54c744
    Patchwork-instance: patchwork
54c744
    O-Subject: [CVE-2020-14386 RHEL8.3 net PATCH 1/2] net/packet: make tp_drops atomic
54c744
    Bugzilla: 1876224
54c744
    Z-Bugzilla: 1876223
54c744
    CVE: CVE-2020-14386
54c744
    RH-Acked-by: Davide Caratti <dcaratti@redhat.com>
54c744
    RH-Acked-by: Marcelo Leitner <mleitner@redhat.com>
54c744
    RH-Acked-by: Jarod Wilson <jarod@redhat.com>
54c744
    RH-Acked-by: Paolo Abeni <pabeni@redhat.com>
54c744
    RH-Acked-by: Ivan Vecera <ivecera@redhat.com>
54c744
54c744
    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1876224
54c744
    Brew: https://brewweb.devel.redhat.com/taskinfo?taskID=31276277
54c744
    Upstream Status: net.git commit 8e8e2951e309
54c744
54c744
    commit 8e8e2951e3095732d7e780c241f61ea130955a57
54c744
    Author: Eric Dumazet <edumazet@google.com>
54c744
    Date:   Wed Jun 12 09:52:30 2019 -0700
54c744
54c744
        net/packet: make tp_drops atomic
54c744
54c744
        Under DDOS, we want to be able to increment tp_drops without
54c744
        touching the spinlock. This will help readers to drain
54c744
        the receive queue slightly faster :/
54c744
54c744
        Signed-off-by: Eric Dumazet <edumazet@google.com>
54c744
        Signed-off-by: David S. Miller <davem@davemloft.net>
54c744
54c744
    Signed-off-by: Hangbin Liu <haliu@redhat.com>
54c744
    Signed-off-by: Timothy Redaelli <tredaelli@redhat.com>
54c744
    Signed-off-by: Bruno Meneguele <bmeneg@redhat.com>
54c744
54c744
Acked-by: Joe Lawrence <joe.lawrence@redhat.com>
54c744
Acked-by: Yannick Cote <ycote@redhat.com>
54c744
Signed-off-by: Artem Savkov <asavkov@redhat.com>
54c744
54c744
---
54c744
 net/packet/af_packet.c | 118 ++++++++++++++++++++++++++++++++++++-----
54c744
 1 file changed, 106 insertions(+), 12 deletions(-)
54c744
54c744
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
54c744
index d69fb2077196..4c67f7156a17 100644
54c744
--- a/net/packet/af_packet.c
54c744
+++ b/net/packet/af_packet.c
54c744
@@ -185,6 +185,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
54c744
 #define BLOCK_O2PRIV(x)	((x)->offset_to_priv)
54c744
 #define BLOCK_PRIV(x)		((void *)((char *)(x) + BLOCK_O2PRIV(x)))
54c744
 
54c744
+#define KLP_SHADOW_TP_DROPS 0x2020143860000000
54c744
+
54c744
 struct packet_sock;
54c744
 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
54c744
 		       struct packet_type *pt, struct net_device *orig_dev);
54c744
@@ -747,6 +749,8 @@ static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
54c744
 #endif
54c744
 }
54c744
 
54c744
+#include "kpatch-macros.h"
54c744
+
54c744
 /*
54c744
  * Side effect:
54c744
  *
54c744
@@ -765,8 +769,9 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1,
54c744
 	struct tpacket3_hdr *last_pkt;
54c744
 	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
54c744
 	struct sock *sk = &po->sk;
54c744
+	atomic_t *tp_drops = klp_shadow_get(po, KLP_SHADOW_TP_DROPS);
54c744
 
54c744
-	if (po->stats.stats3.tp_drops)
54c744
+	if (tp_drops && atomic_read(tp_drops))
54c744
 		status |= TP_STATUS_LOSING;
54c744
 
54c744
 	last_pkt = (struct tpacket3_hdr *)pkc1->prev;
54c744
@@ -1281,6 +1286,8 @@ static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
54c744
 
54c744
 static void packet_sock_destruct(struct sock *sk)
54c744
 {
54c744
+	struct packet_sock *po = pkt_sk(sk);
54c744
+
54c744
 	skb_queue_purge(&sk->sk_error_queue);
54c744
 
54c744
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
54c744
@@ -1291,6 +1298,8 @@ static void packet_sock_destruct(struct sock *sk)
54c744
 		return;
54c744
 	}
54c744
 
54c744
+	klp_shadow_free(po, KLP_SHADOW_TP_DROPS, NULL);
54c744
+
54c744
 	sk_refcnt_debug_dec(sk);
54c744
 }
54c744
 
54c744
@@ -1994,6 +2003,38 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
54c744
 	return err;
54c744
 }
54c744
 
54c744
+#define BPF_PROG_RUN_KPATCH(prog, ctx)	({				\
54c744
+	u32 ret;						\
54c744
+	cant_sleep();						\
54c744
+	if (static_key_enabled(&bpf_stats_enabled_key)) {	\
54c744
+		struct bpf_prog_stats *stats;			\
54c744
+		u64 start = sched_clock();			\
54c744
+		ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi);	\
54c744
+		stats = this_cpu_ptr(prog->aux->stats);		\
54c744
+		u64_stats_update_begin(&stats->syncp);		\
54c744
+		stats->cnt++;					\
54c744
+		stats->nsecs += sched_clock() - start;		\
54c744
+		u64_stats_update_end(&stats->syncp);		\
54c744
+	} else {						\
54c744
+		ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi);	\
54c744
+	}							\
54c744
+	ret; })
54c744
+
54c744
+static inline u32 bpf_prog_run_clear_cb_kpatch(const struct bpf_prog *prog,
54c744
+					struct sk_buff *skb)
54c744
+{
54c744
+	u8 *cb_data = bpf_skb_cb(skb);
54c744
+	u32 res;
54c744
+
54c744
+	if (unlikely(prog->cb_access))
54c744
+		memset(cb_data, 0, BPF_SKB_CB_LEN);
54c744
+
54c744
+	preempt_disable();
54c744
+	res = BPF_PROG_RUN_KPATCH(prog, skb);
54c744
+	preempt_enable();
54c744
+	return res;
54c744
+}
54c744
+
54c744
 static unsigned int run_filter(struct sk_buff *skb,
54c744
 			       const struct sock *sk,
54c744
 			       unsigned int res)
54c744
@@ -2003,7 +2044,7 @@ static unsigned int run_filter(struct sk_buff *skb,
54c744
 	rcu_read_lock();
54c744
 	filter = rcu_dereference(sk->sk_filter);
54c744
 	if (filter != NULL)
54c744
-		res = bpf_prog_run_clear_cb(filter->prog, skb);
54c744
+		res = bpf_prog_run_clear_cb_kpatch(filter->prog, skb);
54c744
 	rcu_read_unlock();
54c744
 
54c744
 	return res;
54c744
@@ -2046,6 +2087,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
54c744
 	int skb_len = skb->len;
54c744
 	unsigned int snaplen, res;
54c744
 	bool is_drop_n_account = false;
54c744
+	atomic_t *tp_drops;
54c744
 
54c744
 	if (skb->pkt_type == PACKET_LOOPBACK)
54c744
 		goto drop;
54c744
@@ -2053,6 +2095,17 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
54c744
 	sk = pt->af_packet_priv;
54c744
 	po = pkt_sk(sk);
54c744
 
54c744
+	tp_drops = klp_shadow_get(po, KLP_SHADOW_TP_DROPS);
54c744
+	if (!tp_drops) {
54c744
+		tp_drops = klp_shadow_alloc(po, KLP_SHADOW_TP_DROPS,
54c744
+					    sizeof(atomic_t*), GFP_ATOMIC,
54c744
+					    NULL, NULL);
54c744
+		if (!tp_drops)
54c744
+			goto drop;
54c744
+
54c744
+		atomic_set(tp_drops, po->stats.stats1.tp_drops);
54c744
+	}
54c744
+
54c744
 	if (!net_eq(dev_net(dev), sock_net(sk)))
54c744
 		goto drop;
54c744
 
54c744
@@ -2135,10 +2188,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
54c744
 
54c744
 drop_n_acct:
54c744
 	is_drop_n_account = true;
54c744
-	spin_lock(&sk->sk_receive_queue.lock);
54c744
-	po->stats.stats1.tp_drops++;
54c744
+	atomic_inc(tp_drops);
54c744
 	atomic_inc(&sk->sk_drops);
54c744
-	spin_unlock(&sk->sk_receive_queue.lock);
54c744
 
54c744
 drop_n_restore:
54c744
 	if (skb_head != skb->data && skb_shared(skb)) {
54c744
@@ -2164,12 +2215,14 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
54c744
 	int skb_len = skb->len;
54c744
 	unsigned int snaplen, res;
54c744
 	unsigned long status = TP_STATUS_USER;
54c744
-	unsigned short macoff, netoff, hdrlen;
54c744
+	unsigned short macoff, hdrlen;
54c744
+	unsigned int netoff;
54c744
 	struct sk_buff *copy_skb = NULL;
54c744
 	struct timespec ts;
54c744
 	__u32 ts_status;
54c744
 	bool is_drop_n_account = false;
54c744
 	bool do_vnet = false;
54c744
+	atomic_t *tp_drops;
54c744
 
54c744
 	/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
54c744
 	 * We may add members to them until current aligned size without forcing
54c744
@@ -2184,6 +2237,17 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
54c744
 	sk = pt->af_packet_priv;
54c744
 	po = pkt_sk(sk);
54c744
 
54c744
+	tp_drops = klp_shadow_get(po, KLP_SHADOW_TP_DROPS);
54c744
+	if (!tp_drops) {
54c744
+		tp_drops = klp_shadow_alloc(po, KLP_SHADOW_TP_DROPS,
54c744
+					    sizeof(atomic_t*), GFP_ATOMIC,
54c744
+					    NULL, NULL);
54c744
+		if (!tp_drops)
54c744
+			goto drop;
54c744
+
54c744
+		atomic_set(tp_drops, po->stats.stats1.tp_drops);
54c744
+	}
54c744
+
54c744
 	if (!net_eq(dev_net(dev), sock_net(sk)))
54c744
 		goto drop;
54c744
 
54c744
@@ -2226,6 +2290,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
54c744
 		}
54c744
 		macoff = netoff - maclen;
54c744
 	}
54c744
+	if (netoff > USHRT_MAX) {
54c744
+		atomic_inc(tp_drops);
54c744
+		goto drop_n_restore;
54c744
+	}
54c744
 	if (po->tp_version <= TPACKET_V2) {
54c744
 		if (macoff + snaplen > po->rx_ring.frame_size) {
54c744
 			if (po->copy_thresh &&
54c744
@@ -2272,7 +2340,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
54c744
 	 * Anyways, moving it for V1/V2 only as V3 doesn't need this
54c744
 	 * at packet level.
54c744
 	 */
54c744
-		if (po->stats.stats1.tp_drops)
54c744
+		if (atomic_read(tp_drops))
54c744
 			status |= TP_STATUS_LOSING;
54c744
 	}
54c744
 
54c744
@@ -2388,9 +2456,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
54c744
 	return 0;
54c744
 
54c744
 drop_n_account:
54c744
-	is_drop_n_account = true;
54c744
-	po->stats.stats1.tp_drops++;
54c744
 	spin_unlock(&sk->sk_receive_queue.lock);
54c744
+	atomic_inc(tp_drops);
54c744
+	is_drop_n_account = true;
54c744
 
54c744
 	sk->sk_data_ready(sk);
54c744
 	kfree_skb(copy_skb);
54c744
@@ -3195,6 +3263,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
54c744
 	struct sock *sk;
54c744
 	struct packet_sock *po;
54c744
 	__be16 proto = (__force __be16)protocol; /* weird, but documented */
54c744
+	atomic_t *tp_drops;
54c744
 	int err;
54c744
 
54c744
 	if (!ns_capable(net->user_ns, CAP_NET_RAW))
54c744
@@ -3221,9 +3290,16 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
54c744
 	po->num = proto;
54c744
 	po->xmit = dev_queue_xmit;
54c744
 
54c744
+	tp_drops = klp_shadow_get_or_alloc(po, KLP_SHADOW_TP_DROPS,
54c744
+					   sizeof(atomic_t*), GFP_KERNEL,
54c744
+					   NULL, NULL);
54c744
+
54c744
+	if (!tp_drops)
54c744
+		goto out2;
54c744
+
54c744
 	err = packet_alloc_pending(po);
54c744
 	if (err)
54c744
-		goto out2;
54c744
+		goto out3;
54c744
 
54c744
 	packet_cached_dev_reset(po);
54c744
 
54c744
@@ -3258,6 +3334,8 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
54c744
 	preempt_enable();
54c744
 
54c744
 	return 0;
54c744
+out3:
54c744
+	klp_shadow_free(po, KLP_SHADOW_TP_DROPS, NULL);
54c744
 out2:
54c744
 	sk_free(sk);
54c744
 out:
54c744
@@ -3873,6 +3951,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
54c744
 	void *data = &val;
54c744
 	union tpacket_stats_u st;
54c744
 	struct tpacket_rollover_stats rstats;
54c744
+	int drops;
54c744
+	atomic_t *tp_drops;
54c744
 
54c744
 	if (level != SOL_PACKET)
54c744
 		return -ENOPROTOOPT;
54c744
@@ -3883,20 +3963,34 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
54c744
 	if (len < 0)
54c744
 		return -EINVAL;
54c744
 
54c744
+	tp_drops = klp_shadow_get(po, KLP_SHADOW_TP_DROPS);
54c744
+	if (!tp_drops) {
54c744
+		tp_drops = klp_shadow_alloc(po, KLP_SHADOW_TP_DROPS,
54c744
+					    sizeof(atomic_t*), GFP_ATOMIC,
54c744
+					    NULL, NULL);
54c744
+		if (!tp_drops)
54c744
+			return -ENOMEM;
54c744
+
54c744
+		atomic_set(tp_drops, po->stats.stats1.tp_drops);
54c744
+	}
54c744
+
54c744
 	switch (optname) {
54c744
 	case PACKET_STATISTICS:
54c744
 		spin_lock_bh(&sk->sk_receive_queue.lock);
54c744
 		memcpy(&st, &po->stats, sizeof(st));
54c744
 		memset(&po->stats, 0, sizeof(po->stats));
54c744
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
54c744
+		drops = atomic_xchg(tp_drops, 0);
54c744
 
54c744
 		if (po->tp_version == TPACKET_V3) {
54c744
 			lv = sizeof(struct tpacket_stats_v3);
54c744
-			st.stats3.tp_packets += st.stats3.tp_drops;
54c744
+			st.stats3.tp_drops = drops;
54c744
+			st.stats3.tp_packets += drops;
54c744
 			data = &st.stats3;
54c744
 		} else {
54c744
 			lv = sizeof(struct tpacket_stats);
54c744
-			st.stats1.tp_packets += st.stats1.tp_drops;
54c744
+			st.stats1.tp_drops = drops;
54c744
+			st.stats1.tp_packets += drops;
54c744
 			data = &st.stats1;
54c744
 		}
54c744
 
54c744
-- 
54c744
2.26.2
54c744