From 21d5e92b26cc9b90b522ef7dd03e5cf09167f1cc Mon Sep 17 00:00:00 2001
From: Artem Savkov
Date: Tue, 22 Sep 2020 15:48:56 +0200
Subject: [RHEL8.2 KPATCH v2] [net] packet: make tp_drops atomic

Kernels:
4.18.0-193.el8
4.18.0-193.1.2.el8_2
4.18.0-193.6.3.el8_2
4.18.0-193.13.2.el8_2
4.18.0-193.14.3.el8_2
4.18.0-193.19.1.el8_2

Changes since last build:
[x86_64]:
af_packet.o: changed function: packet_create
af_packet.o: changed function: packet_getsockopt
af_packet.o: changed function: packet_rcv
af_packet.o: changed function: packet_sock_destruct
af_packet.o: changed function: prb_retire_current_block
af_packet.o: changed function: tpacket_rcv

[ppc64le]:
af_packet.o: changed function: packet_create
af_packet.o: changed function: packet_getsockopt
af_packet.o: changed function: packet_rcv
af_packet.o: changed function: packet_sock_destruct
af_packet.o: changed function: prb_retire_current_block
af_packet.o: changed function: run_filter
af_packet.o: changed function: tpacket_rcv

---------------------------

Modifications:
- bpf calls altered to avoid issues with jump labels
- tp_drops as shadow variable

Testing: reproducer from bz
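Two notes for reviewers on the kpatch-specific deviations listed under
Modifications. First, since a livepatch cannot grow struct packet_sock, the
upstream tp_drops counter is carried as a livepatch shadow variable attached
to each packet_sock. Second, per the "jump labels" note, the patched
run_filter() goes through BPF_PROG_RUN_KPATCH, which tests
bpf_stats_enabled_key with static_key_enabled() (a plain read of the key)
rather than through a static branch. A rough sketch of the shadow-variable
pattern the diff applies (helper names here are illustrative; only the
klp_shadow_* calls and the 0x2020143860000000 ID come from the patch):

    #include <linux/atomic.h>
    #include <linux/livepatch.h>

    #define TP_DROPS_SHADOW_ID 0x2020143860000000

    /* Look up the per-object drop counter, creating it on first use. */
    static atomic_t *tp_drops_counter(void *obj, gfp_t gfp)
    {
            atomic_t *drops = klp_shadow_get(obj, TP_DROPS_SHADOW_ID);

            if (!drops)
                    drops = klp_shadow_alloc(obj, TP_DROPS_SHADOW_ID,
                                             sizeof(*drops), gfp,
                                             NULL, NULL);
            return drops;
    }

    /* Detach and free the counter when the object itself goes away. */
    static void tp_drops_counter_free(void *obj)
    {
            klp_shadow_free(obj, TP_DROPS_SHADOW_ID, NULL);
    }

In the patch itself the counter is additionally seeded from the old
po->stats.stats1.tp_drops value the first time a pre-existing socket is
seen, so drops counted before the livepatch was applied are not lost.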
commit 1513be1efa2a836cb0f4309fcf1956df3faad34c
Author: Hangbin Liu
Date:   Fri Sep 11 04:19:13 2020 -0400

    [net] packet: fix overflow in tpacket_rcv

    Message-id: <20200911041913.2808606-3-haliu@redhat.com>
    Patchwork-id: 326146
    Patchwork-instance: patchwork
    O-Subject: [CVE-2020-14386 RHEL8.3 net PATCH 2/2] net/packet: fix overflow in tpacket_rcv
    Bugzilla: 1876224
    Z-Bugzilla: 1876223
    CVE: CVE-2020-14386
    RH-Acked-by: Davide Caratti
    RH-Acked-by: Marcelo Leitner
    RH-Acked-by: Jarod Wilson
    RH-Acked-by: Paolo Abeni
    RH-Acked-by: Ivan Vecera

    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1876224
    Brew: https://brewweb.devel.redhat.com/taskinfo?taskID=31276277
    Upstream Status: net.git commit acf69c946233
    CVE: CVE-2020-14386

    commit acf69c946233259ab4d64f8869d4037a198c7f06
    Author: Or Cohen
    Date:   Thu Sep 3 21:05:28 2020 -0700

        net/packet: fix overflow in tpacket_rcv

        Using tp_reserve to calculate netoff can overflow as tp_reserve is
        unsigned int and netoff is unsigned short. This may lead to macoff
        receiving a smaller value than sizeof(struct virtio_net_hdr), and
        if po->has_vnet_hdr is set, an out-of-bounds write will occur when
        calling virtio_net_hdr_from_skb.

        The bug is fixed by converting netoff to unsigned int and checking
        if it exceeds USHRT_MAX.

        This addresses CVE-2020-14386

        Fixes: 8913336a7e8d ("packet: add PACKET_RESERVE sockopt")
        Signed-off-by: Or Cohen
        Signed-off-by: Eric Dumazet
        Signed-off-by: Linus Torvalds

    Signed-off-by: Hangbin Liu
    Signed-off-by: Timothy Redaelli
    Signed-off-by: Bruno Meneguele
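To make the quoted overflow concrete: tp_reserve comes straight from the
PACKET_RESERVE sockopt as an unsigned int, while netoff used to be an
unsigned short, so the offset computation in tpacket_rcv() silently
truncates and macoff can end up smaller than sizeof(struct virtio_net_hdr).
A minimal userspace sketch of just the arithmetic (the constants are
illustrative, not the exact kernel offsets):

    #include <stdio.h>

    int main(void)
    {
            /* tp_reserve is a user-controlled unsigned int (PACKET_RESERVE). */
            unsigned int tp_reserve = 65536 - 78;   /* illustrative value */
            unsigned int hdrlen_term = 82;          /* stand-in for the hdrlen/maclen terms */

            /* Pre-fix type of netoff: the 32-bit sum is truncated to 16 bits. */
            unsigned short netoff = hdrlen_term + tp_reserve;

            /* Prints "netoff = 4": far smaller than the caller expects, so the
             * later macoff/virtio_net_hdr math points outside the ring frame. */
            printf("netoff = %u\n", netoff);
            return 0;
    }

With netoff widened to unsigned int and the new netoff > USHRT_MAX check in
the diff below, such a frame is counted as a drop instead of being written
into the ring.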
commit 5d07c2093eec0b75b60f6087a6c1b1f79c46e20c
Author: Hangbin Liu
Date:   Fri Sep 11 04:19:12 2020 -0400

    [net] packet: make tp_drops atomic

    Message-id: <20200911041913.2808606-2-haliu@redhat.com>
    Patchwork-id: 326145
    Patchwork-instance: patchwork
    O-Subject: [CVE-2020-14386 RHEL8.3 net PATCH 1/2] net/packet: make tp_drops atomic
    Bugzilla: 1876224
    Z-Bugzilla: 1876223
    CVE: CVE-2020-14386
    RH-Acked-by: Davide Caratti
    RH-Acked-by: Marcelo Leitner
    RH-Acked-by: Jarod Wilson
    RH-Acked-by: Paolo Abeni
    RH-Acked-by: Ivan Vecera

    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1876224
    Brew: https://brewweb.devel.redhat.com/taskinfo?taskID=31276277
    Upstream Status: net.git commit 8e8e2951e309

    commit 8e8e2951e3095732d7e780c241f61ea130955a57
    Author: Eric Dumazet
    Date:   Wed Jun 12 09:52:30 2019 -0700

        net/packet: make tp_drops atomic

        Under DDOS, we want to be able to increment tp_drops without
        touching the spinlock. This will help readers to drain the
        receive queue slightly faster :/

        Signed-off-by: Eric Dumazet
        Signed-off-by: David S. Miller

    Signed-off-by: Hangbin Liu
    Signed-off-by: Timothy Redaelli
    Signed-off-by: Bruno Meneguele

Acked-by: Joe Lawrence
Acked-by: Yannick Cote
Signed-off-by: Artem Savkov
---
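The functional core of the hunks below is that drop accounting no longer
takes the receive-queue lock. A condensed before/after kernel-C sketch
(function names are illustrative, not from the patch):

    /* before: every dropped packet bounced the receive-queue spinlock */
    static void account_drop_old(struct sock *sk, struct packet_sock *po)
    {
            spin_lock(&sk->sk_receive_queue.lock);
            po->stats.stats1.tp_drops++;
            atomic_inc(&sk->sk_drops);
            spin_unlock(&sk->sk_receive_queue.lock);
    }

    /* after: lock-free increment of the shadow counter; readers use
     * atomic_read() where the old field was consulted */
    static void account_drop_new(struct sock *sk, atomic_t *tp_drops)
    {
            atomic_inc(tp_drops);
            atomic_inc(&sk->sk_drops);
    }

PACKET_STATISTICS then drains the counter with atomic_xchg(tp_drops, 0), so
the clear-on-read semantics of the old field are preserved.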
 net/packet/af_packet.c | 118 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 106 insertions(+), 12 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d69fb2077196..4c67f7156a17 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -185,6 +185,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 #define BLOCK_O2PRIV(x)	((x)->offset_to_priv)
 #define BLOCK_PRIV(x)		((void *)((char *)(x) + BLOCK_O2PRIV(x)))
 
+#define KLP_SHADOW_TP_DROPS 0x2020143860000000
+
 struct packet_sock;
 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		       struct packet_type *pt, struct net_device *orig_dev);
@@ -747,6 +749,8 @@ static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
 #endif
 }
 
+#include "kpatch-macros.h"
+
 /*
  * Side effect:
  *
@@ -765,8 +769,9 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1,
 	struct tpacket3_hdr *last_pkt;
 	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
 	struct sock *sk = &po->sk;
+	atomic_t *tp_drops = klp_shadow_get(po, KLP_SHADOW_TP_DROPS);
 
-	if (po->stats.stats3.tp_drops)
+	if (tp_drops && atomic_read(tp_drops))
 		status |= TP_STATUS_LOSING;
 
 	last_pkt = (struct tpacket3_hdr *)pkc1->prev;
@@ -1281,6 +1286,8 @@ static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
 
 static void packet_sock_destruct(struct sock *sk)
 {
+	struct packet_sock *po = pkt_sk(sk);
+
 	skb_queue_purge(&sk->sk_error_queue);
 
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
@@ -1291,6 +1298,8 @@ static void packet_sock_destruct(struct sock *sk)
 		return;
 	}
 
+	klp_shadow_free(po, KLP_SHADOW_TP_DROPS, NULL);
+
 	sk_refcnt_debug_dec(sk);
 }
 
@@ -1994,6 +2003,38 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
 	return err;
 }
 
+#define BPF_PROG_RUN_KPATCH(prog, ctx)	({				\
+	u32 ret;							\
+	cant_sleep();							\
+	if (static_key_enabled(&bpf_stats_enabled_key)) {		\
+		struct bpf_prog_stats *stats;				\
+		u64 start = sched_clock();				\
+		ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi);		\
+		stats = this_cpu_ptr(prog->aux->stats);			\
+		u64_stats_update_begin(&stats->syncp);			\
+		stats->cnt++;						\
+		stats->nsecs += sched_clock() - start;			\
+		u64_stats_update_end(&stats->syncp);			\
+	} else {							\
+		ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi);		\
+	}								\
+	ret; })
+
+static inline u32 bpf_prog_run_clear_cb_kpatch(const struct bpf_prog *prog,
+					       struct sk_buff *skb)
+{
+	u8 *cb_data = bpf_skb_cb(skb);
+	u32 res;
+
+	if (unlikely(prog->cb_access))
+		memset(cb_data, 0, BPF_SKB_CB_LEN);
+
+	preempt_disable();
+	res = BPF_PROG_RUN_KPATCH(prog, skb);
+	preempt_enable();
+	return res;
+}
+
 static unsigned int run_filter(struct sk_buff *skb,
 			       const struct sock *sk,
 			       unsigned int res)
@@ -2003,7 +2044,7 @@ static unsigned int run_filter(struct sk_buff *skb,
 	rcu_read_lock();
 	filter = rcu_dereference(sk->sk_filter);
 	if (filter != NULL)
-		res = bpf_prog_run_clear_cb(filter->prog, skb);
+		res = bpf_prog_run_clear_cb_kpatch(filter->prog, skb);
 	rcu_read_unlock();
 
 	return res;
@@ -2046,6 +2087,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 	int skb_len = skb->len;
 	unsigned int snaplen, res;
 	bool is_drop_n_account = false;
+	atomic_t *tp_drops;
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto drop;
@@ -2053,6 +2095,17 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 	sk = pt->af_packet_priv;
 	po = pkt_sk(sk);
 
+	tp_drops = klp_shadow_get(po, KLP_SHADOW_TP_DROPS);
+	if (!tp_drops) {
+		tp_drops = klp_shadow_alloc(po, KLP_SHADOW_TP_DROPS,
+					    sizeof(atomic_t*), GFP_ATOMIC,
+					    NULL, NULL);
+		if (!tp_drops)
+			goto drop;
+
+		atomic_set(tp_drops, po->stats.stats1.tp_drops);
+	}
+
 	if (!net_eq(dev_net(dev), sock_net(sk)))
 		goto drop;
 
@@ -2135,10 +2188,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 
 drop_n_acct:
 	is_drop_n_account = true;
-	spin_lock(&sk->sk_receive_queue.lock);
-	po->stats.stats1.tp_drops++;
+	atomic_inc(tp_drops);
 	atomic_inc(&sk->sk_drops);
-	spin_unlock(&sk->sk_receive_queue.lock);
 
 drop_n_restore:
 	if (skb_head != skb->data && skb_shared(skb)) {
@@ -2164,12 +2215,14 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	int skb_len = skb->len;
 	unsigned int snaplen, res;
 	unsigned long status = TP_STATUS_USER;
-	unsigned short macoff, netoff, hdrlen;
+	unsigned short macoff, hdrlen;
+	unsigned int netoff;
 	struct sk_buff *copy_skb = NULL;
 	struct timespec ts;
 	__u32 ts_status;
 	bool is_drop_n_account = false;
 	bool do_vnet = false;
+	atomic_t *tp_drops;
 
 	/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
 	 * We may add members to them until current aligned size without forcing
@@ -2184,6 +2237,17 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	sk = pt->af_packet_priv;
 	po = pkt_sk(sk);
 
+	tp_drops = klp_shadow_get(po, KLP_SHADOW_TP_DROPS);
+	if (!tp_drops) {
+		tp_drops = klp_shadow_alloc(po, KLP_SHADOW_TP_DROPS,
+					    sizeof(atomic_t*), GFP_ATOMIC,
+					    NULL, NULL);
+		if (!tp_drops)
+			goto drop;
+
+		atomic_set(tp_drops, po->stats.stats1.tp_drops);
+	}
+
 	if (!net_eq(dev_net(dev), sock_net(sk)))
 		goto drop;
 
@@ -2226,6 +2290,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		}
 		macoff = netoff - maclen;
 	}
+	if (netoff > USHRT_MAX) {
+		atomic_inc(tp_drops);
+		goto drop_n_restore;
+	}
 	if (po->tp_version <= TPACKET_V2) {
 		if (macoff + snaplen > po->rx_ring.frame_size) {
 			if (po->copy_thresh &&
@@ -2272,7 +2340,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	 * Anyways, moving it for V1/V2 only as V3 doesn't need this
 	 * at packet level.
 	 */
-		if (po->stats.stats1.tp_drops)
+		if (atomic_read(tp_drops))
 			status |= TP_STATUS_LOSING;
 	}
 
@@ -2388,9 +2456,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	return 0;
 
 drop_n_account:
-	is_drop_n_account = true;
-	po->stats.stats1.tp_drops++;
 	spin_unlock(&sk->sk_receive_queue.lock);
+	atomic_inc(tp_drops);
+	is_drop_n_account = true;
 
 	sk->sk_data_ready(sk);
 	kfree_skb(copy_skb);
@@ -3195,6 +3263,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 	struct sock *sk;
 	struct packet_sock *po;
 	__be16 proto = (__force __be16)protocol; /* weird, but documented */
+	atomic_t *tp_drops;
 	int err;
 
 	if (!ns_capable(net->user_ns, CAP_NET_RAW))
@@ -3221,9 +3290,16 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 	po->num = proto;
 	po->xmit = dev_queue_xmit;
 
+	tp_drops = klp_shadow_get_or_alloc(po, KLP_SHADOW_TP_DROPS,
+					   sizeof(atomic_t*), GFP_KERNEL,
+					   NULL, NULL);
+
+	if (!tp_drops)
+		goto out2;
+
 	err = packet_alloc_pending(po);
 	if (err)
-		goto out2;
+		goto out3;
 
 	packet_cached_dev_reset(po);
 
@@ -3258,6 +3334,8 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 	preempt_enable();
 
 	return 0;
+out3:
+	klp_shadow_free(po, KLP_SHADOW_TP_DROPS, NULL);
 out2:
 	sk_free(sk);
 out:
@@ -3873,6 +3951,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 	void *data = &val;
 	union tpacket_stats_u st;
 	struct tpacket_rollover_stats rstats;
+	int drops;
+	atomic_t *tp_drops;
 
 	if (level != SOL_PACKET)
 		return -ENOPROTOOPT;
@@ -3883,20 +3963,34 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 	if (len < 0)
 		return -EINVAL;
 
+	tp_drops = klp_shadow_get(po, KLP_SHADOW_TP_DROPS);
+	if (!tp_drops) {
+		tp_drops = klp_shadow_alloc(po, KLP_SHADOW_TP_DROPS,
+					    sizeof(atomic_t*), GFP_ATOMIC,
+					    NULL, NULL);
+		if (!tp_drops)
+			return -ENOMEM;
+
+		atomic_set(tp_drops, po->stats.stats1.tp_drops);
+	}
+
 	switch (optname) {
 	case PACKET_STATISTICS:
 		spin_lock_bh(&sk->sk_receive_queue.lock);
 		memcpy(&st, &po->stats, sizeof(st));
 		memset(&po->stats, 0, sizeof(po->stats));
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
+		drops = atomic_xchg(tp_drops, 0);
 
 		if (po->tp_version == TPACKET_V3) {
 			lv = sizeof(struct tpacket_stats_v3);
-			st.stats3.tp_packets += st.stats3.tp_drops;
+			st.stats3.tp_drops = drops;
+			st.stats3.tp_packets += drops;
 			data = &st.stats3;
 		} else {
			lv = sizeof(struct tpacket_stats);
-			st.stats1.tp_packets += st.stats1.tp_drops;
+			st.stats1.tp_drops = drops;
+			st.stats1.tp_packets += drops;
			data = &st.stats1;
		}
-- 
2.26.2