Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/net/packet/Kconfig b/net/packet/Kconfig
index b4abad1..2997382 100644
--- a/net/packet/Kconfig
+++ b/net/packet/Kconfig
@@ -5,7 +5,7 @@
config PACKET
tristate "Packet socket"
- ---help---
+ help
The Packet protocol is used by applications which communicate
directly with network devices without an intermediate network
protocol implemented in the kernel, e.g. tcpdump. If you want them
@@ -20,6 +20,6 @@
tristate "Packet: sockets monitoring interface"
depends on PACKET
default n
- ---help---
+ help
Support for PF_PACKET sockets monitoring interface used by the ss tool.
If unsure, say Y.
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 0ffbf3d..d0c95d7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -93,52 +93,56 @@
/*
Assumptions:
- - if device has no dev->hard_header routine, it adds and removes ll header
- inside itself. In this case ll header is invisible outside of device,
- but higher levels still should reserve dev->hard_header_len.
- Some devices are enough clever to reallocate skb, when header
- will not fit to reserved space (tunnel), another ones are silly
- (PPP).
+ - If the device has no dev->header_ops->create, there is no LL header
+ visible above the device. In this case, its hard_header_len should be 0.
+ The device may prepend its own header internally. In this case, its
+ needed_headroom should be set to the space needed for it to add its
+ internal header.
+ For example, a WiFi driver pretending to be an Ethernet driver should
+ set its hard_header_len to be the Ethernet header length, and set its
+ needed_headroom to be (the real WiFi header length - the fake Ethernet
+ header length).
- packet socket receives packets with pulled ll header,
so that SOCK_RAW should push it back.
On receive:
-----------
-Incoming, dev->hard_header!=NULL
+Incoming, dev_has_header(dev) == true
mac_header -> ll header
data -> data
-Outgoing, dev->hard_header!=NULL
+Outgoing, dev_has_header(dev) == true
mac_header -> ll header
data -> ll header
-Incoming, dev->hard_header==NULL
- mac_header -> UNKNOWN position. It is very likely, that it points to ll
- header. PPP makes it, that is wrong, because introduce
- assymetry between rx and tx paths.
+Incoming, dev_has_header(dev) == false
+ mac_header -> data
+ However drivers often make it point to the ll header.
+ This is incorrect because the ll header should be invisible to us.
data -> data
-Outgoing, dev->hard_header==NULL
- mac_header -> data. ll header is still not built!
+Outgoing, dev_has_header(dev) == false
+ mac_header -> data. ll header is invisible to us.
data -> data
Resume
- If dev->hard_header==NULL we are unlikely to restore sensible ll header.
+ If dev_has_header(dev) == false we are unable to restore the ll header,
+ because it is invisible to us.
On transmit:
------------
-dev->hard_header != NULL
+dev->header_ops != NULL
mac_header -> ll header
data -> ll header
-dev->hard_header == NULL (ll header is added by device, we cannot control it)
+dev->header_ops == NULL (ll header is invisible to us)
mac_header -> data
data -> data
- We should set nh.raw on output to correct posistion,
+ We should set network_header on output to the correct position,
packet classifier depends on it.
*/
@@ -177,7 +181,6 @@
#define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len)
#define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num)
#define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
-#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x)))
struct packet_sock;
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -408,17 +411,18 @@
}
}
-static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
+static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
unsigned int flags)
{
struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
if (shhwtstamps &&
(flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
- ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
+ ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts))
return TP_STATUS_TS_RAW_HARDWARE;
- if (ktime_to_timespec_cond(skb->tstamp, ts))
+ if ((flags & SOF_TIMESTAMPING_SOFTWARE) &&
+ ktime_to_timespec64_cond(skb->tstamp, ts))
return TP_STATUS_TS_SOFTWARE;
return 0;
@@ -428,13 +432,20 @@
struct sk_buff *skb)
{
union tpacket_uhdr h;
- struct timespec ts;
+ struct timespec64 ts;
__u32 ts_status;
if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
return 0;
h.raw = frame;
+ /*
+ * versions 1 through 3 overflow the timestamps in y2106, since they
+ * all store the seconds in a 32-bit unsigned integer.
+ * If we create a version 4, that should have a 64-bit timestamp,
+ * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit
+ * nanoseconds.
+ */
switch (po->tp_version) {
case TPACKET_V1:
h.h1->tp_sec = ts.tv_sec;
@@ -520,7 +531,7 @@
int blk_size_in_bytes)
{
struct net_device *dev;
- unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
+ unsigned int mbits, div;
struct ethtool_link_ksettings ecmd;
int err;
@@ -532,31 +543,25 @@
}
err = __ethtool_get_link_ksettings(dev, &ecmd);
rtnl_unlock();
- if (!err) {
- /*
- * If the link speed is so slow you don't really
- * need to worry about perf anyways
- */
- if (ecmd.base.speed < SPEED_1000 ||
- ecmd.base.speed == SPEED_UNKNOWN) {
- return DEFAULT_PRB_RETIRE_TOV;
- } else {
- msec = 1;
- div = ecmd.base.speed / 1000;
- }
- } else
+ if (err)
return DEFAULT_PRB_RETIRE_TOV;
+ /* If the link speed is so slow you don't really
+ * need to worry about perf anyways
+ */
+ if (ecmd.base.speed < SPEED_1000 ||
+ ecmd.base.speed == SPEED_UNKNOWN)
+ return DEFAULT_PRB_RETIRE_TOV;
+
+ div = ecmd.base.speed / 1000;
mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
if (div)
mbits /= div;
- tmo = mbits * msec;
-
if (div)
- return tmo+1;
- return tmo;
+ return mbits + 1;
+ return mbits;
}
static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
@@ -592,6 +597,7 @@
req_u->req3.tp_block_size);
p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
+ rwlock_init(&p1->blk_fill_in_prog_lock);
p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
prb_init_ft_ops(p1, req_u);
@@ -658,10 +664,9 @@
*
*/
if (BLOCK_NUM_PKTS(pbd)) {
- while (atomic_read(&pkc->blk_fill_in_prog)) {
- /* Waiting for skb_copy_bits to finish... */
- cpu_relax();
- }
+ /* Waiting for skb_copy_bits to finish... */
+ write_lock(&pkc->blk_fill_in_prog_lock);
+ write_unlock(&pkc->blk_fill_in_prog_lock);
}
if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
@@ -775,8 +780,8 @@
* It shouldn't really happen as we don't close empty
* blocks. See prb_retire_rx_blk_timer_expired().
*/
- struct timespec ts;
- getnstimeofday(&ts);
+ struct timespec64 ts;
+ ktime_get_real_ts64(&ts);
h1->ts_last_pkt.ts_sec = ts.tv_sec;
h1->ts_last_pkt.ts_nsec = ts.tv_nsec;
}
@@ -806,7 +811,7 @@
static void prb_open_block(struct tpacket_kbdq_core *pkc1,
struct tpacket_block_desc *pbd1)
{
- struct timespec ts;
+ struct timespec64 ts;
struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
smp_rmb();
@@ -819,7 +824,7 @@
BLOCK_NUM_PKTS(pbd1) = 0;
BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
- getnstimeofday(&ts);
+ ktime_get_real_ts64(&ts);
h1->ts_first_pkt.ts_sec = ts.tv_sec;
h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
@@ -920,10 +925,9 @@
* the timer-handler already handled this case.
*/
if (!(status & TP_STATUS_BLK_TMO)) {
- while (atomic_read(&pkc->blk_fill_in_prog)) {
- /* Waiting for skb_copy_bits to finish... */
- cpu_relax();
- }
+ /* Waiting for skb_copy_bits to finish... */
+ write_lock(&pkc->blk_fill_in_prog_lock);
+ write_unlock(&pkc->blk_fill_in_prog_lock);
}
prb_close_block(pkc, pbd, po, status);
return;
@@ -944,7 +948,8 @@
__releases(&pkc->blk_fill_in_prog_lock)
{
struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
- atomic_dec(&pkc->blk_fill_in_prog);
+
+ read_unlock(&pkc->blk_fill_in_prog_lock);
}
static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
@@ -999,7 +1004,7 @@
pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
BLOCK_NUM_PKTS(pbd) += 1;
- atomic_inc(&pkc->blk_fill_in_prog);
+ read_lock(&pkc->blk_fill_in_prog_lock);
prb_run_all_ft_ops(pkc, ppd);
}
@@ -1354,7 +1359,7 @@
struct packet_sock *po, *po_next, *po_skip = NULL;
unsigned int i, j, room = ROOM_NONE;
- po = pkt_sk(f->arr[idx]);
+ po = pkt_sk(rcu_dereference(f->arr[idx]));
if (try_self) {
room = packet_rcv_has_room(po, skb);
@@ -1366,7 +1371,7 @@
i = j = min_t(int, po->rollover->sock, num - 1);
do {
- po_next = pkt_sk(f->arr[i]);
+ po_next = pkt_sk(rcu_dereference(f->arr[i]));
if (po_next != po_skip && !READ_ONCE(po_next->pressure) &&
packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
if (i != j)
@@ -1461,7 +1466,7 @@
if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
idx = fanout_demux_rollover(f, skb, idx, true, num);
- po = pkt_sk(f->arr[idx]);
+ po = pkt_sk(rcu_dereference(f->arr[idx]));
return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
}
@@ -1475,7 +1480,7 @@
struct packet_fanout *f = po->fanout;
spin_lock(&f->lock);
- f->arr[f->num_members] = sk;
+ rcu_assign_pointer(f->arr[f->num_members], sk);
smp_wmb();
f->num_members++;
if (f->num_members == 1)
@@ -1490,11 +1495,14 @@
spin_lock(&f->lock);
for (i = 0; i < f->num_members; i++) {
- if (f->arr[i] == sk)
+ if (rcu_dereference_protected(f->arr[i],
+ lockdep_is_held(&f->lock)) == sk)
break;
}
BUG_ON(i >= f->num_members);
- f->arr[i] = f->arr[f->num_members - 1];
+ rcu_assign_pointer(f->arr[i],
+ rcu_dereference_protected(f->arr[f->num_members - 1],
+ lockdep_is_held(&f->lock)));
f->num_members--;
if (f->num_members == 0)
__dev_remove_pack(&f->prot_hook);
@@ -1537,7 +1545,7 @@
}
}
-static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
+static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data,
unsigned int len)
{
struct bpf_prog *new;
@@ -1546,10 +1554,10 @@
if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
return -EPERM;
- if (len != sizeof(fprog))
- return -EINVAL;
- if (copy_from_user(&fprog, data, len))
- return -EFAULT;
+
+ ret = copy_bpf_fprog_from_user(&fprog, data, len);
+ if (ret)
+ return ret;
ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
if (ret)
@@ -1559,7 +1567,7 @@
return 0;
}
-static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
+static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data,
unsigned int len)
{
struct bpf_prog *new;
@@ -1569,7 +1577,7 @@
return -EPERM;
if (len != sizeof(fd))
return -EINVAL;
- if (copy_from_user(&fd, data, len))
+ if (copy_from_sockptr(&fd, data, len))
return -EFAULT;
new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
@@ -1580,7 +1588,7 @@
return 0;
}
-static int fanout_set_data(struct packet_sock *po, char __user *data,
+static int fanout_set_data(struct packet_sock *po, sockptr_t data,
unsigned int len)
{
switch (po->fanout->type) {
@@ -1632,13 +1640,15 @@
return false;
}
-static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
+static int fanout_add(struct sock *sk, struct fanout_args *args)
{
struct packet_rollover *rollover = NULL;
struct packet_sock *po = pkt_sk(sk);
+ u16 type_flags = args->type_flags;
struct packet_fanout *f, *match;
u8 type = type_flags & 0xff;
u8 flags = type_flags >> 8;
+ u16 id = args->id;
int err;
switch (type) {
@@ -1696,11 +1706,21 @@
}
}
err = -EINVAL;
- if (match && match->flags != flags)
- goto out;
- if (!match) {
+ if (match) {
+ if (match->flags != flags)
+ goto out;
+ if (args->max_num_members &&
+ args->max_num_members != match->max_num_members)
+ goto out;
+ } else {
+ if (args->max_num_members > PACKET_FANOUT_MAX)
+ goto out;
+ if (!args->max_num_members)
+ /* legacy PACKET_FANOUT_MAX */
+ args->max_num_members = 256;
err = -ENOMEM;
- match = kzalloc(sizeof(*match), GFP_KERNEL);
+ match = kvzalloc(struct_size(match, arr, args->max_num_members),
+ GFP_KERNEL);
if (!match)
goto out;
write_pnet(&match->net, sock_net(sk));
@@ -1715,7 +1735,9 @@
match->prot_hook.dev = po->prot_hook.dev;
match->prot_hook.func = packet_rcv_fanout;
match->prot_hook.af_packet_priv = match;
+ match->prot_hook.af_packet_net = read_pnet(&match->net);
match->prot_hook.id_match = match_fanout_group;
+ match->max_num_members = args->max_num_members;
list_add(&match->list, &fanout_list);
}
err = -EINVAL;
@@ -1726,9 +1748,12 @@
match->prot_hook.type == po->prot_hook.type &&
match->prot_hook.dev == po->prot_hook.dev) {
err = -ENOSPC;
- if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
+ if (refcount_read(&match->sk_ref) < match->max_num_members) {
__dev_remove_pack(&po->prot_hook);
- po->fanout = match;
+
+ /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
+ WRITE_ONCE(po->fanout, match);
+
po->rollover = rollover;
rollover = NULL;
refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
@@ -1740,7 +1765,7 @@
if (err && !refcount_read(&match->sk_ref)) {
list_del(&match->list);
- kfree(match);
+ kvfree(match);
}
out:
@@ -2065,7 +2090,7 @@
skb->dev = dev;
- if (dev->header_ops) {
+ if (dev_has_header(dev)) {
/* The device has an explicit notion of ll header,
* exported to higher levels.
*
@@ -2172,7 +2197,7 @@
unsigned short macoff, hdrlen;
unsigned int netoff;
struct sk_buff *copy_skb = NULL;
- struct timespec ts;
+ struct timespec64 ts;
__u32 ts_status;
bool is_drop_n_account = false;
unsigned int slot_id = 0;
@@ -2194,7 +2219,7 @@
if (!net_eq(dev_net(dev), sock_net(sk)))
goto drop;
- if (dev->header_ops) {
+ if (dev_has_header(dev)) {
if (sk->sk_type != SOCK_DGRAM)
skb_push(skb, skb->data - skb_mac_header(skb));
else if (skb->pkt_type == PACKET_OUTGOING) {
@@ -2253,8 +2278,11 @@
copy_skb = skb_get(skb);
skb_head = skb->data;
}
- if (copy_skb)
+ if (copy_skb) {
+ memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
+ sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
skb_set_owner_r(copy_skb, sk);
+ }
}
snaplen = po->rx_ring.frame_size - macoff;
if ((int)snaplen < 0) {
@@ -2319,8 +2347,13 @@
skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
- if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
- getnstimeofday(&ts);
+ /* Always timestamp; prefer an existing software timestamp taken
+ * closer to the time of capture.
+ */
+ ts_status = tpacket_get_timestamp(skb, &ts,
+ po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE);
+ if (!ts_status)
+ ktime_get_real_ts64(&ts);
status |= ts_status;
@@ -3007,10 +3040,13 @@
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
- if (po->tx_ring.pg_vec)
+ /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy.
+ * tpacket_snd() will redo the check safely.
+ */
+ if (data_race(po->tx_ring.pg_vec))
return tpacket_snd(po, msg);
- else
- return packet_snd(sock, msg, len);
+
+ return packet_snd(sock, msg, len);
}
/*
@@ -3071,7 +3107,7 @@
kfree(po->rollover);
if (f) {
fanout_release_data(f);
- kfree(f);
+ kvfree(f);
}
/*
* Now the socket is dead. No more input will appear.
@@ -3294,6 +3330,7 @@
po->prot_hook.func = packet_rcv_spkt;
po->prot_hook.af_packet_priv = sk;
+ po->prot_hook.af_packet_net = sock_net(sk);
if (proto) {
po->prot_hook.type = proto;
@@ -3400,6 +3437,8 @@
sock_recv_ts_and_drops(msg, sk, skb);
if (msg->msg_name) {
+ const size_t max_len = min(sizeof(skb->cb),
+ sizeof(struct sockaddr_storage));
int copy_len;
/* If the address length field is there to be filled
@@ -3422,6 +3461,10 @@
msg->msg_namelen = sizeof(struct sockaddr_ll);
}
}
+ if (WARN_ON_ONCE(copy_len > max_len)) {
+ copy_len = max_len;
+ msg->msg_namelen = copy_len;
+ }
memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
}
@@ -3663,7 +3706,8 @@
}
static int
-packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
+packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
+ unsigned int optlen)
{
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
@@ -3683,7 +3727,7 @@
return -EINVAL;
if (len > sizeof(mreq))
len = sizeof(mreq);
- if (copy_from_user(&mreq, optval, len))
+ if (copy_from_sockptr(&mreq, optval, len))
return -EFAULT;
if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
return -EINVAL;
@@ -3714,7 +3758,7 @@
if (optlen < len) {
ret = -EINVAL;
} else {
- if (copy_from_user(&req_u.req, optval, len))
+ if (copy_from_sockptr(&req_u.req, optval, len))
ret = -EFAULT;
else
ret = packet_set_ring(sk, &req_u, 0,
@@ -3729,7 +3773,7 @@
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
pkt_sk(sk)->copy_thresh = val;
@@ -3741,7 +3785,7 @@
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
switch (val) {
case TPACKET_V1:
@@ -3767,7 +3811,7 @@
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
if (val > INT_MAX)
return -EINVAL;
@@ -3787,7 +3831,7 @@
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3806,7 +3850,7 @@
if (optlen < sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3820,7 +3864,7 @@
if (optlen < sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3836,7 +3880,7 @@
return -EINVAL;
if (optlen < sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3855,7 +3899,7 @@
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
po->tp_tstamp = val;
@@ -3863,18 +3907,19 @@
}
case PACKET_FANOUT:
{
- int val;
+ struct fanout_args args = { 0 };
- if (optlen != sizeof(val))
+ if (optlen != sizeof(int) && optlen != sizeof(args))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&args, optval, optlen))
return -EFAULT;
- return fanout_add(sk, val & 0xffff, val >> 16);
+ return fanout_add(sk, &args);
}
case PACKET_FANOUT_DATA:
{
- if (!po->fanout)
+ /* Paired with the WRITE_ONCE() in fanout_add() */
+ if (!READ_ONCE(po->fanout))
return -EINVAL;
return fanout_set_data(po, optval, optlen);
@@ -3885,7 +3930,7 @@
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
if (val < 0 || val > 1)
return -EINVAL;
@@ -3899,7 +3944,7 @@
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3918,7 +3963,7 @@
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
@@ -4051,28 +4096,6 @@
return 0;
}
-
-#ifdef CONFIG_COMPAT
-static int compat_packet_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- struct packet_sock *po = pkt_sk(sock->sk);
-
- if (level != SOL_PACKET)
- return -ENOPROTOOPT;
-
- if (optname == PACKET_FANOUT_DATA &&
- po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) {
- optval = (char __user *)get_compat_bpf_fprog(optval);
- if (!optval)
- return -EFAULT;
- optlen = sizeof(struct sock_fprog);
- }
-
- return packet_setsockopt(sock, level, optname, optval, optlen);
-}
-#endif
-
static int packet_notifier(struct notifier_block *this,
unsigned long msg, void *ptr)
{
@@ -4088,7 +4111,7 @@
case NETDEV_UNREGISTER:
if (po->mclist)
packet_dev_mclist_delete(dev, &po->mclist);
- /* fallthrough */
+ fallthrough;
case NETDEV_DOWN:
if (dev->ifindex == po->ifindex) {
@@ -4304,7 +4327,7 @@
struct packet_ring_buffer *rb;
struct sk_buff_head *rb_queue;
__be16 num;
- int err = -EINVAL;
+ int err;
/* Added to avoid minimal code churn */
struct tpacket_req *req = &req_u->req;
@@ -4453,9 +4476,10 @@
}
out_free_pg_vec:
- bitmap_free(rx_owner_map);
- if (pg_vec)
+ if (pg_vec) {
+ bitmap_free(rx_owner_map);
free_pg_vec(pg_vec, order, req->tp_block_nr);
+ }
out:
return err;
}
@@ -4536,8 +4560,6 @@
.gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.sendmsg = packet_sendmsg_spkt,
.recvmsg = packet_recvmsg,
.mmap = sock_no_mmap,
@@ -4560,9 +4582,6 @@
.shutdown = sock_no_shutdown,
.setsockopt = packet_setsockopt,
.getsockopt = packet_getsockopt,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_packet_setsockopt,
-#endif
.sendmsg = packet_sendmsg,
.recvmsg = packet_recvmsg,
.mmap = packet_mmap,
@@ -4639,9 +4658,11 @@
mutex_init(&net->packet.sklist_lock);
INIT_HLIST_HEAD(&net->packet.sklist);
+#ifdef CONFIG_PROC_FS
if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops,
sizeof(struct seq_net_private)))
return -ENOMEM;
+#endif /* CONFIG_PROC_FS */
return 0;
}
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 907f4cd..7af1e91 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -39,7 +39,7 @@
char *nxt_offset;
struct sk_buff *skb;
- atomic_t blk_fill_in_prog;
+ rwlock_t blk_fill_in_prog_lock;
/* Default is set to 8ms */
#define DEFAULT_PRB_RETIRE_TOV (8)
@@ -77,11 +77,12 @@
};
extern struct mutex fanout_mutex;
-#define PACKET_FANOUT_MAX 256
+#define PACKET_FANOUT_MAX (1 << 16)
struct packet_fanout {
possible_net_t net;
unsigned int num_members;
+ u32 max_num_members;
u16 id;
u8 type;
u8 flags;
@@ -90,10 +91,10 @@
struct bpf_prog __rcu *bpf_prog;
};
struct list_head list;
- struct sock *arr[PACKET_FANOUT_MAX];
spinlock_t lock;
refcount_t sk_ref;
struct packet_type prot_hook ____cacheline_aligned_in_smp;
+ struct sock __rcu *arr[];
};
struct packet_rollover {