1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * NET4: Implementation of BSD Unix domain sockets.
4 *
5 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
6 *
7 * Fixes:
8 * Linus Torvalds : Assorted bug cures.
9 * Niibe Yutaka : async I/O support.
10 * Carsten Paeth : PF_UNIX check, address fixes.
11 * Alan Cox : Limit size of allocated blocks.
12 * Alan Cox : Fixed the stupid socketpair bug.
13 * Alan Cox : BSD compatibility fine tuning.
14 * Alan Cox : Fixed a bug in connect when interrupted.
15 * Alan Cox : Sorted out a proper draft version of
16 * file descriptor passing hacked up from
17 * Mike Shaver's work.
18 * Marty Leisner : Fixes to fd passing
19 * Nick Nevin : recvmsg bugfix.
20 * Alan Cox : Started proper garbage collector
21 * Heiko EiBfeldt : Missing verify_area check
22 * Alan Cox : Started POSIXisms
23 * Andreas Schwab : Replace inode by dentry for proper
24 * reference counting
25 * Kirk Petersen : Made this a module
26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
27 * Lots of bug fixes.
28 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
29 * by the above two patches.
30 * Andrea Arcangeli : If possible we block in connect(2)
31 * if the max backlog of the listen socket
32 * has been reached. This won't break
33 * old apps and it will avoid a huge number
34 * of sockets being hashed (this is for unix_gc()
35 * performance reasons).
36 * Security fix that limits the max
37 * number of sockets to 2*max_files and
38 * the number of skbs queueable in the
39 * dgram receiver.
40 * Artur Skawina : Hash function optimizations
41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
42 * Malcolm Beattie : Set peercred for socketpair
43 * Michal Ostrowski : Module initialization cleanup.
44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
45 * the core infrastructure is doing that
46 * for all net proto families now (2.5.69+)
47 *
48 * Known differences from reference BSD that was tested:
49 *
50 * [TO FIX]
51 * ECONNREFUSED is not returned from one end of a connected() socket to the
52 * other the moment one end closes.
53 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
54 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
55 * [NOT TO FIX]
56 * accept() returns a path name even if the connecting socket has closed
57 * in the meantime (BSD loses the path and gives up).
58 * accept() returns 0 length path for an unbound connector. BSD returns 16
59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
61 * BSD af_unix apparently has connect forgetting to block properly.
62 * (need to check this with the POSIX spec in detail)
63 *
64 * Differences from 2.0.0-11-... (ANK)
65 * Bug fixes and improvements.
66 * - client shutdown killed server socket.
67 * - removed all useless cli/sti pairs.
68 *
69 * Semantic changes/extensions.
70 * - generic control message passing.
71 * - SCM_CREDENTIALS control message.
72 * - "Abstract" (not FS based) socket bindings.
73 * Abstract names are sequences of bytes (not zero terminated)
74 * starting with a 0 byte, so that this name space does not intersect
75 * with BSD names.
76 */
77
78#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79
80#include <linux/module.h>
81#include <linux/kernel.h>
82#include <linux/signal.h>
83#include <linux/sched/signal.h>
84#include <linux/errno.h>
85#include <linux/string.h>
86#include <linux/stat.h>
87#include <linux/dcache.h>
88#include <linux/namei.h>
89#include <linux/socket.h>
90#include <linux/un.h>
91#include <linux/fcntl.h>
92#include <linux/termios.h>
93#include <linux/sockios.h>
94#include <linux/net.h>
95#include <linux/in.h>
96#include <linux/fs.h>
97#include <linux/slab.h>
98#include <linux/uaccess.h>
99#include <linux/skbuff.h>
100#include <linux/netdevice.h>
101#include <net/net_namespace.h>
102#include <net/sock.h>
103#include <net/tcp_states.h>
104#include <net/af_unix.h>
105#include <linux/proc_fs.h>
106#include <linux/seq_file.h>
107#include <net/scm.h>
108#include <linux/init.h>
109#include <linux/poll.h>
110#include <linux/rtnetlink.h>
111#include <linux/mount.h>
112#include <net/checksum.h>
113#include <linux/security.h>
114#include <linux/freezer.h>
115#include <linux/file.h>
116
117#include "scm.h"
118
119struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
120EXPORT_SYMBOL_GPL(unix_socket_table);
121DEFINE_SPINLOCK(unix_table_lock);
122EXPORT_SYMBOL_GPL(unix_table_lock);
123static atomic_long_t unix_nr_socks;
124
125
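/* Unbound sockets live in the second half of unix_socket_table, hashed
 * by the kernel address of the struct sock passed in as @addr.
 */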
126static struct hlist_head *unix_sockets_unbound(void *addr)
127{
128 unsigned long hash = (unsigned long)addr;
129
130 hash ^= hash >> 16;
131 hash ^= hash >> 8;
132 hash %= UNIX_HASH_SIZE;
133 return &unix_socket_table[UNIX_HASH_SIZE + hash];
134}
135
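/* Filesystem-bound sockets store UNIX_HASH_SIZE in addr->hash, so any
 * smaller value identifies an abstract (or autobound) address.
 */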
136#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
137
138#ifdef CONFIG_SECURITY_NETWORK
139static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
140{
141 UNIXCB(skb).secid = scm->secid;
142}
143
144static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
145{
146 scm->secid = UNIXCB(skb).secid;
147}
148
149static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
150{
151 return (scm->secid == UNIXCB(skb).secid);
152}
153#else
154static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
155{ }
156
157static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
158{ }
159
160static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
161{
162 return true;
163}
164#endif /* CONFIG_SECURITY_NETWORK */
165
166/*
167 * SMP locking strategy:
168 * hash table is protected with spinlock unix_table_lock
169 * each socket state is protected by separate spin lock.
170 */
171
172static inline unsigned int unix_hash_fold(__wsum n)
173{
174 unsigned int hash = (__force unsigned int)csum_fold(n);
175
176 hash ^= hash>>8;
177 return hash&(UNIX_HASH_SIZE-1);
178}
179
180#define unix_peer(sk) (unix_sk(sk)->peer)
181
182static inline int unix_our_peer(struct sock *sk, struct sock *osk)
183{
184 return unix_peer(osk) == sk;
185}
186
187static inline int unix_may_send(struct sock *sk, struct sock *osk)
188{
189 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
190}
191
192static inline int unix_recvq_full(const struct sock *sk)
193{
194 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
195}
196
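/* Lockless variant of the check above, reading both the queue length and
 * the configured backlog with READ_ONCE().
 */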
197static inline int unix_recvq_full_lockless(const struct sock *sk)
198{
199 return skb_queue_len_lockless(&sk->sk_receive_queue) >
200 READ_ONCE(sk->sk_max_ack_backlog);
201}
202
203struct sock *unix_peer_get(struct sock *s)
204{
205 struct sock *peer;
206
207 unix_state_lock(s);
208 peer = unix_peer(s);
209 if (peer)
210 sock_hold(peer);
211 unix_state_unlock(s);
212 return peer;
213}
214EXPORT_SYMBOL_GPL(unix_peer_get);
215
216static inline void unix_release_addr(struct unix_address *addr)
217{
218 if (refcount_dec_and_test(&addr->refcnt))
219 kfree(addr);
220}
221
222/*
223 * Check unix socket name:
224 * - should not be of zero length.
225 * - if it does not start with a zero byte, it should be NUL terminated (an FS object)
226 * - if it starts with a zero byte, it is an abstract name.
227 */
228
229static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
230{
231 *hashp = 0;
232
233 if (len <= sizeof(short) || len > sizeof(*sunaddr))
234 return -EINVAL;
235 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
236 return -EINVAL;
237 if (sunaddr->sun_path[0]) {
238 /*
239 * This may look like an off by one error but it is a bit more
240 * subtle. 108 is the longest valid AF_UNIX path for a binding.
241 * sun_path[108] doesn't as such exist. However in kernel space
242 * we are guaranteed that it is a valid memory location in our
243 * kernel address buffer.
244 */
245 ((char *)sunaddr)[len] = 0;
246 len = strlen(sunaddr->sun_path)+1+sizeof(short);
247 return len;
248 }
249
250 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
251 return len;
252}
253
254static void __unix_remove_socket(struct sock *sk)
255{
256 sk_del_node_init(sk);
257}
258
259static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
260{
261 WARN_ON(!sk_unhashed(sk));
262 sk_add_node(sk, list);
263}
264
265static inline void unix_remove_socket(struct sock *sk)
266{
267 spin_lock(&unix_table_lock);
268 __unix_remove_socket(sk);
269 spin_unlock(&unix_table_lock);
270}
271
272static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
273{
274 spin_lock(&unix_table_lock);
275 __unix_insert_socket(list, sk);
276 spin_unlock(&unix_table_lock);
277}
278
279static struct sock *__unix_find_socket_byname(struct net *net,
280 struct sockaddr_un *sunname,
281 int len, int type, unsigned int hash)
282{
283 struct sock *s;
284
285 sk_for_each(s, &unix_socket_table[hash ^ type]) {
286 struct unix_sock *u = unix_sk(s);
287
288 if (!net_eq(sock_net(s), net))
289 continue;
290
291 if (u->addr->len == len &&
292 !memcmp(u->addr->name, sunname, len))
293 goto found;
294 }
295 s = NULL;
296found:
297 return s;
298}
299
300static inline struct sock *unix_find_socket_byname(struct net *net,
301 struct sockaddr_un *sunname,
302 int len, int type,
303 unsigned int hash)
304{
305 struct sock *s;
306
307 spin_lock(&unix_table_lock);
308 s = __unix_find_socket_byname(net, sunname, len, type, hash);
309 if (s)
310 sock_hold(s);
311 spin_unlock(&unix_table_lock);
312 return s;
313}
314
315static struct sock *unix_find_socket_byinode(struct inode *i)
316{
317 struct sock *s;
318
319 spin_lock(&unix_table_lock);
320 sk_for_each(s,
321 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
322 struct dentry *dentry = unix_sk(s)->path.dentry;
323
324 if (dentry && d_backing_inode(dentry) == i) {
325 sock_hold(s);
326 goto found;
327 }
328 }
329 s = NULL;
330found:
331 spin_unlock(&unix_table_lock);
332 return s;
333}
334
335/* Support code for asymmetrically connected dgram sockets
336 *
337 * If a datagram socket is connected to a socket not itself connected
338 * to the first socket (eg, /dev/log), clients may only enqueue more
339 * messages if the present receive queue of the server socket is not
340 * "too large". This means there's a second writeability condition
341 * poll and sendmsg need to test. The dgram recv code will do a wake
342 * up on the peer_wait wait queue of a socket upon reception of a
343 * datagram which needs to be propagated to sleeping would-be writers
344 * since these might not have sent anything so far. This can't be
345 * accomplished via poll_wait because the lifetime of the server
346 * socket might be less than that of its clients if these break their
347 * association with it or if the server socket is closed while clients
348 * are still connected to it and there's no way to inform "a polling
349 * implementation" that it should let go of a certain wait queue
350 *
351 * In order to propagate a wake up, a wait_queue_entry_t of the client
352 * socket is enqueued on the peer_wait queue of the server socket
353 * whose wake function does a wake_up on the ordinary client socket
354 * wait queue. This connection is established whenever a write (or
355 * poll for write) hits the flow control condition and is broken when the
356 * association to the server socket is dissolved or after a wake up
357 * was relayed.
358 */
359
360static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
361 void *key)
362{
363 struct unix_sock *u;
364 wait_queue_head_t *u_sleep;
365
366 u = container_of(q, struct unix_sock, peer_wake);
367
368 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
369 q);
370 u->peer_wake.private = NULL;
371
372 /* relaying can only happen while the wq still exists */
373 u_sleep = sk_sleep(&u->sk);
374 if (u_sleep)
375 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
376
377 return 0;
378}
379
380static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
381{
382 struct unix_sock *u, *u_other;
383 int rc;
384
385 u = unix_sk(sk);
386 u_other = unix_sk(other);
387 rc = 0;
388 spin_lock(&u_other->peer_wait.lock);
389
390 if (!u->peer_wake.private) {
391 u->peer_wake.private = other;
392 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
393
394 rc = 1;
395 }
396
397 spin_unlock(&u_other->peer_wait.lock);
398 return rc;
399}
400
401static void unix_dgram_peer_wake_disconnect(struct sock *sk,
402 struct sock *other)
403{
404 struct unix_sock *u, *u_other;
405
406 u = unix_sk(sk);
407 u_other = unix_sk(other);
408 spin_lock(&u_other->peer_wait.lock);
409
410 if (u->peer_wake.private == other) {
411 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
412 u->peer_wake.private = NULL;
413 }
414
415 spin_unlock(&u_other->peer_wait.lock);
416}
417
418static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
419 struct sock *other)
420{
421 unix_dgram_peer_wake_disconnect(sk, other);
422 wake_up_interruptible_poll(sk_sleep(sk),
423 EPOLLOUT |
424 EPOLLWRNORM |
425 EPOLLWRBAND);
426}
427
428/* preconditions:
429 * - unix_peer(sk) == other
430 * - association is stable
431 */
432static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
433{
434 int connected;
435
436 connected = unix_dgram_peer_wake_connect(sk, other);
437
438 /* If other is SOCK_DEAD, we want to make sure we signal
439 * POLLOUT, such that a subsequent write() can get a
440 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
441 * to other and it's full, we will hang waiting for POLLOUT.
442 */
443 if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
444 return 1;
445
446 if (connected)
447 unix_dgram_peer_wake_disconnect(sk, other);
448
449 return 0;
450}
451
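/* A socket is writable while it is not listening and its outstanding
 * write memory is no more than a quarter of sk_sndbuf.
 */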
452static int unix_writable(const struct sock *sk)
453{
454 return sk->sk_state != TCP_LISTEN &&
455 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
456}
457
458static void unix_write_space(struct sock *sk)
459{
460 struct socket_wq *wq;
461
462 rcu_read_lock();
463 if (unix_writable(sk)) {
464 wq = rcu_dereference(sk->sk_wq);
465 if (skwq_has_sleeper(wq))
466 wake_up_interruptible_sync_poll(&wq->wait,
467 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
468 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
469 }
470 rcu_read_unlock();
471}
472
473/* When dgram socket disconnects (or changes its peer), we clear its receive
474 * queue of packets that arrived from the previous peer. First, it allows us to do
475 * flow control based only on wmem_alloc; second, an sk connected to a peer
476 * may receive messages only from that peer. */
477static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
478{
479 if (!skb_queue_empty(&sk->sk_receive_queue)) {
480 skb_queue_purge(&sk->sk_receive_queue);
481 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
482
483 /* If one link of bidirectional dgram pipe is disconnected,
484 * we signal an error. Messages are lost. Do not do this
485 * when the peer was not connected to us.
486 */
487 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
488 other->sk_err = ECONNRESET;
489 other->sk_error_report(other);
490 }
491 }
492}
493
494static void unix_sock_destructor(struct sock *sk)
495{
496 struct unix_sock *u = unix_sk(sk);
497
498 skb_queue_purge(&sk->sk_receive_queue);
499
500 WARN_ON(refcount_read(&sk->sk_wmem_alloc));
501 WARN_ON(!sk_unhashed(sk));
502 WARN_ON(sk->sk_socket);
503 if (!sock_flag(sk, SOCK_DEAD)) {
504 pr_info("Attempt to release alive unix socket: %p\n", sk);
505 return;
506 }
507
508 if (u->addr)
509 unix_release_addr(u->addr);
510
511 atomic_long_dec(&unix_nr_socks);
512 local_bh_disable();
513 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
514 local_bh_enable();
515#ifdef UNIX_REFCNT_DEBUG
516 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
517 atomic_long_read(&unix_nr_socks));
518#endif
519}
520
521static void unix_release_sock(struct sock *sk, int embrion)
522{
523 struct unix_sock *u = unix_sk(sk);
524 struct path path;
525 struct sock *skpair;
526 struct sk_buff *skb;
527 int state;
528
529 unix_remove_socket(sk);
530
531 /* Clear state */
532 unix_state_lock(sk);
533 sock_orphan(sk);
534 sk->sk_shutdown = SHUTDOWN_MASK;
535 path = u->path;
536 u->path.dentry = NULL;
537 u->path.mnt = NULL;
538 state = sk->sk_state;
539 sk->sk_state = TCP_CLOSE;
540
541 skpair = unix_peer(sk);
542 unix_peer(sk) = NULL;
543
544 unix_state_unlock(sk);
545
546 wake_up_interruptible_all(&u->peer_wait);
547
548 if (skpair != NULL) {
549 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
550 unix_state_lock(skpair);
551 /* No more writes */
552 skpair->sk_shutdown = SHUTDOWN_MASK;
553 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
554 skpair->sk_err = ECONNRESET;
555 unix_state_unlock(skpair);
556 skpair->sk_state_change(skpair);
557 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
558 }
559
560 unix_dgram_peer_wake_disconnect(sk, skpair);
561 sock_put(skpair); /* It may now die */
562 }
563
564 /* Try to flush out this socket. Throw out buffers at least */
565
566 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
567 if (state == TCP_LISTEN)
568 unix_release_sock(skb->sk, 1);
569 /* passed fds are erased in the kfree_skb hook */
570 UNIXCB(skb).consumed = skb->len;
571 kfree_skb(skb);
572 }
573
574 if (path.dentry)
575 path_put(&path);
576
577 sock_put(sk);
578
579 /* ---- Socket is dead now and most probably destroyed ---- */
580
581 /*
582 * Fixme: BSD difference: In BSD all sockets connected to us get
583 * ECONNRESET and we die on the spot. In Linux we behave
584 * like files and pipes do and wait for the last
585 * dereference.
586 *
587 * Can't we simply set sock->err?
588 *
589 * What the above comment does talk about? --ANK(980817)
590 */
591
592 if (unix_tot_inflight)
593 unix_gc(); /* Garbage collect fds */
594}
595
596static void init_peercred(struct sock *sk)
597{
598 put_pid(sk->sk_peer_pid);
599 if (sk->sk_peer_cred)
600 put_cred(sk->sk_peer_cred);
601 sk->sk_peer_pid = get_pid(task_tgid(current));
602 sk->sk_peer_cred = get_current_cred();
603}
604
605static void copy_peercred(struct sock *sk, struct sock *peersk)
606{
607 put_pid(sk->sk_peer_pid);
608 if (sk->sk_peer_cred)
609 put_cred(sk->sk_peer_cred);
610 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
611 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
612}
613
614static int unix_listen(struct socket *sock, int backlog)
615{
616 int err;
617 struct sock *sk = sock->sk;
618 struct unix_sock *u = unix_sk(sk);
619 struct pid *old_pid = NULL;
620
621 err = -EOPNOTSUPP;
622 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
623 goto out; /* Only stream/seqpacket sockets accept */
624 err = -EINVAL;
625 if (!u->addr)
626 goto out; /* No listens on an unbound socket */
627 unix_state_lock(sk);
628 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
629 goto out_unlock;
630 if (backlog > sk->sk_max_ack_backlog)
631 wake_up_interruptible_all(&u->peer_wait);
632 sk->sk_max_ack_backlog = backlog;
633 sk->sk_state = TCP_LISTEN;
634 /* set credentials so connect can copy them */
635 init_peercred(sk);
636 err = 0;
637
638out_unlock:
639 unix_state_unlock(sk);
640 put_pid(old_pid);
641out:
642 return err;
643}
644
645static int unix_release(struct socket *);
646static int unix_bind(struct socket *, struct sockaddr *, int);
647static int unix_stream_connect(struct socket *, struct sockaddr *,
648 int addr_len, int flags);
649static int unix_socketpair(struct socket *, struct socket *);
650static int unix_accept(struct socket *, struct socket *, int, bool);
651static int unix_getname(struct socket *, struct sockaddr *, int);
652static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
653static __poll_t unix_dgram_poll(struct file *, struct socket *,
654 poll_table *);
655static int unix_ioctl(struct socket *, unsigned int, unsigned long);
656#ifdef CONFIG_COMPAT
657static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
658#endif
659static int unix_shutdown(struct socket *, int);
660static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
661static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
662static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
663 size_t size, int flags);
664static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
665 struct pipe_inode_info *, size_t size,
666 unsigned int flags);
667static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
668static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
669static int unix_dgram_connect(struct socket *, struct sockaddr *,
670 int, int);
671static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
672static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
673 int);
674
675static int unix_set_peek_off(struct sock *sk, int val)
676{
677 struct unix_sock *u = unix_sk(sk);
678
679 if (mutex_lock_interruptible(&u->iolock))
680 return -EINTR;
681
682 sk->sk_peek_off = val;
683 mutex_unlock(&u->iolock);
684
685 return 0;
686}
687
688
689static const struct proto_ops unix_stream_ops = {
690 .family = PF_UNIX,
691 .owner = THIS_MODULE,
692 .release = unix_release,
693 .bind = unix_bind,
694 .connect = unix_stream_connect,
695 .socketpair = unix_socketpair,
696 .accept = unix_accept,
697 .getname = unix_getname,
698 .poll = unix_poll,
699 .ioctl = unix_ioctl,
700#ifdef CONFIG_COMPAT
701 .compat_ioctl = unix_compat_ioctl,
702#endif
703 .listen = unix_listen,
704 .shutdown = unix_shutdown,
705 .setsockopt = sock_no_setsockopt,
706 .getsockopt = sock_no_getsockopt,
707 .sendmsg = unix_stream_sendmsg,
708 .recvmsg = unix_stream_recvmsg,
709 .mmap = sock_no_mmap,
710 .sendpage = unix_stream_sendpage,
711 .splice_read = unix_stream_splice_read,
712 .set_peek_off = unix_set_peek_off,
713};
714
715static const struct proto_ops unix_dgram_ops = {
716 .family = PF_UNIX,
717 .owner = THIS_MODULE,
718 .release = unix_release,
719 .bind = unix_bind,
720 .connect = unix_dgram_connect,
721 .socketpair = unix_socketpair,
722 .accept = sock_no_accept,
723 .getname = unix_getname,
724 .poll = unix_dgram_poll,
725 .ioctl = unix_ioctl,
726#ifdef CONFIG_COMPAT
727 .compat_ioctl = unix_compat_ioctl,
728#endif
729 .listen = sock_no_listen,
730 .shutdown = unix_shutdown,
731 .setsockopt = sock_no_setsockopt,
732 .getsockopt = sock_no_getsockopt,
733 .sendmsg = unix_dgram_sendmsg,
734 .recvmsg = unix_dgram_recvmsg,
735 .mmap = sock_no_mmap,
736 .sendpage = sock_no_sendpage,
737 .set_peek_off = unix_set_peek_off,
738};
739
740static const struct proto_ops unix_seqpacket_ops = {
741 .family = PF_UNIX,
742 .owner = THIS_MODULE,
743 .release = unix_release,
744 .bind = unix_bind,
745 .connect = unix_stream_connect,
746 .socketpair = unix_socketpair,
747 .accept = unix_accept,
748 .getname = unix_getname,
749 .poll = unix_dgram_poll,
750 .ioctl = unix_ioctl,
751#ifdef CONFIG_COMPAT
752 .compat_ioctl = unix_compat_ioctl,
753#endif
754 .listen = unix_listen,
755 .shutdown = unix_shutdown,
756 .setsockopt = sock_no_setsockopt,
757 .getsockopt = sock_no_getsockopt,
758 .sendmsg = unix_seqpacket_sendmsg,
759 .recvmsg = unix_seqpacket_recvmsg,
760 .mmap = sock_no_mmap,
761 .sendpage = sock_no_sendpage,
762 .set_peek_off = unix_set_peek_off,
763};
764
765static struct proto unix_proto = {
766 .name = "UNIX",
767 .owner = THIS_MODULE,
768 .obj_size = sizeof(struct unix_sock),
769};
770
771static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
772{
773 struct sock *sk = NULL;
774 struct unix_sock *u;
775
776 atomic_long_inc(&unix_nr_socks);
777 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
778 goto out;
779
780 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
781 if (!sk)
782 goto out;
783
784 sock_init_data(sock, sk);
785
786 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
787 sk->sk_write_space = unix_write_space;
788 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
789 sk->sk_destruct = unix_sock_destructor;
790 u = unix_sk(sk);
791 u->path.dentry = NULL;
792 u->path.mnt = NULL;
793 spin_lock_init(&u->lock);
794 atomic_long_set(&u->inflight, 0);
795 INIT_LIST_HEAD(&u->link);
796 mutex_init(&u->iolock); /* single task reading lock */
797 mutex_init(&u->bindlock); /* single task binding lock */
798 init_waitqueue_head(&u->peer_wait);
799 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
800 unix_insert_socket(unix_sockets_unbound(sk), sk);
801out:
802 if (sk == NULL)
803 atomic_long_dec(&unix_nr_socks);
804 else {
805 local_bh_disable();
806 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
807 local_bh_enable();
808 }
809 return sk;
810}
811
812static int unix_create(struct net *net, struct socket *sock, int protocol,
813 int kern)
814{
815 if (protocol && protocol != PF_UNIX)
816 return -EPROTONOSUPPORT;
817
818 sock->state = SS_UNCONNECTED;
819
820 switch (sock->type) {
821 case SOCK_STREAM:
822 sock->ops = &unix_stream_ops;
823 break;
824 /*
825 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
826 * nothing uses it.
827 */
828 case SOCK_RAW:
829 sock->type = SOCK_DGRAM;
830 /* fall through */
831 case SOCK_DGRAM:
832 sock->ops = &unix_dgram_ops;
833 break;
834 case SOCK_SEQPACKET:
835 sock->ops = &unix_seqpacket_ops;
836 break;
837 default:
838 return -ESOCKTNOSUPPORT;
839 }
840
841 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
842}
843
844static int unix_release(struct socket *sock)
845{
846 struct sock *sk = sock->sk;
847
848 if (!sk)
849 return 0;
850
851 unix_release_sock(sk, 0);
852 sock->sk = NULL;
853
854 return 0;
855}
856
857static int unix_autobind(struct socket *sock)
858{
859 struct sock *sk = sock->sk;
860 struct net *net = sock_net(sk);
861 struct unix_sock *u = unix_sk(sk);
862 static u32 ordernum = 1;
863 struct unix_address *addr;
864 int err;
865 unsigned int retries = 0;
866
867 err = mutex_lock_interruptible(&u->bindlock);
868 if (err)
869 return err;
870
871 err = 0;
872 if (u->addr)
873 goto out;
874
875 err = -ENOMEM;
876 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
877 if (!addr)
878 goto out;
879
880 addr->name->sun_family = AF_UNIX;
881 refcount_set(&addr->refcnt, 1);
882
883retry:
884 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
885 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
886
887 spin_lock(&unix_table_lock);
888 ordernum = (ordernum+1)&0xFFFFF;
889
890 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
891 addr->hash)) {
892 spin_unlock(&unix_table_lock);
893 /*
894 * __unix_find_socket_byname() may take a long time if many names
895 * are already in use.
896 */
897 cond_resched();
898 /* Give up if all names seem to be in use. */
899 if (retries++ == 0xFFFFF) {
900 err = -ENOSPC;
901 kfree(addr);
902 goto out;
903 }
904 goto retry;
905 }
906 addr->hash ^= sk->sk_type;
907
908 __unix_remove_socket(sk);
909 smp_store_release(&u->addr, addr);
910 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
911 spin_unlock(&unix_table_lock);
912 err = 0;
913
914out: mutex_unlock(&u->bindlock);
915 return err;
916}
917
918static struct sock *unix_find_other(struct net *net,
919 struct sockaddr_un *sunname, int len,
920 int type, unsigned int hash, int *error)
921{
922 struct sock *u;
923 struct path path;
924 int err = 0;
925
926 if (sunname->sun_path[0]) {
927 struct inode *inode;
928 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
929 if (err)
930 goto fail;
931 inode = d_backing_inode(path.dentry);
932 err = inode_permission(inode, MAY_WRITE);
933 if (err)
934 goto put_fail;
935
936 err = -ECONNREFUSED;
937 if (!S_ISSOCK(inode->i_mode))
938 goto put_fail;
939 u = unix_find_socket_byinode(inode);
940 if (!u)
941 goto put_fail;
942
943 if (u->sk_type == type)
944 touch_atime(&path);
945
946 path_put(&path);
947
948 err = -EPROTOTYPE;
949 if (u->sk_type != type) {
950 sock_put(u);
951 goto fail;
952 }
953 } else {
954 err = -ECONNREFUSED;
955 u = unix_find_socket_byname(net, sunname, len, type, hash);
956 if (u) {
957 struct dentry *dentry;
958 dentry = unix_sk(u)->path.dentry;
959 if (dentry)
960 touch_atime(&unix_sk(u)->path);
961 } else
962 goto fail;
963 }
964 return u;
965
966put_fail:
967 path_put(&path);
968fail:
969 *error = err;
970 return NULL;
971}
972
973static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
974{
975 struct dentry *dentry;
976 struct path path;
977 int err = 0;
978 /*
979 * Get the parent directory, calculate the hash for the last
980 * component.
981 */
982 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
983 err = PTR_ERR(dentry);
984 if (IS_ERR(dentry))
985 return err;
986
987 /*
988 * All right, let's create it.
989 */
990 err = security_path_mknod(&path, dentry, mode, 0);
991 if (!err) {
992 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
993 if (!err) {
994 res->mnt = mntget(path.mnt);
995 res->dentry = dget(dentry);
996 }
997 }
998 done_path_create(&path, dentry);
999 return err;
1000}
1001
1002static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1003{
1004 struct sock *sk = sock->sk;
1005 struct net *net = sock_net(sk);
1006 struct unix_sock *u = unix_sk(sk);
1007 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1008 char *sun_path = sunaddr->sun_path;
1009 int err;
1010 unsigned int hash;
1011 struct unix_address *addr;
1012 struct hlist_head *list;
1013 struct path path = { };
1014
1015 err = -EINVAL;
1016 if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1017 sunaddr->sun_family != AF_UNIX)
1018 goto out;
1019
1020 if (addr_len == sizeof(short)) {
1021 err = unix_autobind(sock);
1022 goto out;
1023 }
1024
1025 err = unix_mkname(sunaddr, addr_len, &hash);
1026 if (err < 0)
1027 goto out;
1028 addr_len = err;
1029
1030 if (sun_path[0]) {
1031 umode_t mode = S_IFSOCK |
1032 (SOCK_INODE(sock)->i_mode & ~current_umask());
1033 err = unix_mknod(sun_path, mode, &path);
1034 if (err) {
1035 if (err == -EEXIST)
1036 err = -EADDRINUSE;
1037 goto out;
1038 }
1039 }
1040
1041 err = mutex_lock_interruptible(&u->bindlock);
1042 if (err)
1043 goto out_put;
1044
1045 err = -EINVAL;
1046 if (u->addr)
1047 goto out_up;
1048
1049 err = -ENOMEM;
1050 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1051 if (!addr)
1052 goto out_up;
1053
1054 memcpy(addr->name, sunaddr, addr_len);
1055 addr->len = addr_len;
1056 addr->hash = hash ^ sk->sk_type;
1057 refcount_set(&addr->refcnt, 1);
1058
1059 if (sun_path[0]) {
1060 addr->hash = UNIX_HASH_SIZE;
1061 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1062 spin_lock(&unix_table_lock);
1063 u->path = path;
1064 list = &unix_socket_table[hash];
1065 } else {
1066 spin_lock(&unix_table_lock);
1067 err = -EADDRINUSE;
1068 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1069 sk->sk_type, hash)) {
1070 unix_release_addr(addr);
1071 goto out_unlock;
1072 }
1073
1074 list = &unix_socket_table[addr->hash];
1075 }
1076
1077 err = 0;
1078 __unix_remove_socket(sk);
1079 smp_store_release(&u->addr, addr);
1080 __unix_insert_socket(list, sk);
1081
1082out_unlock:
1083 spin_unlock(&unix_table_lock);
1084out_up:
1085 mutex_unlock(&u->bindlock);
1086out_put:
1087 if (err)
1088 path_put(&path);
1089out:
1090 return err;
1091}
1092
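/* Take the two state locks in pointer order so concurrent double-lockers
 * cannot deadlock; a NULL or identical second socket degenerates to a
 * single lock.
 */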
1093static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1094{
1095 if (unlikely(sk1 == sk2) || !sk2) {
1096 unix_state_lock(sk1);
1097 return;
1098 }
1099 if (sk1 < sk2) {
1100 unix_state_lock(sk1);
1101 unix_state_lock_nested(sk2);
1102 } else {
1103 unix_state_lock(sk2);
1104 unix_state_lock_nested(sk1);
1105 }
1106}
1107
1108static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1109{
1110 if (unlikely(sk1 == sk2) || !sk2) {
1111 unix_state_unlock(sk1);
1112 return;
1113 }
1114 unix_state_unlock(sk1);
1115 unix_state_unlock(sk2);
1116}
1117
1118static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1119 int alen, int flags)
1120{
1121 struct sock *sk = sock->sk;
1122 struct net *net = sock_net(sk);
1123 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1124 struct sock *other;
1125 unsigned int hash;
1126 int err;
1127
1128 err = -EINVAL;
1129 if (alen < offsetofend(struct sockaddr, sa_family))
1130 goto out;
1131
1132 if (addr->sa_family != AF_UNSPEC) {
1133 err = unix_mkname(sunaddr, alen, &hash);
1134 if (err < 0)
1135 goto out;
1136 alen = err;
1137
1138 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1139 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1140 goto out;
1141
1142restart:
1143 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1144 if (!other)
1145 goto out;
1146
1147 unix_state_double_lock(sk, other);
1148
1149 /* Apparently VFS overslept socket death. Retry. */
1150 if (sock_flag(other, SOCK_DEAD)) {
1151 unix_state_double_unlock(sk, other);
1152 sock_put(other);
1153 goto restart;
1154 }
1155
1156 err = -EPERM;
1157 if (!unix_may_send(sk, other))
1158 goto out_unlock;
1159
1160 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1161 if (err)
1162 goto out_unlock;
1163
1164 } else {
1165 /*
1166 * 1003.1g breaking connected state with AF_UNSPEC
1167 */
1168 other = NULL;
1169 unix_state_double_lock(sk, other);
1170 }
1171
1172 /*
1173 * If it was connected, reconnect.
1174 */
1175 if (unix_peer(sk)) {
1176 struct sock *old_peer = unix_peer(sk);
1177 unix_peer(sk) = other;
1178 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1179
1180 unix_state_double_unlock(sk, other);
1181
1182 if (other != old_peer)
1183 unix_dgram_disconnected(sk, old_peer);
1184 sock_put(old_peer);
1185 } else {
1186 unix_peer(sk) = other;
1187 unix_state_double_unlock(sk, other);
1188 }
1189 return 0;
1190
1191out_unlock:
1192 unix_state_double_unlock(sk, other);
1193 sock_put(other);
1194out:
1195 return err;
1196}
1197
1198static long unix_wait_for_peer(struct sock *other, long timeo)
1199{
1200 struct unix_sock *u = unix_sk(other);
1201 int sched;
1202 DEFINE_WAIT(wait);
1203
1204 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1205
1206 sched = !sock_flag(other, SOCK_DEAD) &&
1207 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1208 unix_recvq_full(other);
1209
1210 unix_state_unlock(other);
1211
1212 if (sched)
1213 timeo = schedule_timeout(timeo);
1214
1215 finish_wait(&u->peer_wait, &wait);
1216 return timeo;
1217}
1218
1219static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1220 int addr_len, int flags)
1221{
1222 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1223 struct sock *sk = sock->sk;
1224 struct net *net = sock_net(sk);
1225 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1226 struct sock *newsk = NULL;
1227 struct sock *other = NULL;
1228 struct sk_buff *skb = NULL;
1229 unsigned int hash;
1230 int st;
1231 int err;
1232 long timeo;
1233
1234 err = unix_mkname(sunaddr, addr_len, &hash);
1235 if (err < 0)
1236 goto out;
1237 addr_len = err;
1238
1239 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1240 (err = unix_autobind(sock)) != 0)
1241 goto out;
1242
1243 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1244
1245 /* First of all allocate resources.
1246 If we do it after the state is locked,
1247 we will have to recheck everything again in any case.
1248 */
1249
1250 err = -ENOMEM;
1251
1252 /* create new sock for complete connection */
1253 newsk = unix_create1(sock_net(sk), NULL, 0);
1254 if (newsk == NULL)
1255 goto out;
1256
1257 /* Allocate skb for sending to listening sock */
1258 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1259 if (skb == NULL)
1260 goto out;
1261
1262restart:
1263 /* Find listening sock. */
1264 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1265 if (!other)
1266 goto out;
1267
1268 /* Latch state of peer */
1269 unix_state_lock(other);
1270
1271 /* Apparently VFS overslept socket death. Retry. */
1272 if (sock_flag(other, SOCK_DEAD)) {
1273 unix_state_unlock(other);
1274 sock_put(other);
1275 goto restart;
1276 }
1277
1278 err = -ECONNREFUSED;
1279 if (other->sk_state != TCP_LISTEN)
1280 goto out_unlock;
1281 if (other->sk_shutdown & RCV_SHUTDOWN)
1282 goto out_unlock;
1283
1284 if (unix_recvq_full(other)) {
1285 err = -EAGAIN;
1286 if (!timeo)
1287 goto out_unlock;
1288
1289 timeo = unix_wait_for_peer(other, timeo);
1290
1291 err = sock_intr_errno(timeo);
1292 if (signal_pending(current))
1293 goto out;
1294 sock_put(other);
1295 goto restart;
1296 }
1297
1298 /* Latch our state.
1299
1300 It is a tricky place. We need to grab our state lock and cannot
1301 drop the lock on the peer. It is dangerous because a deadlock is
1302 possible. The connect-to-self case and a simultaneous
1303 attempt to connect are eliminated by checking the socket
1304 state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1305 check this before attempting to grab the lock.
1306
1307 Well, and we have to recheck the state after socket locked.
1308 */
1309 st = sk->sk_state;
1310
1311 switch (st) {
1312 case TCP_CLOSE:
1313 /* This is ok... continue with connect */
1314 break;
1315 case TCP_ESTABLISHED:
1316 /* Socket is already connected */
1317 err = -EISCONN;
1318 goto out_unlock;
1319 default:
1320 err = -EINVAL;
1321 goto out_unlock;
1322 }
1323
1324 unix_state_lock_nested(sk);
1325
1326 if (sk->sk_state != st) {
1327 unix_state_unlock(sk);
1328 unix_state_unlock(other);
1329 sock_put(other);
1330 goto restart;
1331 }
1332
1333 err = security_unix_stream_connect(sk, other, newsk);
1334 if (err) {
1335 unix_state_unlock(sk);
1336 goto out_unlock;
1337 }
1338
1339 /* The way is open! Quickly set all the necessary fields... */
1340
1341 sock_hold(sk);
1342 unix_peer(newsk) = sk;
1343 newsk->sk_state = TCP_ESTABLISHED;
1344 newsk->sk_type = sk->sk_type;
1345 init_peercred(newsk);
1346 newu = unix_sk(newsk);
1347 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1348 otheru = unix_sk(other);
1349
1350 /* copy address information from listening to new sock
1351 *
1352 * The contents of *(otheru->addr) and otheru->path
1353 * are seen fully set up here, since we have found
1354 * otheru in hash under unix_table_lock. Insertion
1355 * into the hash chain we'd found it in had been done
1356 * in an earlier critical area protected by unix_table_lock,
1357 * the same one where we'd set *(otheru->addr) contents,
1358 * as well as otheru->path and otheru->addr itself.
1359 *
1360 * Using smp_store_release() here to set newu->addr
1361 * is enough to make those stores, as well as stores
1362 * to newu->path visible to anyone who gets newu->addr
1363 * by smp_load_acquire(). IOW, the same warranties
1364 * as for unix_sock instances bound in unix_bind() or
1365 * in unix_autobind().
1366 */
1367 if (otheru->path.dentry) {
1368 path_get(&otheru->path);
1369 newu->path = otheru->path;
1370 }
1371 refcount_inc(&otheru->addr->refcnt);
1372 smp_store_release(&newu->addr, otheru->addr);
1373
1374 /* Set credentials */
1375 copy_peercred(sk, other);
1376
1377 sock->state = SS_CONNECTED;
1378 sk->sk_state = TCP_ESTABLISHED;
1379 sock_hold(newsk);
1380
1381 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1382 unix_peer(sk) = newsk;
1383
1384 unix_state_unlock(sk);
1385
1386 /* take ten and send info to listening sock */
1387 spin_lock(&other->sk_receive_queue.lock);
1388 __skb_queue_tail(&other->sk_receive_queue, skb);
1389 spin_unlock(&other->sk_receive_queue.lock);
1390 unix_state_unlock(other);
1391 other->sk_data_ready(other);
1392 sock_put(other);
1393 return 0;
1394
1395out_unlock:
1396 if (other)
1397 unix_state_unlock(other);
1398
1399out:
1400 kfree_skb(skb);
1401 if (newsk)
1402 unix_release_sock(newsk, 0);
1403 if (other)
1404 sock_put(other);
1405 return err;
1406}
1407
1408static int unix_socketpair(struct socket *socka, struct socket *sockb)
1409{
1410 struct sock *ska = socka->sk, *skb = sockb->sk;
1411
1412 /* Join our sockets back to back */
1413 sock_hold(ska);
1414 sock_hold(skb);
1415 unix_peer(ska) = skb;
1416 unix_peer(skb) = ska;
1417 init_peercred(ska);
1418 init_peercred(skb);
1419
1420 if (ska->sk_type != SOCK_DGRAM) {
1421 ska->sk_state = TCP_ESTABLISHED;
1422 skb->sk_state = TCP_ESTABLISHED;
1423 socka->state = SS_CONNECTED;
1424 sockb->state = SS_CONNECTED;
1425 }
1426 return 0;
1427}
1428
1429static void unix_sock_inherit_flags(const struct socket *old,
1430 struct socket *new)
1431{
1432 if (test_bit(SOCK_PASSCRED, &old->flags))
1433 set_bit(SOCK_PASSCRED, &new->flags);
1434 if (test_bit(SOCK_PASSSEC, &old->flags))
1435 set_bit(SOCK_PASSSEC, &new->flags);
1436}
1437
1438static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1439 bool kern)
1440{
1441 struct sock *sk = sock->sk;
1442 struct sock *tsk;
1443 struct sk_buff *skb;
1444 int err;
1445
1446 err = -EOPNOTSUPP;
1447 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1448 goto out;
1449
1450 err = -EINVAL;
1451 if (sk->sk_state != TCP_LISTEN)
1452 goto out;
1453
1454 /* If socket state is TCP_LISTEN it cannot change (for now...),
1455 * so that no locks are necessary.
1456 */
1457
1458 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1459 if (!skb) {
1460 /* This means receive shutdown. */
1461 if (err == 0)
1462 err = -EINVAL;
1463 goto out;
1464 }
1465
1466 tsk = skb->sk;
1467 skb_free_datagram(sk, skb);
1468 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1469
1470 /* attach accepted sock to socket */
1471 unix_state_lock(tsk);
1472 newsock->state = SS_CONNECTED;
1473 unix_sock_inherit_flags(sock, newsock);
1474 sock_graft(tsk, newsock);
1475 unix_state_unlock(tsk);
1476 return 0;
1477
1478out:
1479 return err;
1480}
1481
1482
1483static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1484{
1485 struct sock *sk = sock->sk;
1486 struct unix_address *addr;
1487 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1488 int err = 0;
1489
1490 if (peer) {
1491 sk = unix_peer_get(sk);
1492
1493 err = -ENOTCONN;
1494 if (!sk)
1495 goto out;
1496 err = 0;
1497 } else {
1498 sock_hold(sk);
1499 }
1500
1501 addr = smp_load_acquire(&unix_sk(sk)->addr);
1502 if (!addr) {
1503 sunaddr->sun_family = AF_UNIX;
1504 sunaddr->sun_path[0] = 0;
1505 err = sizeof(short);
1506 } else {
1507 err = addr->len;
1508 memcpy(sunaddr, addr->name, addr->len);
1509 }
1510 sock_put(sk);
1511out:
1512 return err;
1513}
1514
1515static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1516{
1517 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1518
1519 /*
1520 * Garbage collection of unix sockets starts by selecting a set of
1521 * candidate sockets which have reference only from being in flight
1522 * (total_refs == inflight_refs). This condition is checked once during
1523 * the candidate collection phase, and candidates are marked as such, so
1524 * that non-candidates can later be ignored. While inflight_refs is
1525 * protected by unix_gc_lock, total_refs (file count) is not, hence this
1526 * is an instantaneous decision.
1527 *
1528 * Once a candidate, however, the socket must not be reinstalled into a
1529 * file descriptor while the garbage collection is in progress.
1530 *
1531 * If the above conditions are met, then the directed graph of
1532 * candidates (*) does not change while unix_gc_lock is held.
1533 *
1534 * Any operation that changes the file count through file descriptors
1535 * (dup, close, sendmsg) does not change the graph since candidates are
1536 * not installed in fds.
1537 *
1538 * Dequeuing a candidate via recvmsg would install it into an fd, but
1539 * that takes unix_gc_lock to decrement the inflight count, so it's
1540 * serialized with garbage collection.
1541 *
1542 * MSG_PEEK is special in that it does not change the inflight count,
1543 * yet does install the socket into an fd. The following lock/unlock
1544 * pair is to ensure serialization with garbage collection. It must be
1545 * done between incrementing the file count and installing the file into
1546 * an fd.
1547 *
1548 * If garbage collection starts after the barrier provided by the
1549 * lock/unlock, then it will see the elevated refcount and not mark this
1550 * as a candidate. If a garbage collection is already in progress
1551 * before the file count was incremented, then the lock/unlock pair will
1552 * ensure that garbage collection is finished before progressing to
1553 * installing the fd.
1554 *
1555 * (*) A -> B where B is on the queue of A or B is on the queue of C
1556 * which is on the queue of listening socket A.
1557 */
1558 spin_lock(&unix_gc_lock);
1559 spin_unlock(&unix_gc_lock);
1560}
1561
1562static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1563{
1564 int err = 0;
1565
1566 UNIXCB(skb).pid = get_pid(scm->pid);
1567 UNIXCB(skb).uid = scm->creds.uid;
1568 UNIXCB(skb).gid = scm->creds.gid;
1569 UNIXCB(skb).fp = NULL;
1570 unix_get_secdata(scm, skb);
1571 if (scm->fp && send_fds)
1572 err = unix_attach_fds(scm, skb);
1573
1574 skb->destructor = unix_destruct_scm;
1575 return err;
1576}
1577
1578static bool unix_passcred_enabled(const struct socket *sock,
1579 const struct sock *other)
1580{
1581 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1582 !other->sk_socket ||
1583 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1584}
1585
1586/*
1587 * Some apps rely on write() giving SCM_CREDENTIALS
1588 * We include credentials if source or destination socket
1589 * asserted SOCK_PASSCRED.
1590 */
1591static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1592 const struct sock *other)
1593{
1594 if (UNIXCB(skb).pid)
1595 return;
1596 if (unix_passcred_enabled(sock, other)) {
1597 UNIXCB(skb).pid = get_pid(task_tgid(current));
1598 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1599 }
1600}
1601
1602static int maybe_init_creds(struct scm_cookie *scm,
1603 struct socket *socket,
1604 const struct sock *other)
1605{
1606 int err;
1607 struct msghdr msg = { .msg_controllen = 0 };
1608
1609 err = scm_send(socket, &msg, scm, false);
1610 if (err)
1611 return err;
1612
1613 if (unix_passcred_enabled(socket, other)) {
1614 scm->pid = get_pid(task_tgid(current));
1615 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1616 }
1617 return err;
1618}
1619
1620static bool unix_skb_scm_eq(struct sk_buff *skb,
1621 struct scm_cookie *scm)
1622{
1623 const struct unix_skb_parms *u = &UNIXCB(skb);
1624
1625 return u->pid == scm->pid &&
1626 uid_eq(u->uid, scm->creds.uid) &&
1627 gid_eq(u->gid, scm->creds.gid) &&
1628 unix_secdata_eq(scm, skb);
1629}
1630
1631/*
1632 * Send AF_UNIX data.
1633 */
1634
1635static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1636 size_t len)
1637{
1638 struct sock *sk = sock->sk;
1639 struct net *net = sock_net(sk);
1640 struct unix_sock *u = unix_sk(sk);
1641 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1642 struct sock *other = NULL;
1643 int namelen = 0; /* fake GCC */
1644 int err;
1645 unsigned int hash;
1646 struct sk_buff *skb;
1647 long timeo;
1648 struct scm_cookie scm;
1649 int data_len = 0;
1650 int sk_locked;
1651
1652 wait_for_unix_gc();
1653 err = scm_send(sock, msg, &scm, false);
1654 if (err < 0)
1655 return err;
1656
1657 err = -EOPNOTSUPP;
1658 if (msg->msg_flags&MSG_OOB)
1659 goto out;
1660
1661 if (msg->msg_namelen) {
1662 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1663 if (err < 0)
1664 goto out;
1665 namelen = err;
1666 } else {
1667 sunaddr = NULL;
1668 err = -ENOTCONN;
1669 other = unix_peer_get(sk);
1670 if (!other)
1671 goto out;
1672 }
1673
1674 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1675 && (err = unix_autobind(sock)) != 0)
1676 goto out;
1677
1678 err = -EMSGSIZE;
1679 if (len > sk->sk_sndbuf - 32)
1680 goto out;
1681
1682 if (len > SKB_MAX_ALLOC) {
1683 data_len = min_t(size_t,
1684 len - SKB_MAX_ALLOC,
1685 MAX_SKB_FRAGS * PAGE_SIZE);
1686 data_len = PAGE_ALIGN(data_len);
1687
1688 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1689 }
1690
1691 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1692 msg->msg_flags & MSG_DONTWAIT, &err,
1693 PAGE_ALLOC_COSTLY_ORDER);
1694 if (skb == NULL)
1695 goto out;
1696
1697 err = unix_scm_to_skb(&scm, skb, true);
1698 if (err < 0)
1699 goto out_free;
1700
1701 skb_put(skb, len - data_len);
1702 skb->data_len = data_len;
1703 skb->len = len;
1704 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1705 if (err)
1706 goto out_free;
1707
1708 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1709
1710restart:
1711 if (!other) {
1712 err = -ECONNRESET;
1713 if (sunaddr == NULL)
1714 goto out_free;
1715
1716 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1717 hash, &err);
1718 if (other == NULL)
1719 goto out_free;
1720 }
1721
1722 if (sk_filter(other, skb) < 0) {
1723 /* Toss the packet but do not return any error to the sender */
1724 err = len;
1725 goto out_free;
1726 }
1727
1728 sk_locked = 0;
1729 unix_state_lock(other);
1730restart_locked:
1731 err = -EPERM;
1732 if (!unix_may_send(sk, other))
1733 goto out_unlock;
1734
1735 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1736 /*
1737 * Check with 1003.1g - what should
1738 * datagram error
1739 */
1740 unix_state_unlock(other);
1741 sock_put(other);
1742
1743 if (!sk_locked)
1744 unix_state_lock(sk);
1745
1746 err = 0;
1747 if (unix_peer(sk) == other) {
1748 unix_peer(sk) = NULL;
1749 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1750
1751 unix_state_unlock(sk);
1752
1753 unix_dgram_disconnected(sk, other);
1754 sock_put(other);
1755 err = -ECONNREFUSED;
1756 } else {
1757 unix_state_unlock(sk);
1758 }
1759
1760 other = NULL;
1761 if (err)
1762 goto out_free;
1763 goto restart;
1764 }
1765
1766 err = -EPIPE;
1767 if (other->sk_shutdown & RCV_SHUTDOWN)
1768 goto out_unlock;
1769
1770 if (sk->sk_type != SOCK_SEQPACKET) {
1771 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1772 if (err)
1773 goto out_unlock;
1774 }
1775
1776 /* other == sk && unix_peer(other) != sk if
1777 * - unix_peer(sk) == NULL, destination address bound to sk
1778 * - unix_peer(sk) == sk by time of get but disconnected before lock
1779 */
1780 if (other != sk &&
1781 unlikely(unix_peer(other) != sk &&
1782 unix_recvq_full_lockless(other))) {
1783 if (timeo) {
1784 timeo = unix_wait_for_peer(other, timeo);
1785
1786 err = sock_intr_errno(timeo);
1787 if (signal_pending(current))
1788 goto out_free;
1789
1790 goto restart;
1791 }
1792
1793 if (!sk_locked) {
1794 unix_state_unlock(other);
1795 unix_state_double_lock(sk, other);
1796 }
1797
1798 if (unix_peer(sk) != other ||
1799 unix_dgram_peer_wake_me(sk, other)) {
1800 err = -EAGAIN;
1801 sk_locked = 1;
1802 goto out_unlock;
1803 }
1804
1805 if (!sk_locked) {
1806 sk_locked = 1;
1807 goto restart_locked;
1808 }
1809 }
1810
1811 if (unlikely(sk_locked))
1812 unix_state_unlock(sk);
1813
1814 if (sock_flag(other, SOCK_RCVTSTAMP))
1815 __net_timestamp(skb);
1816 maybe_add_creds(skb, sock, other);
1817 skb_queue_tail(&other->sk_receive_queue, skb);
1818 unix_state_unlock(other);
1819 other->sk_data_ready(other);
1820 sock_put(other);
1821 scm_destroy(&scm);
1822 return len;
1823
1824out_unlock:
1825 if (sk_locked)
1826 unix_state_unlock(sk);
1827 unix_state_unlock(other);
1828out_free:
1829 kfree_skb(skb);
1830out:
1831 if (other)
1832 sock_put(other);
1833 scm_destroy(&scm);
1834 return err;
1835}
1836
1837/* We use paged skbs for stream sockets, and limit occupancy to 32768
1838 * bytes, and a minimum of a full page.
1839 */
1840#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1841
1842static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1843 size_t len)
1844{
1845 struct sock *sk = sock->sk;
1846 struct sock *other = NULL;
1847 int err, size;
1848 struct sk_buff *skb;
1849 int sent = 0;
1850 struct scm_cookie scm;
1851 bool fds_sent = false;
1852 int data_len;
1853
1854 wait_for_unix_gc();
1855 err = scm_send(sock, msg, &scm, false);
1856 if (err < 0)
1857 return err;
1858
1859 err = -EOPNOTSUPP;
1860 if (msg->msg_flags&MSG_OOB)
1861 goto out_err;
1862
1863 if (msg->msg_namelen) {
1864 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1865 goto out_err;
1866 } else {
1867 err = -ENOTCONN;
1868 other = unix_peer(sk);
1869 if (!other)
1870 goto out_err;
1871 }
1872
1873 if (sk->sk_shutdown & SEND_SHUTDOWN)
1874 goto pipe_err;
1875
1876 while (sent < len) {
1877 size = len - sent;
1878
1879 /* Keep two messages in the pipe so it schedules better */
1880 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1881
1882 /* allow fallback to order-0 allocations */
1883 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1884
1885 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1886
1887 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1888
1889 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1890 msg->msg_flags & MSG_DONTWAIT, &err,
1891 get_order(UNIX_SKB_FRAGS_SZ));
1892 if (!skb)
1893 goto out_err;
1894
1895 /* Only send the fds in the first buffer */
1896 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1897 if (err < 0) {
1898 kfree_skb(skb);
1899 goto out_err;
1900 }
1901 fds_sent = true;
1902
1903 skb_put(skb, size - data_len);
1904 skb->data_len = data_len;
1905 skb->len = size;
1906 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1907 if (err) {
1908 kfree_skb(skb);
1909 goto out_err;
1910 }
1911
1912 unix_state_lock(other);
1913
1914 if (sock_flag(other, SOCK_DEAD) ||
1915 (other->sk_shutdown & RCV_SHUTDOWN))
1916 goto pipe_err_free;
1917
1918 maybe_add_creds(skb, sock, other);
1919 skb_queue_tail(&other->sk_receive_queue, skb);
1920 unix_state_unlock(other);
1921 other->sk_data_ready(other);
1922 sent += size;
1923 }
1924
1925 scm_destroy(&scm);
1926
1927 return sent;
1928
1929pipe_err_free:
1930 unix_state_unlock(other);
1931 kfree_skb(skb);
1932pipe_err:
1933 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1934 send_sig(SIGPIPE, current, 0);
1935 err = -EPIPE;
1936out_err:
1937 scm_destroy(&scm);
1938 return sent ? : err;
1939}
1940
1941static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1942 int offset, size_t size, int flags)
1943{
1944 int err;
1945 bool send_sigpipe = false;
1946 bool init_scm = true;
1947 struct scm_cookie scm;
1948 struct sock *other, *sk = socket->sk;
1949 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1950
1951 if (flags & MSG_OOB)
1952 return -EOPNOTSUPP;
1953
1954 other = unix_peer(sk);
1955 if (!other || sk->sk_state != TCP_ESTABLISHED)
1956 return -ENOTCONN;
1957
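	/* The alloc_skb label below is only reached via goto: it drops the
	 * peer's state lock and the iolock, allocates a fresh skb, and then
	 * falls through to re-acquire the locks and retry. The if (false)
	 * keeps this path out of the initial flow.
	 */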
1958 if (false) {
1959alloc_skb:
1960 unix_state_unlock(other);
1961 mutex_unlock(&unix_sk(other)->iolock);
1962 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1963 &err, 0);
1964 if (!newskb)
1965 goto err;
1966 }
1967
1968 /* we must acquire iolock as we modify already present
1969 * skbs in the sk_receive_queue and mess with skb->len
1970 */
1971 err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1972 if (err) {
1973 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1974 goto err;
1975 }
1976
1977 if (sk->sk_shutdown & SEND_SHUTDOWN) {
1978 err = -EPIPE;
1979 send_sigpipe = true;
1980 goto err_unlock;
1981 }
1982
1983 unix_state_lock(other);
1984
1985 if (sock_flag(other, SOCK_DEAD) ||
1986 other->sk_shutdown & RCV_SHUTDOWN) {
1987 err = -EPIPE;
1988 send_sigpipe = true;
1989 goto err_state_unlock;
1990 }
1991
1992 if (init_scm) {
1993 err = maybe_init_creds(&scm, socket, other);
1994 if (err)
1995 goto err_state_unlock;
1996 init_scm = false;
1997 }
1998
1999 skb = skb_peek_tail(&other->sk_receive_queue);
2000 if (tail && tail == skb) {
2001 skb = newskb;
2002 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2003 if (newskb) {
2004 skb = newskb;
2005 } else {
2006 tail = skb;
2007 goto alloc_skb;
2008 }
2009 } else if (newskb) {
2010		/* This is the fast path; we don't necessarily need to
2011		 * call kfree_skb here, and with newskb == NULL
2012		 * this does no harm.
2013		 */
2014 consume_skb(newskb);
2015 newskb = NULL;
2016 }
2017
2018 if (skb_append_pagefrags(skb, page, offset, size)) {
2019 tail = skb;
2020 goto alloc_skb;
2021 }
2022
2023 skb->len += size;
2024 skb->data_len += size;
2025 skb->truesize += size;
2026 refcount_add(size, &sk->sk_wmem_alloc);
2027
2028 if (newskb) {
2029 err = unix_scm_to_skb(&scm, skb, false);
2030 if (err)
2031 goto err_state_unlock;
2032 spin_lock(&other->sk_receive_queue.lock);
2033 __skb_queue_tail(&other->sk_receive_queue, newskb);
2034 spin_unlock(&other->sk_receive_queue.lock);
2035 }
2036
2037 unix_state_unlock(other);
2038 mutex_unlock(&unix_sk(other)->iolock);
2039
2040 other->sk_data_ready(other);
2041 scm_destroy(&scm);
2042 return size;
2043
2044err_state_unlock:
2045 unix_state_unlock(other);
2046err_unlock:
2047 mutex_unlock(&unix_sk(other)->iolock);
2048err:
2049 kfree_skb(newskb);
2050 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2051 send_sig(SIGPIPE, current, 0);
2052 if (!init_scm)
2053 scm_destroy(&scm);
2054 return err;
2055}
2056
2057static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2058 size_t len)
2059{
2060 int err;
2061 struct sock *sk = sock->sk;
2062
2063 err = sock_error(sk);
2064 if (err)
2065 return err;
2066
2067 if (sk->sk_state != TCP_ESTABLISHED)
2068 return -ENOTCONN;
2069
2070 if (msg->msg_namelen)
2071 msg->msg_namelen = 0;
2072
2073 return unix_dgram_sendmsg(sock, msg, len);
2074}
2075
2076static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2077 size_t size, int flags)
2078{
2079 struct sock *sk = sock->sk;
2080
2081 if (sk->sk_state != TCP_ESTABLISHED)
2082 return -ENOTCONN;
2083
2084 return unix_dgram_recvmsg(sock, msg, size, flags);
2085}
2086
2087static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2088{
2089	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2090
2091	if (addr) {
2092 msg->msg_namelen = addr->len;
2093 memcpy(msg->msg_name, addr->name, addr->len);
2094	}
2095}
2096
2097static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2098 size_t size, int flags)
2099{
2100 struct scm_cookie scm;
2101 struct sock *sk = sock->sk;
2102 struct unix_sock *u = unix_sk(sk);
2103 struct sk_buff *skb, *last;
2104 long timeo;
2105	int skip;
2106	int err;
2107
2108 err = -EOPNOTSUPP;
2109 if (flags&MSG_OOB)
2110 goto out;
2111
2112 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2113
2114 do {
2115 mutex_lock(&u->iolock);
2116
2117 skip = sk_peek_offset(sk, flags);
2118		skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
2119					      &last);
2120		if (skb)
2121 break;
2122
2123 mutex_unlock(&u->iolock);
2124
2125 if (err != -EAGAIN)
2126 break;
2127 } while (timeo &&
2128 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2129
2130 if (!skb) { /* implies iolock unlocked */
2131 unix_state_lock(sk);
2132 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2133 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2134 (sk->sk_shutdown & RCV_SHUTDOWN))
2135 err = 0;
2136 unix_state_unlock(sk);
2137 goto out;
2138 }
2139
2140 if (wq_has_sleeper(&u->peer_wait))
2141 wake_up_interruptible_sync_poll(&u->peer_wait,
2142 EPOLLOUT | EPOLLWRNORM |
2143 EPOLLWRBAND);
2144
2145 if (msg->msg_name)
2146 unix_copy_addr(msg, skb->sk);
2147
2148 if (size > skb->len - skip)
2149 size = skb->len - skip;
2150 else if (size < skb->len - skip)
2151 msg->msg_flags |= MSG_TRUNC;
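	/* Example: a 100-byte datagram read into a 64-byte buffer copies 64
	 * bytes and sets MSG_TRUNC in msg_flags; if MSG_TRUNC was also passed
	 * in flags, the full 100-byte length is reported below instead, and
	 * the rest of the datagram is discarded on a non-peeking read.
	 */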
2152
2153 err = skb_copy_datagram_msg(skb, skip, msg, size);
2154 if (err)
2155 goto out_free;
2156
2157 if (sock_flag(sk, SOCK_RCVTSTAMP))
2158 __sock_recv_timestamp(msg, sk, skb);
2159
2160 memset(&scm, 0, sizeof(scm));
2161
2162 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2163 unix_set_secdata(&scm, skb);
2164
2165 if (!(flags & MSG_PEEK)) {
2166 if (UNIXCB(skb).fp)
2167 unix_detach_fds(&scm, skb);
2168
2169 sk_peek_offset_bwd(sk, skb->len);
2170 } else {
2171 /* It is questionable: on PEEK we could:
2172 - do not return fds - good, but too simple 8)
2173 - return fds, and do not return them on read (old strategy,
2174 apparently wrong)
2175 - clone fds (I chose it for now, it is the most universal
2176 solution)
2177
2178 POSIX 1003.1g does not actually define this clearly
2179 at all. POSIX 1003.1g doesn't define a lot of things
2180 clearly however!
2181
2182 */
2183
2184 sk_peek_offset_fwd(sk, size);
2185
2186 if (UNIXCB(skb).fp)
2187			unix_peek_fds(&scm, skb);
2188	}
2189 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2190
2191 scm_recv(sock, msg, &scm, flags);
2192
2193out_free:
2194 skb_free_datagram(sk, skb);
2195 mutex_unlock(&u->iolock);
2196out:
2197 return err;
2198}
2199
2200/*
2201 * Sleep until more data has arrived. But check for races..
2202 */
2203static long unix_stream_data_wait(struct sock *sk, long timeo,
2204 struct sk_buff *last, unsigned int last_len,
2205 bool freezable)
2206{
2207 struct sk_buff *tail;
2208 DEFINE_WAIT(wait);
2209
2210 unix_state_lock(sk);
2211
2212 for (;;) {
2213 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2214
2215 tail = skb_peek_tail(&sk->sk_receive_queue);
2216 if (tail != last ||
2217 (tail && tail->len != last_len) ||
2218 sk->sk_err ||
2219 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2220 signal_pending(current) ||
2221 !timeo)
2222 break;
2223
2224 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2225 unix_state_unlock(sk);
2226 if (freezable)
2227 timeo = freezable_schedule_timeout(timeo);
2228 else
2229 timeo = schedule_timeout(timeo);
2230 unix_state_lock(sk);
2231
2232 if (sock_flag(sk, SOCK_DEAD))
2233 break;
2234
2235 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2236 }
2237
2238 finish_wait(sk_sleep(sk), &wait);
2239 unix_state_unlock(sk);
2240 return timeo;
2241}
2242
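/* UNIXCB(skb).consumed counts how many bytes of this skb a stream reader has
 * already copied out, so unix_skb_len() is the payload still pending; the skb
 * is only unlinked from the receive queue once all of it has been consumed.
 */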
2243static unsigned int unix_skb_len(const struct sk_buff *skb)
2244{
2245 return skb->len - UNIXCB(skb).consumed;
2246}
2247
2248struct unix_stream_read_state {
2249 int (*recv_actor)(struct sk_buff *, int, int,
2250 struct unix_stream_read_state *);
2251 struct socket *socket;
2252 struct msghdr *msg;
2253 struct pipe_inode_info *pipe;
2254 size_t size;
2255 int flags;
2256 unsigned int splice_flags;
2257};
2258
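/* unix_stream_read_generic() drives both recvmsg() and splice() reads; the
 * recv_actor callback either copies into state->msg (unix_stream_read_actor)
 * or feeds state->pipe (unix_stream_splice_actor) and returns the number of
 * bytes it consumed, or a negative value on error.
 */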
2259static int unix_stream_read_generic(struct unix_stream_read_state *state,
2260 bool freezable)
2261{
2262 struct scm_cookie scm;
2263 struct socket *sock = state->socket;
2264 struct sock *sk = sock->sk;
2265 struct unix_sock *u = unix_sk(sk);
2266 int copied = 0;
2267 int flags = state->flags;
2268 int noblock = flags & MSG_DONTWAIT;
2269 bool check_creds = false;
2270 int target;
2271 int err = 0;
2272 long timeo;
2273 int skip;
2274 size_t size = state->size;
2275 unsigned int last_len;
2276
2277 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2278 err = -EINVAL;
2279 goto out;
2280 }
2281
2282 if (unlikely(flags & MSG_OOB)) {
2283 err = -EOPNOTSUPP;
2284 goto out;
2285 }
2286
2287 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2288 timeo = sock_rcvtimeo(sk, noblock);
2289
2290 memset(&scm, 0, sizeof(scm));
2291
2292	/* Lock the socket to prevent queue disordering
2293	 * while we sleep copying data out to the msghdr
2294	 */
2295 mutex_lock(&u->iolock);
2296
2297 skip = max(sk_peek_offset(sk, flags), 0);
2298
2299 do {
2300 int chunk;
2301 bool drop_skb;
2302 struct sk_buff *skb, *last;
2303
2304redo:
2305 unix_state_lock(sk);
2306 if (sock_flag(sk, SOCK_DEAD)) {
2307 err = -ECONNRESET;
2308 goto unlock;
2309 }
2310 last = skb = skb_peek(&sk->sk_receive_queue);
2311 last_len = last ? last->len : 0;
2312again:
2313 if (skb == NULL) {
2314 if (copied >= target)
2315 goto unlock;
2316
2317 /*
2318 * POSIX 1003.1g mandates this order.
2319 */
2320
2321 err = sock_error(sk);
2322 if (err)
2323 goto unlock;
2324 if (sk->sk_shutdown & RCV_SHUTDOWN)
2325 goto unlock;
2326
2327 unix_state_unlock(sk);
2328 if (!timeo) {
2329 err = -EAGAIN;
2330 break;
2331 }
2332
2333 mutex_unlock(&u->iolock);
2334
2335 timeo = unix_stream_data_wait(sk, timeo, last,
2336 last_len, freezable);
2337
2338 if (signal_pending(current)) {
2339 err = sock_intr_errno(timeo);
2340 scm_destroy(&scm);
2341 goto out;
2342 }
2343
2344 mutex_lock(&u->iolock);
2345 goto redo;
2346unlock:
2347 unix_state_unlock(sk);
2348 break;
2349 }
2350
2351 while (skip >= unix_skb_len(skb)) {
2352 skip -= unix_skb_len(skb);
2353 last = skb;
2354 last_len = skb->len;
2355 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2356 if (!skb)
2357 goto again;
2358 }
2359
2360 unix_state_unlock(sk);
2361
2362 if (check_creds) {
2363 /* Never glue messages from different writers */
2364 if (!unix_skb_scm_eq(skb, &scm))
2365 break;
2366 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2367 /* Copy credentials */
2368 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2369 unix_set_secdata(&scm, skb);
2370 check_creds = true;
2371 }
2372
2373 /* Copy address just once */
2374 if (state->msg && state->msg->msg_name) {
2375 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2376 state->msg->msg_name);
2377 unix_copy_addr(state->msg, skb->sk);
2378 sunaddr = NULL;
2379 }
2380
2381 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2382 skb_get(skb);
2383 chunk = state->recv_actor(skb, skip, chunk, state);
2384 drop_skb = !unix_skb_len(skb);
2385 /* skb is only safe to use if !drop_skb */
2386 consume_skb(skb);
2387 if (chunk < 0) {
2388 if (copied == 0)
2389 copied = -EFAULT;
2390 break;
2391 }
2392 copied += chunk;
2393 size -= chunk;
2394
2395 if (drop_skb) {
2396 /* the skb was touched by a concurrent reader;
2397 * we should not expect anything from this skb
2398 * anymore and assume it invalid - we can be
2399 * sure it was dropped from the socket queue
2400 *
2401 * let's report a short read
2402 */
2403 err = 0;
2404 break;
2405 }
2406
2407 /* Mark read part of skb as used */
2408 if (!(flags & MSG_PEEK)) {
2409 UNIXCB(skb).consumed += chunk;
2410
2411 sk_peek_offset_bwd(sk, chunk);
2412
2413 if (UNIXCB(skb).fp)
2414 unix_detach_fds(&scm, skb);
2415
2416 if (unix_skb_len(skb))
2417 break;
2418
2419 skb_unlink(skb, &sk->sk_receive_queue);
2420 consume_skb(skb);
2421
2422 if (scm.fp)
2423 break;
2424 } else {
2425 /* It is questionable, see note in unix_dgram_recvmsg.
2426 */
2427 if (UNIXCB(skb).fp)
2428				unix_peek_fds(&scm, skb);
2429
2430 sk_peek_offset_fwd(sk, chunk);
2431
2432 if (UNIXCB(skb).fp)
2433 break;
2434
2435 skip = 0;
2436 last = skb;
2437 last_len = skb->len;
2438 unix_state_lock(sk);
2439 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2440 if (skb)
2441 goto again;
2442 unix_state_unlock(sk);
2443 break;
2444 }
2445 } while (size);
2446
2447 mutex_unlock(&u->iolock);
2448 if (state->msg)
2449 scm_recv(sock, state->msg, &scm, flags);
2450 else
2451 scm_destroy(&scm);
2452out:
2453 return copied ? : err;
2454}
2455
2456static int unix_stream_read_actor(struct sk_buff *skb,
2457 int skip, int chunk,
2458 struct unix_stream_read_state *state)
2459{
2460 int ret;
2461
2462 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2463 state->msg, chunk);
2464 return ret ?: chunk;
2465}
2466
2467static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2468 size_t size, int flags)
2469{
2470 struct unix_stream_read_state state = {
2471 .recv_actor = unix_stream_read_actor,
2472 .socket = sock,
2473 .msg = msg,
2474 .size = size,
2475 .flags = flags
2476 };
2477
2478 return unix_stream_read_generic(&state, true);
2479}
2480
2481static int unix_stream_splice_actor(struct sk_buff *skb,
2482 int skip, int chunk,
2483 struct unix_stream_read_state *state)
2484{
2485 return skb_splice_bits(skb, state->socket->sk,
2486 UNIXCB(skb).consumed + skip,
2487 state->pipe, chunk, state->splice_flags);
2488}
2489
2490static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2491 struct pipe_inode_info *pipe,
2492 size_t size, unsigned int flags)
2493{
2494 struct unix_stream_read_state state = {
2495 .recv_actor = unix_stream_splice_actor,
2496 .socket = sock,
2497 .pipe = pipe,
2498 .size = size,
2499 .splice_flags = flags,
2500 };
2501
2502 if (unlikely(*ppos))
2503 return -ESPIPE;
2504
2505 if (sock->file->f_flags & O_NONBLOCK ||
2506 flags & SPLICE_F_NONBLOCK)
2507 state.flags = MSG_DONTWAIT;
2508
2509 return unix_stream_read_generic(&state, false);
2510}
2511
2512static int unix_shutdown(struct socket *sock, int mode)
2513{
2514 struct sock *sk = sock->sk;
2515 struct sock *other;
2516
2517 if (mode < SHUT_RD || mode > SHUT_RDWR)
2518 return -EINVAL;
2519 /* This maps:
2520 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2521 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2522 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2523 */
2524 ++mode;
2525
2526 unix_state_lock(sk);
2527 sk->sk_shutdown |= mode;
2528 other = unix_peer(sk);
2529 if (other)
2530 sock_hold(other);
2531 unix_state_unlock(sk);
2532 sk->sk_state_change(sk);
2533
2534 if (other &&
2535 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2536
2537 int peer_mode = 0;
2538
2539 if (mode&RCV_SHUTDOWN)
2540 peer_mode |= SEND_SHUTDOWN;
2541 if (mode&SEND_SHUTDOWN)
2542 peer_mode |= RCV_SHUTDOWN;
2543 unix_state_lock(other);
2544 other->sk_shutdown |= peer_mode;
2545 unix_state_unlock(other);
2546 other->sk_state_change(other);
2547 if (peer_mode == SHUTDOWN_MASK)
2548 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2549 else if (peer_mode & RCV_SHUTDOWN)
2550 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2551 }
2552 if (other)
2553 sock_put(other);
2554
2555 return 0;
2556}
2557
2558long unix_inq_len(struct sock *sk)
2559{
2560 struct sk_buff *skb;
2561 long amount = 0;
2562
2563 if (sk->sk_state == TCP_LISTEN)
2564 return -EINVAL;
2565
2566 spin_lock(&sk->sk_receive_queue.lock);
2567 if (sk->sk_type == SOCK_STREAM ||
2568 sk->sk_type == SOCK_SEQPACKET) {
2569 skb_queue_walk(&sk->sk_receive_queue, skb)
2570 amount += unix_skb_len(skb);
2571 } else {
2572 skb = skb_peek(&sk->sk_receive_queue);
2573 if (skb)
2574 amount = skb->len;
2575 }
2576 spin_unlock(&sk->sk_receive_queue.lock);
2577
2578 return amount;
2579}
2580EXPORT_SYMBOL_GPL(unix_inq_len);
2581
2582long unix_outq_len(struct sock *sk)
2583{
2584 return sk_wmem_alloc_get(sk);
2585}
2586EXPORT_SYMBOL_GPL(unix_outq_len);
2587
2588static int unix_open_file(struct sock *sk)
2589{
2590 struct path path;
2591 struct file *f;
2592 int fd;
2593
2594 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2595 return -EPERM;
2596
2597	if (!smp_load_acquire(&unix_sk(sk)->addr))
2598		return -ENOENT;
2599
2600 path = unix_sk(sk)->path;
2601 if (!path.dentry)
2602 return -ENOENT;
2603
2604 path_get(&path);
2605
2606 fd = get_unused_fd_flags(O_CLOEXEC);
2607 if (fd < 0)
2608 goto out;
2609
2610 f = dentry_open(&path, O_PATH, current_cred());
2611 if (IS_ERR(f)) {
2612 put_unused_fd(fd);
2613 fd = PTR_ERR(f);
2614 goto out;
2615 }
2616
2617 fd_install(fd, f);
2618out:
2619 path_put(&path);
2620
2621 return fd;
2622}
2623
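/* Illustrative userspace use of the SIOCUNIXFILE ioctl handled below (needs
 * CAP_NET_ADMIN over the socket's network namespace and a socket bound to a
 * filesystem path): it returns a new O_PATH descriptor for the bound inode.
 * A sketch, with "st" being a struct stat declared by the caller:
 *
 *	int pathfd = ioctl(sockfd, SIOCUNIXFILE, 0);
 *	if (pathfd >= 0)
 *		fstatat(pathfd, "", &st, AT_EMPTY_PATH);
 */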
2624static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2625{
2626 struct sock *sk = sock->sk;
2627 long amount = 0;
2628 int err;
2629
2630 switch (cmd) {
2631 case SIOCOUTQ:
2632 amount = unix_outq_len(sk);
2633 err = put_user(amount, (int __user *)arg);
2634 break;
2635 case SIOCINQ:
2636 amount = unix_inq_len(sk);
2637 if (amount < 0)
2638 err = amount;
2639 else
2640 err = put_user(amount, (int __user *)arg);
2641 break;
2642 case SIOCUNIXFILE:
2643 err = unix_open_file(sk);
2644 break;
2645 default:
2646 err = -ENOIOCTLCMD;
2647 break;
2648 }
2649 return err;
2650}
2651
2652#ifdef CONFIG_COMPAT
2653static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2654{
2655 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
2656}
2657#endif
2658
2659static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2660{
2661 struct sock *sk = sock->sk;
2662 __poll_t mask;
2663
2664 sock_poll_wait(file, sock, wait);
2665 mask = 0;
2666
2667 /* exceptional events? */
2668 if (sk->sk_err)
2669 mask |= EPOLLERR;
2670 if (sk->sk_shutdown == SHUTDOWN_MASK)
2671 mask |= EPOLLHUP;
2672 if (sk->sk_shutdown & RCV_SHUTDOWN)
2673 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2674
2675 /* readable? */
2676	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2677		mask |= EPOLLIN | EPOLLRDNORM;
2678
2679 /* Connection-based need to check for termination and startup */
2680 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2681 sk->sk_state == TCP_CLOSE)
2682 mask |= EPOLLHUP;
2683
2684 /*
2685 * we set writable also when the other side has shut down the
2686 * connection. This prevents stuck sockets.
2687 */
2688 if (unix_writable(sk))
2689 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2690
2691 return mask;
2692}
2693
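/* A connected datagram sender withholds EPOLLOUT while the peer's receive
 * queue is full; unix_dgram_peer_wake_me() hooks this socket onto the peer's
 * peer_wait queue, so the wake_up_interruptible_sync_poll() in
 * unix_dgram_recvmsg() rouses the poller once the peer drains a datagram.
 */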
2694static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2695 poll_table *wait)
2696{
2697 struct sock *sk = sock->sk, *other;
2698 unsigned int writable;
2699 __poll_t mask;
2700
2701 sock_poll_wait(file, sock, wait);
2702 mask = 0;
2703
2704 /* exceptional events? */
2705	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2706		mask |= EPOLLERR |
2707 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2708
2709 if (sk->sk_shutdown & RCV_SHUTDOWN)
2710 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2711 if (sk->sk_shutdown == SHUTDOWN_MASK)
2712 mask |= EPOLLHUP;
2713
2714 /* readable? */
2715	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2716		mask |= EPOLLIN | EPOLLRDNORM;
2717
2718 /* Connection-based need to check for termination and startup */
2719 if (sk->sk_type == SOCK_SEQPACKET) {
2720 if (sk->sk_state == TCP_CLOSE)
2721 mask |= EPOLLHUP;
2722 /* connection hasn't started yet? */
2723 if (sk->sk_state == TCP_SYN_SENT)
2724 return mask;
2725 }
2726
2727 /* No write status requested, avoid expensive OUT tests. */
2728 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2729 return mask;
2730
2731 writable = unix_writable(sk);
2732 if (writable) {
2733 unix_state_lock(sk);
2734
2735 other = unix_peer(sk);
2736 if (other && unix_peer(other) != sk &&
2737		    unix_recvq_full_lockless(other) &&
2738		    unix_dgram_peer_wake_me(sk, other))
2739 writable = 0;
2740
2741 unix_state_unlock(sk);
2742 }
2743
2744 if (writable)
2745 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2746 else
2747 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2748
2749 return mask;
2750}
2751
2752#ifdef CONFIG_PROC_FS
2753
2754#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2755
2756#define get_bucket(x) ((x) >> BUCKET_SPACE)
2757#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2758#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
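/* The seq_file position packs the iterator state: the hash bucket index lives
 * in the bits above BUCKET_SPACE and a 1-based offset within that bucket in
 * the low bits, so the walk can resume at *pos without holding
 * unix_table_lock between reads of /proc/net/unix.
 */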
2759
2760static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2761{
2762 unsigned long offset = get_offset(*pos);
2763 unsigned long bucket = get_bucket(*pos);
2764 struct sock *sk;
2765 unsigned long count = 0;
2766
2767 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2768 if (sock_net(sk) != seq_file_net(seq))
2769 continue;
2770 if (++count == offset)
2771 break;
2772 }
2773
2774 return sk;
2775}
2776
2777static struct sock *unix_next_socket(struct seq_file *seq,
2778 struct sock *sk,
2779 loff_t *pos)
2780{
2781 unsigned long bucket;
2782
2783 while (sk > (struct sock *)SEQ_START_TOKEN) {
2784 sk = sk_next(sk);
2785 if (!sk)
2786 goto next_bucket;
2787 if (sock_net(sk) == seq_file_net(seq))
2788 return sk;
2789 }
2790
2791 do {
2792 sk = unix_from_bucket(seq, pos);
2793 if (sk)
2794 return sk;
2795
2796next_bucket:
2797 bucket = get_bucket(*pos) + 1;
2798 *pos = set_bucket_offset(bucket, 1);
2799 } while (bucket < ARRAY_SIZE(unix_socket_table));
2800
2801 return NULL;
2802}
2803
2804static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2805 __acquires(unix_table_lock)
2806{
2807 spin_lock(&unix_table_lock);
2808
2809 if (!*pos)
2810 return SEQ_START_TOKEN;
2811
2812 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2813 return NULL;
2814
2815 return unix_next_socket(seq, NULL, pos);
2816}
2817
2818static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2819{
2820 ++*pos;
2821 return unix_next_socket(seq, v, pos);
2822}
2823
2824static void unix_seq_stop(struct seq_file *seq, void *v)
2825 __releases(unix_table_lock)
2826{
2827 spin_unlock(&unix_table_lock);
2828}
2829
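/* A /proc/net/unix entry printed below looks roughly like this (illustrative
 * inode and path; the fields are kernel address, refcount, protocol, flags,
 * type, state, inode and the optional bound path, '@' marking abstract names):
 *
 *	0000000000000000: 00000002 00000000 00010000 0001 01 24485 /run/foo.sock
 */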
2830static int unix_seq_show(struct seq_file *seq, void *v)
2831{
2832
2833 if (v == SEQ_START_TOKEN)
2834 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2835 "Inode Path\n");
2836 else {
2837 struct sock *s = v;
2838 struct unix_sock *u = unix_sk(s);
2839 unix_state_lock(s);
2840
2841 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2842 s,
2843 refcount_read(&s->sk_refcnt),
2844 0,
2845 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2846 s->sk_type,
2847 s->sk_socket ?
2848 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2849 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2850 sock_i_ino(s));
2851
2852		if (u->addr) {	// under unix_table_lock here
2853			int i, len;
2854 seq_putc(seq, ' ');
2855
2856 i = 0;
2857 len = u->addr->len - sizeof(short);
2858 if (!UNIX_ABSTRACT(s))
2859 len--;
2860 else {
2861 seq_putc(seq, '@');
2862 i++;
2863 }
2864 for ( ; i < len; i++)
2865 seq_putc(seq, u->addr->name->sun_path[i] ?:
2866 '@');
2867 }
2868 unix_state_unlock(s);
2869 seq_putc(seq, '\n');
2870 }
2871
2872 return 0;
2873}
2874
2875static const struct seq_operations unix_seq_ops = {
2876 .start = unix_seq_start,
2877 .next = unix_seq_next,
2878 .stop = unix_seq_stop,
2879 .show = unix_seq_show,
2880};
2881#endif
2882
2883static const struct net_proto_family unix_family_ops = {
2884 .family = PF_UNIX,
2885 .create = unix_create,
2886 .owner = THIS_MODULE,
2887};
2888
2889
2890static int __net_init unix_net_init(struct net *net)
2891{
2892 int error = -ENOMEM;
2893
2894 net->unx.sysctl_max_dgram_qlen = 10;
2895 if (unix_sysctl_register(net))
2896 goto out;
2897
2898#ifdef CONFIG_PROC_FS
2899 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2900 sizeof(struct seq_net_private))) {
2901 unix_sysctl_unregister(net);
2902 goto out;
2903 }
2904#endif
2905 error = 0;
2906out:
2907 return error;
2908}
2909
2910static void __net_exit unix_net_exit(struct net *net)
2911{
2912 unix_sysctl_unregister(net);
2913 remove_proc_entry("unix", net->proc_net);
2914}
2915
2916static struct pernet_operations unix_net_ops = {
2917 .init = unix_net_init,
2918 .exit = unix_net_exit,
2919};
2920
2921static int __init af_unix_init(void)
2922{
2923 int rc = -1;
2924
2925 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2926
2927 rc = proto_register(&unix_proto, 1);
2928 if (rc != 0) {
2929 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2930 goto out;
2931 }
2932
2933 sock_register(&unix_family_ops);
2934 register_pernet_subsys(&unix_net_ops);
2935out:
2936 return rc;
2937}
2938
2939static void __exit af_unix_exit(void)
2940{
2941 sock_unregister(PF_UNIX);
2942 proto_unregister(&unix_proto);
2943 unregister_pernet_subsys(&unix_net_ops);
2944}
2945
2946/* Earlier than device_initcall() so that other drivers invoking
2947 request_module() don't end up in a loop when modprobe tries
2948 to use a UNIX socket. But later than subsys_initcall() because
2949 we depend on stuff initialised there */
2950fs_initcall(af_unix_init);
2951module_exit(af_unix_exit);
2952
2953MODULE_LICENSE("GPL");
2954MODULE_ALIAS_NETPROTO(PF_UNIX);