1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * NET4: Implementation of BSD Unix domain sockets.
4 *
5 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
6 *
7 * Fixes:
8 * Linus Torvalds : Assorted bug cures.
9 * Niibe Yutaka : async I/O support.
10 * Carsten Paeth : PF_UNIX check, address fixes.
11 * Alan Cox : Limit size of allocated blocks.
12 * Alan Cox : Fixed the stupid socketpair bug.
13 * Alan Cox : BSD compatibility fine tuning.
14 * Alan Cox : Fixed a bug in connect when interrupted.
15 * Alan Cox : Sorted out a proper draft version of
16 * file descriptor passing hacked up from
17 * Mike Shaver's work.
18 * Marty Leisner : Fixes to fd passing
19 * Nick Nevin : recvmsg bugfix.
20 * Alan Cox : Started proper garbage collector
21 * Heiko EiBfeldt : Missing verify_area check
22 * Alan Cox : Started POSIXisms
23 * Andreas Schwab : Replace inode by dentry for proper
24 * reference counting
25 * Kirk Petersen : Made this a module
26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
27 * Lots of bug fixes.
28 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
29 * by the above two patches.
30 * Andrea Arcangeli : If possible we block in connect(2)
31 * if the max backlog of the listen socket
32 * has been reached. This won't break
33 * old apps and it will avoid a huge number
34 * of sockets being hashed (this is for unix_gc()
35 * performance reasons).
36 * Security fix that limits the max
37 * number of sockets to 2*max_files and
38 * the number of skbs queueable in the
39 * dgram receiver.
40 * Artur Skawina : Hash function optimizations
41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
42 * Malcolm Beattie : Set peercred for socketpair
43 * Michal Ostrowski : Module initialization cleanup.
44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
45 * the core infrastructure is doing that
46 * for all net proto families now (2.5.69+)
47 *
48 * Known differences from reference BSD that was tested:
49 *
50 * [TO FIX]
51 * ECONNREFUSED is not returned from one end of a connected() socket to the
52 * other the moment one end closes.
53 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
54 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
55 * [NOT TO FIX]
56 * accept() returns a path name even if the connecting socket has closed
57 * in the meantime (BSD loses the path and gives up).
58 * accept() returns 0 length path for an unbound connector. BSD returns 16
59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
61 * BSD af_unix apparently has connect forgetting to block properly.
62 * (need to check this with the POSIX spec in detail)
63 *
64 * Differences from 2.0.0-11-... (ANK)
65 * Bug fixes and improvements.
66 * - client shutdown killed server socket.
67 * - removed all useless cli/sti pairs.
68 *
69 * Semantic changes/extensions.
70 * - generic control message passing.
71 * - SCM_CREDENTIALS control message.
72 * - "Abstract" (not FS based) socket bindings.
73 * Abstract names are sequences of bytes (not zero terminated)
74 * starting with a 0 byte, so that this name space does not intersect
75 * with BSD names.
76 */
77
78#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79
80#include <linux/module.h>
81#include <linux/kernel.h>
82#include <linux/signal.h>
83#include <linux/sched/signal.h>
84#include <linux/errno.h>
85#include <linux/string.h>
86#include <linux/stat.h>
87#include <linux/dcache.h>
88#include <linux/namei.h>
89#include <linux/socket.h>
90#include <linux/un.h>
91#include <linux/fcntl.h>
92#include <linux/termios.h>
93#include <linux/sockios.h>
94#include <linux/net.h>
95#include <linux/in.h>
96#include <linux/fs.h>
97#include <linux/slab.h>
98#include <linux/uaccess.h>
99#include <linux/skbuff.h>
100#include <linux/netdevice.h>
101#include <net/net_namespace.h>
102#include <net/sock.h>
103#include <net/tcp_states.h>
104#include <net/af_unix.h>
105#include <linux/proc_fs.h>
106#include <linux/seq_file.h>
107#include <net/scm.h>
108#include <linux/init.h>
109#include <linux/poll.h>
110#include <linux/rtnetlink.h>
111#include <linux/mount.h>
112#include <net/checksum.h>
113#include <linux/security.h>
114#include <linux/freezer.h>
115#include <linux/file.h>
116
117#include "scm.h"
118
119struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
120EXPORT_SYMBOL_GPL(unix_socket_table);
121DEFINE_SPINLOCK(unix_table_lock);
122EXPORT_SYMBOL_GPL(unix_table_lock);
123static atomic_long_t unix_nr_socks;
124
125
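/* Unbound sockets live in the second half of unix_socket_table, hashed
 * by the kernel address of the struct sock passed in as @addr.
 */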
126static struct hlist_head *unix_sockets_unbound(void *addr)
127{
128 unsigned long hash = (unsigned long)addr;
129
130 hash ^= hash >> 16;
131 hash ^= hash >> 8;
132 hash %= UNIX_HASH_SIZE;
133 return &unix_socket_table[UNIX_HASH_SIZE + hash];
134}
135
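/* Filesystem-bound sockets store UNIX_HASH_SIZE in addr->hash, so any
 * smaller value identifies an abstract (or autobound) address.
 */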
136#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
137
138#ifdef CONFIG_SECURITY_NETWORK
139static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
140{
141 UNIXCB(skb).secid = scm->secid;
142}
143
144static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
145{
146 scm->secid = UNIXCB(skb).secid;
147}
148
149static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
150{
151 return (scm->secid == UNIXCB(skb).secid);
152}
153#else
154static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
155{ }
156
157static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
158{ }
159
160static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
161{
162 return true;
163}
164#endif /* CONFIG_SECURITY_NETWORK */
165
166/*
167 * SMP locking strategy:
168 * hash table is protected with spinlock unix_table_lock
169 * each socket state is protected by separate spin lock.
170 */
171
172static inline unsigned int unix_hash_fold(__wsum n)
173{
174 unsigned int hash = (__force unsigned int)csum_fold(n);
175
176 hash ^= hash>>8;
177 return hash&(UNIX_HASH_SIZE-1);
178}
179
180#define unix_peer(sk) (unix_sk(sk)->peer)
181
182static inline int unix_our_peer(struct sock *sk, struct sock *osk)
183{
184 return unix_peer(osk) == sk;
185}
186
187static inline int unix_may_send(struct sock *sk, struct sock *osk)
188{
189 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
190}
191
192static inline int unix_recvq_full(const struct sock *sk)
193{
194 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
195}
196
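/* Lockless variant of the check above, reading both the queue length and
 * the configured backlog with READ_ONCE().
 */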
197static inline int unix_recvq_full_lockless(const struct sock *sk)
198{
199 return skb_queue_len_lockless(&sk->sk_receive_queue) >
200 READ_ONCE(sk->sk_max_ack_backlog);
201}
202
203struct sock *unix_peer_get(struct sock *s)
204{
205 struct sock *peer;
206
207 unix_state_lock(s);
208 peer = unix_peer(s);
209 if (peer)
210 sock_hold(peer);
211 unix_state_unlock(s);
212 return peer;
213}
214EXPORT_SYMBOL_GPL(unix_peer_get);
215
216static inline void unix_release_addr(struct unix_address *addr)
217{
218 if (refcount_dec_and_test(&addr->refcnt))
219 kfree(addr);
220}
221
222/*
223 * Check unix socket name:
224 * - should not be of zero length.
225 * - if it does not start with a zero byte, it should be NUL terminated (an FS object)
226 * - if it starts with a zero byte, it is an abstract name.
227 */
228
229static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
230{
231 *hashp = 0;
232
233 if (len <= sizeof(short) || len > sizeof(*sunaddr))
234 return -EINVAL;
235 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
236 return -EINVAL;
237 if (sunaddr->sun_path[0]) {
238 /*
239 * This may look like an off by one error but it is a bit more
240 * subtle. 108 is the longest valid AF_UNIX path for a binding.
241 * sun_path[108] doesn't as such exist. However in kernel space
242 * we are guaranteed that it is a valid memory location in our
243 * kernel address buffer.
244 */
245 ((char *)sunaddr)[len] = 0;
246 len = strlen(sunaddr->sun_path)+1+sizeof(short);
247 return len;
248 }
249
250 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
251 return len;
252}
253
254static void __unix_remove_socket(struct sock *sk)
255{
256 sk_del_node_init(sk);
257}
258
259static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
260{
261 WARN_ON(!sk_unhashed(sk));
262 sk_add_node(sk, list);
263}
264
265static inline void unix_remove_socket(struct sock *sk)
266{
267 spin_lock(&unix_table_lock);
268 __unix_remove_socket(sk);
269 spin_unlock(&unix_table_lock);
270}
271
272static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
273{
274 spin_lock(&unix_table_lock);
275 __unix_insert_socket(list, sk);
276 spin_unlock(&unix_table_lock);
277}
278
279static struct sock *__unix_find_socket_byname(struct net *net,
280 struct sockaddr_un *sunname,
281 int len, int type, unsigned int hash)
282{
283 struct sock *s;
284
285 sk_for_each(s, &unix_socket_table[hash ^ type]) {
286 struct unix_sock *u = unix_sk(s);
287
288 if (!net_eq(sock_net(s), net))
289 continue;
290
291 if (u->addr->len == len &&
292 !memcmp(u->addr->name, sunname, len))
293 goto found;
294 }
295 s = NULL;
296found:
297 return s;
298}
299
300static inline struct sock *unix_find_socket_byname(struct net *net,
301 struct sockaddr_un *sunname,
302 int len, int type,
303 unsigned int hash)
304{
305 struct sock *s;
306
307 spin_lock(&unix_table_lock);
308 s = __unix_find_socket_byname(net, sunname, len, type, hash);
309 if (s)
310 sock_hold(s);
311 spin_unlock(&unix_table_lock);
312 return s;
313}
314
315static struct sock *unix_find_socket_byinode(struct inode *i)
316{
317 struct sock *s;
318
319 spin_lock(&unix_table_lock);
320 sk_for_each(s,
321 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
322 struct dentry *dentry = unix_sk(s)->path.dentry;
323
324 if (dentry && d_backing_inode(dentry) == i) {
325 sock_hold(s);
326 goto found;
327 }
328 }
329 s = NULL;
330found:
331 spin_unlock(&unix_table_lock);
332 return s;
333}
334
335/* Support code for asymmetrically connected dgram sockets
336 *
337 * If a datagram socket is connected to a socket not itself connected
338 * to the first socket (eg, /dev/log), clients may only enqueue more
339 * messages if the present receive queue of the server socket is not
340 * "too large". This means there's a second writeability condition
341 * poll and sendmsg need to test. The dgram recv code will do a wake
342 * up on the peer_wait wait queue of a socket upon reception of a
343 * datagram which needs to be propagated to sleeping would-be writers
344 * since these might not have sent anything so far. This can't be
345 * accomplished via poll_wait because the lifetime of the server
346 * socket might be less than that of its clients if these break their
347 * association with it or if the server socket is closed while clients
348 * are still connected to it and there's no way to inform "a polling
349 * implementation" that it should let go of a certain wait queue
350 *
351 * In order to propagate a wake up, a wait_queue_entry_t of the client
352 * socket is enqueued on the peer_wait queue of the server socket
353 * whose wake function does a wake_up on the ordinary client socket
354 * wait queue. This connection is established whenever a write (or
355 * poll for write) hits the flow control condition and is broken when the
356 * association to the server socket is dissolved or after a wake up
357 * was relayed.
358 */
359
360static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
361 void *key)
362{
363 struct unix_sock *u;
364 wait_queue_head_t *u_sleep;
365
366 u = container_of(q, struct unix_sock, peer_wake);
367
368 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
369 q);
370 u->peer_wake.private = NULL;
371
372 /* relaying can only happen while the wq still exists */
373 u_sleep = sk_sleep(&u->sk);
374 if (u_sleep)
375 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
376
377 return 0;
378}
379
380static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
381{
382 struct unix_sock *u, *u_other;
383 int rc;
384
385 u = unix_sk(sk);
386 u_other = unix_sk(other);
387 rc = 0;
388 spin_lock(&u_other->peer_wait.lock);
389
390 if (!u->peer_wake.private) {
391 u->peer_wake.private = other;
392 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
393
394 rc = 1;
395 }
396
397 spin_unlock(&u_other->peer_wait.lock);
398 return rc;
399}
400
401static void unix_dgram_peer_wake_disconnect(struct sock *sk,
402 struct sock *other)
403{
404 struct unix_sock *u, *u_other;
405
406 u = unix_sk(sk);
407 u_other = unix_sk(other);
408 spin_lock(&u_other->peer_wait.lock);
409
410 if (u->peer_wake.private == other) {
411 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
412 u->peer_wake.private = NULL;
413 }
414
415 spin_unlock(&u_other->peer_wait.lock);
416}
417
418static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
419 struct sock *other)
420{
421 unix_dgram_peer_wake_disconnect(sk, other);
422 wake_up_interruptible_poll(sk_sleep(sk),
423 EPOLLOUT |
424 EPOLLWRNORM |
425 EPOLLWRBAND);
426}
427
428/* preconditions:
429 * - unix_peer(sk) == other
430 * - association is stable
431 */
432static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
433{
434 int connected;
435
436 connected = unix_dgram_peer_wake_connect(sk, other);
437
438 /* If other is SOCK_DEAD, we want to make sure we signal
439 * POLLOUT, such that a subsequent write() can get a
440 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
441 * to other and it's full, we will hang waiting for POLLOUT.
442 */
443 if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
444 return 1;
445
446 if (connected)
447 unix_dgram_peer_wake_disconnect(sk, other);
448
449 return 0;
450}
451
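/* A socket is writable while it is not listening and its outstanding
 * write memory is no more than a quarter of sk_sndbuf.
 */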
452static int unix_writable(const struct sock *sk)
453{
454 return sk->sk_state != TCP_LISTEN &&
455 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
456}
457
458static void unix_write_space(struct sock *sk)
459{
460 struct socket_wq *wq;
461
462 rcu_read_lock();
463 if (unix_writable(sk)) {
464 wq = rcu_dereference(sk->sk_wq);
465 if (skwq_has_sleeper(wq))
466 wake_up_interruptible_sync_poll(&wq->wait,
467 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
468 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
469 }
470 rcu_read_unlock();
471}
472
473/* When dgram socket disconnects (or changes its peer), we clear its receive
474 * queue of packets that arrived from the previous peer. First, it allows us to do
475 * flow control based only on wmem_alloc; second, an sk connected to a peer
476 * may receive messages only from that peer. */
477static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
478{
479 if (!skb_queue_empty(&sk->sk_receive_queue)) {
480 skb_queue_purge(&sk->sk_receive_queue);
481 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
482
483 /* If one link of bidirectional dgram pipe is disconnected,
484 * we signal an error. Messages are lost. Do not do this
485 * when the peer was not connected to us.
486 */
487 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
488 other->sk_err = ECONNRESET;
489 other->sk_error_report(other);
490 }
491 }
492}
493
494static void unix_sock_destructor(struct sock *sk)
495{
496 struct unix_sock *u = unix_sk(sk);
497
498 skb_queue_purge(&sk->sk_receive_queue);
499
500 WARN_ON(refcount_read(&sk->sk_wmem_alloc));
501 WARN_ON(!sk_unhashed(sk));
502 WARN_ON(sk->sk_socket);
503 if (!sock_flag(sk, SOCK_DEAD)) {
504 pr_info("Attempt to release alive unix socket: %p\n", sk);
505 return;
506 }
507
508 if (u->addr)
509 unix_release_addr(u->addr);
510
511 atomic_long_dec(&unix_nr_socks);
512 local_bh_disable();
513 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
514 local_bh_enable();
515#ifdef UNIX_REFCNT_DEBUG
516 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
517 atomic_long_read(&unix_nr_socks));
518#endif
519}
520
521static void unix_release_sock(struct sock *sk, int embrion)
522{
523 struct unix_sock *u = unix_sk(sk);
524 struct path path;
525 struct sock *skpair;
526 struct sk_buff *skb;
527 int state;
528
529 unix_remove_socket(sk);
530
531 /* Clear state */
532 unix_state_lock(sk);
533 sock_orphan(sk);
534 sk->sk_shutdown = SHUTDOWN_MASK;
535 path = u->path;
536 u->path.dentry = NULL;
537 u->path.mnt = NULL;
538 state = sk->sk_state;
539 sk->sk_state = TCP_CLOSE;
540
541 skpair = unix_peer(sk);
542 unix_peer(sk) = NULL;
543
544 unix_state_unlock(sk);
545
546 wake_up_interruptible_all(&u->peer_wait);
547
548 if (skpair != NULL) {
549 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
550 unix_state_lock(skpair);
551 /* No more writes */
552 skpair->sk_shutdown = SHUTDOWN_MASK;
553 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
554 skpair->sk_err = ECONNRESET;
555 unix_state_unlock(skpair);
556 skpair->sk_state_change(skpair);
557 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
558 }
559
560 unix_dgram_peer_wake_disconnect(sk, skpair);
561 sock_put(skpair); /* It may now die */
562 }
563
564 /* Try to flush out this socket. Throw out buffers at least */
565
566 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
567 if (state == TCP_LISTEN)
568 unix_release_sock(skb->sk, 1);
569 /* passed fds are erased in the kfree_skb hook */
570 UNIXCB(skb).consumed = skb->len;
571 kfree_skb(skb);
572 }
573
574 if (path.dentry)
575 path_put(&path);
576
577 sock_put(sk);
578
579 /* ---- Socket is dead now and most probably destroyed ---- */
580
581 /*
582 * Fixme: BSD difference: In BSD all sockets connected to us get
583 * ECONNRESET and we die on the spot. In Linux we behave
584 * like files and pipes do and wait for the last
585 * dereference.
586 *
587 * Can't we simply set sock->err?
588 *
589 * What the above comment does talk about? --ANK(980817)
590 */
591
592 if (unix_tot_inflight)
593 unix_gc(); /* Garbage collect fds */
594}
595
596static void init_peercred(struct sock *sk)
597{
598 put_pid(sk->sk_peer_pid);
599 if (sk->sk_peer_cred)
600 put_cred(sk->sk_peer_cred);
601 sk->sk_peer_pid = get_pid(task_tgid(current));
602 sk->sk_peer_cred = get_current_cred();
603}
604
605static void copy_peercred(struct sock *sk, struct sock *peersk)
606{
607 put_pid(sk->sk_peer_pid);
608 if (sk->sk_peer_cred)
609 put_cred(sk->sk_peer_cred);
610 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
611 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
612}
613
614static int unix_listen(struct socket *sock, int backlog)
615{
616 int err;
617 struct sock *sk = sock->sk;
618 struct unix_sock *u = unix_sk(sk);
619 struct pid *old_pid = NULL;
620
621 err = -EOPNOTSUPP;
622 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
623 goto out; /* Only stream/seqpacket sockets accept */
624 err = -EINVAL;
625 if (!u->addr)
626 goto out; /* No listens on an unbound socket */
627 unix_state_lock(sk);
628 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
629 goto out_unlock;
630 if (backlog > sk->sk_max_ack_backlog)
631 wake_up_interruptible_all(&u->peer_wait);
632 sk->sk_max_ack_backlog = backlog;
633 sk->sk_state = TCP_LISTEN;
634 /* set credentials so connect can copy them */
635 init_peercred(sk);
636 err = 0;
637
638out_unlock:
639 unix_state_unlock(sk);
640 put_pid(old_pid);
641out:
642 return err;
643}
644
645static int unix_release(struct socket *);
646static int unix_bind(struct socket *, struct sockaddr *, int);
647static int unix_stream_connect(struct socket *, struct sockaddr *,
648 int addr_len, int flags);
649static int unix_socketpair(struct socket *, struct socket *);
650static int unix_accept(struct socket *, struct socket *, int, bool);
651static int unix_getname(struct socket *, struct sockaddr *, int);
652static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
653static __poll_t unix_dgram_poll(struct file *, struct socket *,
654 poll_table *);
655static int unix_ioctl(struct socket *, unsigned int, unsigned long);
656#ifdef CONFIG_COMPAT
657static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
658#endif
659static int unix_shutdown(struct socket *, int);
660static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
661static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
662static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
663 size_t size, int flags);
664static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
665 struct pipe_inode_info *, size_t size,
666 unsigned int flags);
667static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
668static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
669static int unix_dgram_connect(struct socket *, struct sockaddr *,
670 int, int);
671static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
672static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
673 int);
674
675static int unix_set_peek_off(struct sock *sk, int val)
676{
677 struct unix_sock *u = unix_sk(sk);
678
679 if (mutex_lock_interruptible(&u->iolock))
680 return -EINTR;
681
682 sk->sk_peek_off = val;
683 mutex_unlock(&u->iolock);
684
685 return 0;
686}
687
688
689static const struct proto_ops unix_stream_ops = {
690 .family = PF_UNIX,
691 .owner = THIS_MODULE,
692 .release = unix_release,
693 .bind = unix_bind,
694 .connect = unix_stream_connect,
695 .socketpair = unix_socketpair,
696 .accept = unix_accept,
697 .getname = unix_getname,
698 .poll = unix_poll,
699 .ioctl = unix_ioctl,
700#ifdef CONFIG_COMPAT
701 .compat_ioctl = unix_compat_ioctl,
702#endif
703 .listen = unix_listen,
704 .shutdown = unix_shutdown,
705 .setsockopt = sock_no_setsockopt,
706 .getsockopt = sock_no_getsockopt,
707 .sendmsg = unix_stream_sendmsg,
708 .recvmsg = unix_stream_recvmsg,
709 .mmap = sock_no_mmap,
710 .sendpage = unix_stream_sendpage,
711 .splice_read = unix_stream_splice_read,
712 .set_peek_off = unix_set_peek_off,
713};
714
715static const struct proto_ops unix_dgram_ops = {
716 .family = PF_UNIX,
717 .owner = THIS_MODULE,
718 .release = unix_release,
719 .bind = unix_bind,
720 .connect = unix_dgram_connect,
721 .socketpair = unix_socketpair,
722 .accept = sock_no_accept,
723 .getname = unix_getname,
724 .poll = unix_dgram_poll,
725 .ioctl = unix_ioctl,
726#ifdef CONFIG_COMPAT
727 .compat_ioctl = unix_compat_ioctl,
728#endif
729 .listen = sock_no_listen,
730 .shutdown = unix_shutdown,
731 .setsockopt = sock_no_setsockopt,
732 .getsockopt = sock_no_getsockopt,
733 .sendmsg = unix_dgram_sendmsg,
734 .recvmsg = unix_dgram_recvmsg,
735 .mmap = sock_no_mmap,
736 .sendpage = sock_no_sendpage,
737 .set_peek_off = unix_set_peek_off,
738};
739
740static const struct proto_ops unix_seqpacket_ops = {
741 .family = PF_UNIX,
742 .owner = THIS_MODULE,
743 .release = unix_release,
744 .bind = unix_bind,
745 .connect = unix_stream_connect,
746 .socketpair = unix_socketpair,
747 .accept = unix_accept,
748 .getname = unix_getname,
749 .poll = unix_dgram_poll,
750 .ioctl = unix_ioctl,
751#ifdef CONFIG_COMPAT
752 .compat_ioctl = unix_compat_ioctl,
753#endif
754 .listen = unix_listen,
755 .shutdown = unix_shutdown,
756 .setsockopt = sock_no_setsockopt,
757 .getsockopt = sock_no_getsockopt,
758 .sendmsg = unix_seqpacket_sendmsg,
759 .recvmsg = unix_seqpacket_recvmsg,
760 .mmap = sock_no_mmap,
761 .sendpage = sock_no_sendpage,
762 .set_peek_off = unix_set_peek_off,
763};
764
765static struct proto unix_proto = {
766 .name = "UNIX",
767 .owner = THIS_MODULE,
768 .obj_size = sizeof(struct unix_sock),
769};
770
771static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
772{
773 struct sock *sk = NULL;
774 struct unix_sock *u;
775
776 atomic_long_inc(&unix_nr_socks);
777 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
778 goto out;
779
780 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
781 if (!sk)
782 goto out;
783
784 sock_init_data(sock, sk);
785
786 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
787 sk->sk_write_space = unix_write_space;
788 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
789 sk->sk_destruct = unix_sock_destructor;
790 u = unix_sk(sk);
791 u->path.dentry = NULL;
792 u->path.mnt = NULL;
793 spin_lock_init(&u->lock);
794 atomic_long_set(&u->inflight, 0);
795 INIT_LIST_HEAD(&u->link);
796 mutex_init(&u->iolock); /* single task reading lock */
797 mutex_init(&u->bindlock); /* single task binding lock */
798 init_waitqueue_head(&u->peer_wait);
799 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
800 unix_insert_socket(unix_sockets_unbound(sk), sk);
801out:
802 if (sk == NULL)
803 atomic_long_dec(&unix_nr_socks);
804 else {
805 local_bh_disable();
806 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
807 local_bh_enable();
808 }
809 return sk;
810}
811
812static int unix_create(struct net *net, struct socket *sock, int protocol,
813 int kern)
814{
815 if (protocol && protocol != PF_UNIX)
816 return -EPROTONOSUPPORT;
817
818 sock->state = SS_UNCONNECTED;
819
820 switch (sock->type) {
821 case SOCK_STREAM:
822 sock->ops = &unix_stream_ops;
823 break;
824 /*
825 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
826 * nothing uses it.
827 */
828 case SOCK_RAW:
829 sock->type = SOCK_DGRAM;
830 /* fall through */
831 case SOCK_DGRAM:
832 sock->ops = &unix_dgram_ops;
833 break;
834 case SOCK_SEQPACKET:
835 sock->ops = &unix_seqpacket_ops;
836 break;
837 default:
838 return -ESOCKTNOSUPPORT;
839 }
840
841 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
842}
843
844static int unix_release(struct socket *sock)
845{
846 struct sock *sk = sock->sk;
847
848 if (!sk)
849 return 0;
850
851 unix_release_sock(sk, 0);
852 sock->sk = NULL;
853
854 return 0;
855}
856
857static int unix_autobind(struct socket *sock)
858{
859 struct sock *sk = sock->sk;
860 struct net *net = sock_net(sk);
861 struct unix_sock *u = unix_sk(sk);
862 static u32 ordernum = 1;
863 struct unix_address *addr;
864 int err;
865 unsigned int retries = 0;
866
867 err = mutex_lock_interruptible(&u->bindlock);
868 if (err)
869 return err;
870
871 err = 0;
872 if (u->addr)
873 goto out;
874
875 err = -ENOMEM;
876 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
877 if (!addr)
878 goto out;
879
880 addr->name->sun_family = AF_UNIX;
881 refcount_set(&addr->refcnt, 1);
882
883retry:
884 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
885 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
886
887 spin_lock(&unix_table_lock);
888 ordernum = (ordernum+1)&0xFFFFF;
889
890 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
891 addr->hash)) {
892 spin_unlock(&unix_table_lock);
893 /*
894 * __unix_find_socket_byname() may take a long time if many names
895 * are already in use.
896 */
897 cond_resched();
898 /* Give up if all names seem to be in use. */
899 if (retries++ == 0xFFFFF) {
900 err = -ENOSPC;
901 kfree(addr);
902 goto out;
903 }
904 goto retry;
905 }
906 addr->hash ^= sk->sk_type;
907
908 __unix_remove_socket(sk);
909 smp_store_release(&u->addr, addr);
910 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
911 spin_unlock(&unix_table_lock);
912 err = 0;
913
914out: mutex_unlock(&u->bindlock);
915 return err;
916}
917
918static struct sock *unix_find_other(struct net *net,
919 struct sockaddr_un *sunname, int len,
920 int type, unsigned int hash, int *error)
921{
922 struct sock *u;
923 struct path path;
924 int err = 0;
925
926 if (sunname->sun_path[0]) {
927 struct inode *inode;
928 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
929 if (err)
930 goto fail;
931 inode = d_backing_inode(path.dentry);
932 err = inode_permission(inode, MAY_WRITE);
933 if (err)
934 goto put_fail;
935
936 err = -ECONNREFUSED;
937 if (!S_ISSOCK(inode->i_mode))
938 goto put_fail;
939 u = unix_find_socket_byinode(inode);
940 if (!u)
941 goto put_fail;
942
943 if (u->sk_type == type)
944 touch_atime(&path);
945
946 path_put(&path);
947
948 err = -EPROTOTYPE;
949 if (u->sk_type != type) {
950 sock_put(u);
951 goto fail;
952 }
953 } else {
954 err = -ECONNREFUSED;
955 u = unix_find_socket_byname(net, sunname, len, type, hash);
956 if (u) {
957 struct dentry *dentry;
958 dentry = unix_sk(u)->path.dentry;
959 if (dentry)
960 touch_atime(&unix_sk(u)->path);
961 } else
962 goto fail;
963 }
964 return u;
965
966put_fail:
967 path_put(&path);
968fail:
969 *error = err;
970 return NULL;
971}
972
973static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
974{
975 struct dentry *dentry;
976 struct path path;
977 int err = 0;
978 /*
979 * Get the parent directory, calculate the hash for the last
980 * component.
981 */
982 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
983 err = PTR_ERR(dentry);
984 if (IS_ERR(dentry))
985 return err;
986
987 /*
988 * All right, let's create it.
989 */
990 err = security_path_mknod(&path, dentry, mode, 0);
991 if (!err) {
992 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
993 if (!err) {
994 res->mnt = mntget(path.mnt);
995 res->dentry = dget(dentry);
996 }
997 }
998 done_path_create(&path, dentry);
999 return err;
1000}
1001
1002static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1003{
1004 struct sock *sk = sock->sk;
1005 struct net *net = sock_net(sk);
1006 struct unix_sock *u = unix_sk(sk);
1007 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1008 char *sun_path = sunaddr->sun_path;
1009 int err;
1010 unsigned int hash;
1011 struct unix_address *addr;
1012 struct hlist_head *list;
1013 struct path path = { };
1014
1015 err = -EINVAL;
1016 if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1017 sunaddr->sun_family != AF_UNIX)
1018 goto out;
1019
1020 if (addr_len == sizeof(short)) {
1021 err = unix_autobind(sock);
1022 goto out;
1023 }
1024
1025 err = unix_mkname(sunaddr, addr_len, &hash);
1026 if (err < 0)
1027 goto out;
1028 addr_len = err;
1029
1030 if (sun_path[0]) {
1031 umode_t mode = S_IFSOCK |
1032 (SOCK_INODE(sock)->i_mode & ~current_umask());
1033 err = unix_mknod(sun_path, mode, &path);
1034 if (err) {
1035 if (err == -EEXIST)
1036 err = -EADDRINUSE;
1037 goto out;
1038 }
1039 }
1040
1041 err = mutex_lock_interruptible(&u->bindlock);
1042 if (err)
1043 goto out_put;
1044
1045 err = -EINVAL;
1046 if (u->addr)
1047 goto out_up;
1048
1049 err = -ENOMEM;
1050 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1051 if (!addr)
1052 goto out_up;
1053
1054 memcpy(addr->name, sunaddr, addr_len);
1055 addr->len = addr_len;
1056 addr->hash = hash ^ sk->sk_type;
1057 refcount_set(&addr->refcnt, 1);
1058
1059 if (sun_path[0]) {
1060 addr->hash = UNIX_HASH_SIZE;
1061 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1062 spin_lock(&unix_table_lock);
1063 u->path = path;
1064 list = &unix_socket_table[hash];
1065 } else {
1066 spin_lock(&unix_table_lock);
1067 err = -EADDRINUSE;
1068 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1069 sk->sk_type, hash)) {
1070 unix_release_addr(addr);
1071 goto out_unlock;
1072 }
1073
1074 list = &unix_socket_table[addr->hash];
1075 }
1076
1077 err = 0;
1078 __unix_remove_socket(sk);
1079 smp_store_release(&u->addr, addr);
1080 __unix_insert_socket(list, sk);
1081
1082out_unlock:
1083 spin_unlock(&unix_table_lock);
1084out_up:
1085 mutex_unlock(&u->bindlock);
1086out_put:
1087 if (err)
1088 path_put(&path);
1089out:
1090 return err;
1091}
1092
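/* Take the two state locks in pointer order so concurrent double-lockers
 * cannot deadlock; a NULL or identical second socket degenerates to a
 * single lock.
 */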
1093static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1094{
1095 if (unlikely(sk1 == sk2) || !sk2) {
1096 unix_state_lock(sk1);
1097 return;
1098 }
1099 if (sk1 < sk2) {
1100 unix_state_lock(sk1);
1101 unix_state_lock_nested(sk2);
1102 } else {
1103 unix_state_lock(sk2);
1104 unix_state_lock_nested(sk1);
1105 }
1106}
1107
1108static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1109{
1110 if (unlikely(sk1 == sk2) || !sk2) {
1111 unix_state_unlock(sk1);
1112 return;
1113 }
1114 unix_state_unlock(sk1);
1115 unix_state_unlock(sk2);
1116}
1117
1118static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1119 int alen, int flags)
1120{
1121 struct sock *sk = sock->sk;
1122 struct net *net = sock_net(sk);
1123 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1124 struct sock *other;
1125 unsigned int hash;
1126 int err;
1127
1128 err = -EINVAL;
1129 if (alen < offsetofend(struct sockaddr, sa_family))
1130 goto out;
1131
1132 if (addr->sa_family != AF_UNSPEC) {
1133 err = unix_mkname(sunaddr, alen, &hash);
1134 if (err < 0)
1135 goto out;
1136 alen = err;
1137
1138 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1139 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1140 goto out;
1141
1142restart:
1143 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1144 if (!other)
1145 goto out;
1146
1147 unix_state_double_lock(sk, other);
1148
1149 /* Apparently VFS overslept socket death. Retry. */
1150 if (sock_flag(other, SOCK_DEAD)) {
1151 unix_state_double_unlock(sk, other);
1152 sock_put(other);
1153 goto restart;
1154 }
1155
1156 err = -EPERM;
1157 if (!unix_may_send(sk, other))
1158 goto out_unlock;
1159
1160 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1161 if (err)
1162 goto out_unlock;
1163
1164 } else {
1165 /*
1166 * 1003.1g breaking connected state with AF_UNSPEC
1167 */
1168 other = NULL;
1169 unix_state_double_lock(sk, other);
1170 }
1171
1172 /*
1173 * If it was connected, reconnect.
1174 */
1175 if (unix_peer(sk)) {
1176 struct sock *old_peer = unix_peer(sk);
1177 unix_peer(sk) = other;
1178 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1179
1180 unix_state_double_unlock(sk, other);
1181
1182 if (other != old_peer)
1183 unix_dgram_disconnected(sk, old_peer);
1184 sock_put(old_peer);
1185 } else {
1186 unix_peer(sk) = other;
1187 unix_state_double_unlock(sk, other);
1188 }
1189 return 0;
1190
1191out_unlock:
1192 unix_state_double_unlock(sk, other);
1193 sock_put(other);
1194out:
1195 return err;
1196}
1197
1198static long unix_wait_for_peer(struct sock *other, long timeo)
1199{
1200 struct unix_sock *u = unix_sk(other);
1201 int sched;
1202 DEFINE_WAIT(wait);
1203
1204 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1205
1206 sched = !sock_flag(other, SOCK_DEAD) &&
1207 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1208 unix_recvq_full(other);
1209
1210 unix_state_unlock(other);
1211
1212 if (sched)
1213 timeo = schedule_timeout(timeo);
1214
1215 finish_wait(&u->peer_wait, &wait);
1216 return timeo;
1217}
1218
1219static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1220 int addr_len, int flags)
1221{
1222 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1223 struct sock *sk = sock->sk;
1224 struct net *net = sock_net(sk);
1225 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1226 struct sock *newsk = NULL;
1227 struct sock *other = NULL;
1228 struct sk_buff *skb = NULL;
1229 unsigned int hash;
1230 int st;
1231 int err;
1232 long timeo;
1233
1234 err = unix_mkname(sunaddr, addr_len, &hash);
1235 if (err < 0)
1236 goto out;
1237 addr_len = err;
1238
1239 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1240 (err = unix_autobind(sock)) != 0)
1241 goto out;
1242
1243 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1244
1245 /* First of all allocate resources.
1246 If we do it after the state is locked,
1247 we will have to recheck everything again in any case.
1248 */
1249
1250 err = -ENOMEM;
1251
1252 /* create new sock for complete connection */
1253 newsk = unix_create1(sock_net(sk), NULL, 0);
1254 if (newsk == NULL)
1255 goto out;
1256
1257 /* Allocate skb for sending to listening sock */
1258 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1259 if (skb == NULL)
1260 goto out;
1261
1262restart:
1263 /* Find listening sock. */
1264 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1265 if (!other)
1266 goto out;
1267
1268 /* Latch state of peer */
1269 unix_state_lock(other);
1270
1271 /* Apparently VFS overslept socket death. Retry. */
1272 if (sock_flag(other, SOCK_DEAD)) {
1273 unix_state_unlock(other);
1274 sock_put(other);
1275 goto restart;
1276 }
1277
1278 err = -ECONNREFUSED;
1279 if (other->sk_state != TCP_LISTEN)
1280 goto out_unlock;
1281 if (other->sk_shutdown & RCV_SHUTDOWN)
1282 goto out_unlock;
1283
1284 if (unix_recvq_full(other)) {
1285 err = -EAGAIN;
1286 if (!timeo)
1287 goto out_unlock;
1288
1289 timeo = unix_wait_for_peer(other, timeo);
1290
1291 err = sock_intr_errno(timeo);
1292 if (signal_pending(current))
1293 goto out;
1294 sock_put(other);
1295 goto restart;
1296 }
1297
1298 /* Latch our state.
1299
1300 It is a tricky place. We need to grab our state lock and cannot
1301 drop the lock on the peer. It is dangerous because a deadlock is
1302 possible. The connect-to-self case and a simultaneous
1303 attempt to connect are eliminated by checking the socket
1304 state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1305 check this before attempting to grab the lock.
1306
1307 Well, and we have to recheck the state after socket locked.
1308 */
1309 st = sk->sk_state;
1310
1311 switch (st) {
1312 case TCP_CLOSE:
1313 /* This is ok... continue with connect */
1314 break;
1315 case TCP_ESTABLISHED:
1316 /* Socket is already connected */
1317 err = -EISCONN;
1318 goto out_unlock;
1319 default:
1320 err = -EINVAL;
1321 goto out_unlock;
1322 }
1323
1324 unix_state_lock_nested(sk);
1325
1326 if (sk->sk_state != st) {
1327 unix_state_unlock(sk);
1328 unix_state_unlock(other);
1329 sock_put(other);
1330 goto restart;
1331 }
1332
1333 err = security_unix_stream_connect(sk, other, newsk);
1334 if (err) {
1335 unix_state_unlock(sk);
1336 goto out_unlock;
1337 }
1338
1339 /* The way is open! Quickly set all the necessary fields... */
1340
1341 sock_hold(sk);
1342 unix_peer(newsk) = sk;
1343 newsk->sk_state = TCP_ESTABLISHED;
1344 newsk->sk_type = sk->sk_type;
1345 init_peercred(newsk);
1346 newu = unix_sk(newsk);
1347 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1348 otheru = unix_sk(other);
1349
1350 /* copy address information from listening to new sock
1351 *
1352 * The contents of *(otheru->addr) and otheru->path
1353 * are seen fully set up here, since we have found
1354 * otheru in hash under unix_table_lock. Insertion
1355 * into the hash chain we'd found it in had been done
1356 * in an earlier critical area protected by unix_table_lock,
1357 * the same one where we'd set *(otheru->addr) contents,
1358 * as well as otheru->path and otheru->addr itself.
1359 *
1360 * Using smp_store_release() here to set newu->addr
1361 * is enough to make those stores, as well as stores
1362 * to newu->path visible to anyone who gets newu->addr
1363 * by smp_load_acquire(). IOW, the same warranties
1364 * as for unix_sock instances bound in unix_bind() or
1365 * in unix_autobind().
1366 */
1367 if (otheru->path.dentry) {
1368 path_get(&otheru->path);
1369 newu->path = otheru->path;
1370 }
1371 refcount_inc(&otheru->addr->refcnt);
1372 smp_store_release(&newu->addr, otheru->addr);
1373
1374 /* Set credentials */
1375 copy_peercred(sk, other);
1376
1377 sock->state = SS_CONNECTED;
1378 sk->sk_state = TCP_ESTABLISHED;
1379 sock_hold(newsk);
1380
1381 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1382 unix_peer(sk) = newsk;
1383
1384 unix_state_unlock(sk);
1385
1386 /* take ten and send info to listening sock */
1387 spin_lock(&other->sk_receive_queue.lock);
1388 __skb_queue_tail(&other->sk_receive_queue, skb);
1389 spin_unlock(&other->sk_receive_queue.lock);
1390 unix_state_unlock(other);
1391 other->sk_data_ready(other);
1392 sock_put(other);
1393 return 0;
1394
1395out_unlock:
1396 if (other)
1397 unix_state_unlock(other);
1398
1399out:
1400 kfree_skb(skb);
1401 if (newsk)
1402 unix_release_sock(newsk, 0);
1403 if (other)
1404 sock_put(other);
1405 return err;
1406}
1407
1408static int unix_socketpair(struct socket *socka, struct socket *sockb)
1409{
1410 struct sock *ska = socka->sk, *skb = sockb->sk;
1411
1412 /* Join our sockets back to back */
1413 sock_hold(ska);
1414 sock_hold(skb);
1415 unix_peer(ska) = skb;
1416 unix_peer(skb) = ska;
1417 init_peercred(ska);
1418 init_peercred(skb);
1419
1420 if (ska->sk_type != SOCK_DGRAM) {
1421 ska->sk_state = TCP_ESTABLISHED;
1422 skb->sk_state = TCP_ESTABLISHED;
1423 socka->state = SS_CONNECTED;
1424 sockb->state = SS_CONNECTED;
1425 }
1426 return 0;
1427}
1428
1429static void unix_sock_inherit_flags(const struct socket *old,
1430 struct socket *new)
1431{
1432 if (test_bit(SOCK_PASSCRED, &old->flags))
1433 set_bit(SOCK_PASSCRED, &new->flags);
1434 if (test_bit(SOCK_PASSSEC, &old->flags))
1435 set_bit(SOCK_PASSSEC, &new->flags);
1436}
1437
1438static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1439 bool kern)
1440{
1441 struct sock *sk = sock->sk;
1442 struct sock *tsk;
1443 struct sk_buff *skb;
1444 int err;
1445
1446 err = -EOPNOTSUPP;
1447 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1448 goto out;
1449
1450 err = -EINVAL;
1451 if (sk->sk_state != TCP_LISTEN)
1452 goto out;
1453
1454 /* If socket state is TCP_LISTEN it cannot change (for now...),
1455 * so that no locks are necessary.
1456 */
1457
1458 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1459 if (!skb) {
1460 /* This means receive shutdown. */
1461 if (err == 0)
1462 err = -EINVAL;
1463 goto out;
1464 }
1465
1466 tsk = skb->sk;
1467 skb_free_datagram(sk, skb);
1468 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1469
1470 /* attach accepted sock to socket */
1471 unix_state_lock(tsk);
1472 newsock->state = SS_CONNECTED;
1473 unix_sock_inherit_flags(sock, newsock);
1474 sock_graft(tsk, newsock);
1475 unix_state_unlock(tsk);
1476 return 0;
1477
1478out:
1479 return err;
1480}
1481
1482
1483static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1484{
1485 struct sock *sk = sock->sk;
1486 struct unix_address *addr;
1487 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1488 int err = 0;
1489
1490 if (peer) {
1491 sk = unix_peer_get(sk);
1492
1493 err = -ENOTCONN;
1494 if (!sk)
1495 goto out;
1496 err = 0;
1497 } else {
1498 sock_hold(sk);
1499 }
1500
1501 addr = smp_load_acquire(&unix_sk(sk)->addr);
1502 if (!addr) {
1503 sunaddr->sun_family = AF_UNIX;
1504 sunaddr->sun_path[0] = 0;
1505 err = sizeof(short);
1506 } else {
1507 err = addr->len;
1508 memcpy(sunaddr, addr->name, addr->len);
1509 }
1510 sock_put(sk);
1511out:
1512 return err;
1513}
1514
1515static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1516{
1517 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1518
1519 /*
1520 * Garbage collection of unix sockets starts by selecting a set of
1521 * candidate sockets which have reference only from being in flight
1522 * (total_refs == inflight_refs). This condition is checked once during
1523 * the candidate collection phase, and candidates are marked as such, so
1524 * that non-candidates can later be ignored. While inflight_refs is
1525 * protected by unix_gc_lock, total_refs (file count) is not, hence this
1526 * is an instantaneous decision.
1527 *
1528 * Once a candidate, however, the socket must not be reinstalled into a
1529 * file descriptor while the garbage collection is in progress.
1530 *
1531 * If the above conditions are met, then the directed graph of
1532 * candidates (*) does not change while unix_gc_lock is held.
1533 *
1534 * Any operation that changes the file count through file descriptors
1535 * (dup, close, sendmsg) does not change the graph since candidates are
1536 * not installed in fds.
1537 *
1538 * Dequeuing a candidate via recvmsg would install it into an fd, but
1539 * that takes unix_gc_lock to decrement the inflight count, so it's
1540 * serialized with garbage collection.
1541 *
1542 * MSG_PEEK is special in that it does not change the inflight count,
1543 * yet does install the socket into an fd. The following lock/unlock
1544 * pair is to ensure serialization with garbage collection. It must be
1545 * done between incrementing the file count and installing the file into
1546 * an fd.
1547 *
1548 * If garbage collection starts after the barrier provided by the
1549 * lock/unlock, then it will see the elevated refcount and not mark this
1550 * as a candidate. If a garbage collection is already in progress
1551 * before the file count was incremented, then the lock/unlock pair will
1552 * ensure that garbage collection is finished before progressing to
1553 * installing the fd.
1554 *
1555 * (*) A -> B where B is on the queue of A or B is on the queue of C
1556 * which is on the queue of listening socket A.
1557 */
1558 spin_lock(&unix_gc_lock);
1559 spin_unlock(&unix_gc_lock);
1560}
1561
1562static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1563{
1564 int err = 0;
1565
1566 UNIXCB(skb).pid = get_pid(scm->pid);
1567 UNIXCB(skb).uid = scm->creds.uid;
1568 UNIXCB(skb).gid = scm->creds.gid;
1569 UNIXCB(skb).fp = NULL;
1570 unix_get_secdata(scm, skb);
1571 if (scm->fp && send_fds)
1572 err = unix_attach_fds(scm, skb);
1573
1574 skb->destructor = unix_destruct_scm;
1575 return err;
1576}
1577
1578static bool unix_passcred_enabled(const struct socket *sock,
1579 const struct sock *other)
1580{
1581 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1582 !other->sk_socket ||
1583 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1584}
1585
1586/*
1587 * Some apps rely on write() giving SCM_CREDENTIALS
1588 * We include credentials if source or destination socket
1589 * asserted SOCK_PASSCRED.
1590 */
1591static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1592 const struct sock *other)
1593{
1594 if (UNIXCB(skb).pid)
1595 return;
1596 if (unix_passcred_enabled(sock, other)) {
1597 UNIXCB(skb).pid = get_pid(task_tgid(current));
1598 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1599 }
1600}
1601
1602static int maybe_init_creds(struct scm_cookie *scm,
1603 struct socket *socket,
1604 const struct sock *other)
1605{
1606 int err;
1607 struct msghdr msg = { .msg_controllen = 0 };
1608
1609 err = scm_send(socket, &msg, scm, false);
1610 if (err)
1611 return err;
1612
1613 if (unix_passcred_enabled(socket, other)) {
1614 scm->pid = get_pid(task_tgid(current));
1615 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1616 }
1617 return err;
1618}
1619
1620static bool unix_skb_scm_eq(struct sk_buff *skb,
1621 struct scm_cookie *scm)
1622{
1623 const struct unix_skb_parms *u = &UNIXCB(skb);
1624
1625 return u->pid == scm->pid &&
1626 uid_eq(u->uid, scm->creds.uid) &&
1627 gid_eq(u->gid, scm->creds.gid) &&
1628 unix_secdata_eq(scm, skb);
1629}
1630
1631/*
1632 * Send AF_UNIX data.
1633 */
1634
1635static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1636 size_t len)
1637{
1638 struct sock *sk = sock->sk;
1639 struct net *net = sock_net(sk);
1640 struct unix_sock *u = unix_sk(sk);
1641 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1642 struct sock *other = NULL;
1643 int namelen = 0; /* fake GCC */
1644 int err;
1645 unsigned int hash;
1646 struct sk_buff *skb;
1647 long timeo;
1648 struct scm_cookie scm;
1649 int data_len = 0;
1650 int sk_locked;
1651
1652 wait_for_unix_gc();
1653 err = scm_send(sock, msg, &scm, false);
1654 if (err < 0)
1655 return err;
1656
1657 err = -EOPNOTSUPP;
1658 if (msg->msg_flags&MSG_OOB)
1659 goto out;
1660
1661 if (msg->msg_namelen) {
1662 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1663 if (err < 0)
1664 goto out;
1665 namelen = err;
1666 } else {
1667 sunaddr = NULL;
1668 err = -ENOTCONN;
1669 other = unix_peer_get(sk);
1670 if (!other)
1671 goto out;
1672 }
1673
1674 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1675 && (err = unix_autobind(sock)) != 0)
1676 goto out;
1677
1678 err = -EMSGSIZE;
1679 if (len > sk->sk_sndbuf - 32)
1680 goto out;
1681
1682 if (len > SKB_MAX_ALLOC) {
1683 data_len = min_t(size_t,
1684 len - SKB_MAX_ALLOC,
1685 MAX_SKB_FRAGS * PAGE_SIZE);
1686 data_len = PAGE_ALIGN(data_len);
1687
1688 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1689 }
1690
1691 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1692 msg->msg_flags & MSG_DONTWAIT, &err,
1693 PAGE_ALLOC_COSTLY_ORDER);
1694 if (skb == NULL)
1695 goto out;
1696
1697 err = unix_scm_to_skb(&scm, skb, true);
1698 if (err < 0)
1699 goto out_free;
1700
1701 skb_put(skb, len - data_len);
1702 skb->data_len = data_len;
1703 skb->len = len;
1704 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1705 if (err)
1706 goto out_free;
1707
1708 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1709
1710restart:
1711 if (!other) {
1712 err = -ECONNRESET;
1713 if (sunaddr == NULL)
1714 goto out_free;
1715
1716 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1717 hash, &err);
1718 if (other == NULL)
1719 goto out_free;
1720 }
1721
1722 if (sk_filter(other, skb) < 0) {
1723 /* Toss the packet but do not return any error to the sender */
1724 err = len;
1725 goto out_free;
1726 }
1727
1728 sk_locked = 0;
1729 unix_state_lock(other);
1730restart_locked:
1731 err = -EPERM;
1732 if (!unix_may_send(sk, other))
1733 goto out_unlock;
1734
1735 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1736 /*
1737 * Check with 1003.1g - what should
1738 * datagram error
1739 */
1740 unix_state_unlock(other);
1741 sock_put(other);
1742
1743 if (!sk_locked)
1744 unix_state_lock(sk);
1745
1746 err = 0;
1747 if (unix_peer(sk) == other) {
1748 unix_peer(sk) = NULL;
1749 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1750
1751 unix_state_unlock(sk);
1752
1753 unix_dgram_disconnected(sk, other);
1754 sock_put(other);
1755 err = -ECONNREFUSED;
1756 } else {
1757 unix_state_unlock(sk);
1758 }
1759
1760 other = NULL;
1761 if (err)
1762 goto out_free;
1763 goto restart;
1764 }
1765
1766 err = -EPIPE;
1767 if (other->sk_shutdown & RCV_SHUTDOWN)
1768 goto out_unlock;
1769
1770 if (sk->sk_type != SOCK_SEQPACKET) {
1771 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1772 if (err)
1773 goto out_unlock;
1774 }
1775
1776 /* other == sk && unix_peer(other) != sk if
1777 * - unix_peer(sk) == NULL, destination address bound to sk
1778 * - unix_peer(sk) == sk by time of get but disconnected before lock
1779 */
1780 if (other != sk &&
1781 unlikely(unix_peer(other) != sk &&
1782 unix_recvq_full_lockless(other))) {
1783 if (timeo) {
1784 timeo = unix_wait_for_peer(other, timeo);
1785
1786 err = sock_intr_errno(timeo);
1787 if (signal_pending(current))
1788 goto out_free;
1789
1790 goto restart;
1791 }
1792
1793 if (!sk_locked) {
1794 unix_state_unlock(other);
1795 unix_state_double_lock(sk, other);
1796 }
1797
1798 if (unix_peer(sk) != other ||
1799 unix_dgram_peer_wake_me(sk, other)) {
1800 err = -EAGAIN;
1801 sk_locked = 1;
1802 goto out_unlock;
1803 }
1804
1805 if (!sk_locked) {
1806 sk_locked = 1;
1807 goto restart_locked;
1808 }
1809 }
1810
1811 if (unlikely(sk_locked))
1812 unix_state_unlock(sk);
1813
1814 if (sock_flag(other, SOCK_RCVTSTAMP))
1815 __net_timestamp(skb);
1816 maybe_add_creds(skb, sock, other);
1817 skb_queue_tail(&other->sk_receive_queue, skb);
1818 unix_state_unlock(other);
1819 other->sk_data_ready(other);
1820 sock_put(other);
1821 scm_destroy(&scm);
1822 return len;
1823
1824out_unlock:
1825 if (sk_locked)
1826 unix_state_unlock(sk);
1827 unix_state_unlock(other);
1828out_free:
1829 kfree_skb(skb);
1830out:
1831 if (other)
1832 sock_put(other);
1833 scm_destroy(&scm);
1834 return err;
1835}
1836
1837/* We use paged skbs for stream sockets, and limit occupancy to 32768
1838 * bytes, and a minimum of a full page.
1839 */
1840#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1841
1842static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1843 size_t len)
1844{
1845 struct sock *sk = sock->sk;
1846 struct sock *other = NULL;
1847 int err, size;
1848 struct sk_buff *skb;
1849 int sent = 0;
1850 struct scm_cookie scm;
1851 bool fds_sent = false;
1852 int data_len;
1853
1854 wait_for_unix_gc();
1855 err = scm_send(sock, msg, &scm, false);
1856 if (err < 0)
1857 return err;
1858
1859 err = -EOPNOTSUPP;
1860 if (msg->msg_flags&MSG_OOB)
1861 goto out_err;
1862
1863 if (msg->msg_namelen) {
1864 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1865 goto out_err;
1866 } else {
1867 err = -ENOTCONN;
1868 other = unix_peer(sk);
1869 if (!other)
1870 goto out_err;
1871 }
1872
1873 if (sk->sk_shutdown & SEND_SHUTDOWN)
1874 goto pipe_err;
1875
1876 while (sent < len) {
1877 size = len - sent;
1878
1879 /* Keep two messages in the pipe so it schedules better */
1880 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1881
1882 /* allow fallback to order-0 allocations */
1883 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1884
1885 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1886
1887 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1888
1889 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1890 msg->msg_flags & MSG_DONTWAIT, &err,
1891 get_order(UNIX_SKB_FRAGS_SZ));
1892 if (!skb)
1893 goto out_err;
1894
1895 /* Only send the fds in the first buffer */
1896 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1897 if (err < 0) {
1898 kfree_skb(skb);
1899 goto out_err;
1900 }
1901 fds_sent = true;
1902
1903 skb_put(skb, size - data_len);
1904 skb->data_len = data_len;
1905 skb->len = size;
1906 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1907 if (err) {
1908 kfree_skb(skb);
1909 goto out_err;
1910 }
1911
1912 unix_state_lock(other);
1913
1914 if (sock_flag(other, SOCK_DEAD) ||
1915 (other->sk_shutdown & RCV_SHUTDOWN))
1916 goto pipe_err_free;
1917
1918 maybe_add_creds(skb, sock, other);
1919 skb_queue_tail(&other->sk_receive_queue, skb);
1920 unix_state_unlock(other);
1921 other->sk_data_ready(other);
1922 sent += size;
1923 }
1924
1925 scm_destroy(&scm);
1926
1927 return sent;
1928
1929pipe_err_free:
1930 unix_state_unlock(other);
1931 kfree_skb(skb);
1932pipe_err:
1933 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1934 send_sig(SIGPIPE, current, 0);
1935 err = -EPIPE;
1936out_err:
1937 scm_destroy(&scm);
1938 return sent ? : err;
1939}
1940
1941static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1942 int offset, size_t size, int flags)
1943{
1944 int err;
1945 bool send_sigpipe = false;
1946 bool init_scm = true;
1947 struct scm_cookie scm;
1948 struct sock *other, *sk = socket->sk;
1949 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1950
1951 if (flags & MSG_OOB)
1952 return -EOPNOTSUPP;
1953
1954 other = unix_peer(sk);
1955 if (!other || sk->sk_state != TCP_ESTABLISHED)
1956 return -ENOTCONN;
1957
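	/* The alloc_skb label below is only reached via goto: it drops the
	 * peer's state lock and the iolock, allocates a fresh skb, and then
	 * falls through to re-acquire the locks and retry. The if (false)
	 * keeps this path out of the initial flow.
	 */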
1958 if (false) {
1959alloc_skb:
1960 unix_state_unlock(other);
1961 mutex_unlock(&unix_sk(other)->iolock);
1962 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1963 &err, 0);
1964 if (!newskb)
1965 goto err;
1966 }
1967
1968 /* we must acquire iolock as we modify already present
1969 * skbs in the sk_receive_queue and mess with skb->len
1970 */
1971 err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1972 if (err) {
1973 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1974 goto err;
1975 }
1976
1977 if (sk->sk_shutdown & SEND_SHUTDOWN) {
1978 err = -EPIPE;
1979 send_sigpipe = true;
1980 goto err_unlock;
1981 }
1982
1983 unix_state_lock(other);
1984
1985 if (sock_flag(other, SOCK_DEAD) ||
1986 other->sk_shutdown & RCV_SHUTDOWN) {
1987 err = -EPIPE;
1988 send_sigpipe = true;
1989 goto err_state_unlock;
1990 }
1991
1992 if (init_scm) {
1993 err = maybe_init_creds(&scm, socket, other);
1994 if (err)
1995 goto err_state_unlock;
1996 init_scm = false;
1997 }
1998
1999 skb = skb_peek_tail(&other->sk_receive_queue);
2000 if (tail && tail == skb) {
2001 skb = newskb;
2002 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2003 if (newskb) {
2004 skb = newskb;
2005 } else {
2006 tail = skb;
2007 goto alloc_skb;
2008 }
2009 } else if (newskb) {
2010		/* This is the fast path; we don't necessarily need to
2011		 * call kfree_skb here, and with newskb == NULL
2012		 * this does no harm.
2013		 */
2014 consume_skb(newskb);
2015 newskb = NULL;
2016 }
2017
2018 if (skb_append_pagefrags(skb, page, offset, size)) {
2019 tail = skb;
2020 goto alloc_skb;
2021 }
2022
2023 skb->len += size;
2024 skb->data_len += size;
2025 skb->truesize += size;
2026 refcount_add(size, &sk->sk_wmem_alloc);
2027
2028 if (newskb) {
2029 err = unix_scm_to_skb(&scm, skb, false);
2030 if (err)
2031 goto err_state_unlock;
2032 spin_lock(&other->sk_receive_queue.lock);
2033 __skb_queue_tail(&other->sk_receive_queue, newskb);
2034 spin_unlock(&other->sk_receive_queue.lock);
2035 }
2036
2037 unix_state_unlock(other);
2038 mutex_unlock(&unix_sk(other)->iolock);
2039
2040 other->sk_data_ready(other);
2041 scm_destroy(&scm);
2042 return size;
2043
2044err_state_unlock:
2045 unix_state_unlock(other);
2046err_unlock:
2047 mutex_unlock(&unix_sk(other)->iolock);
2048err:
2049 kfree_skb(newskb);
2050 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2051 send_sig(SIGPIPE, current, 0);
2052 if (!init_scm)
2053 scm_destroy(&scm);
2054 return err;
2055}
2056
2057static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2058 size_t len)
2059{
2060 int err;
2061 struct sock *sk = sock->sk;
2062
2063 err = sock_error(sk);
2064 if (err)
2065 return err;
2066
2067 if (sk->sk_state != TCP_ESTABLISHED)
2068 return -ENOTCONN;
2069
2070 if (msg->msg_namelen)
2071 msg->msg_namelen = 0;
2072
2073 return unix_dgram_sendmsg(sock, msg, len);
2074}
2075
2076static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2077 size_t size, int flags)
2078{
2079 struct sock *sk = sock->sk;
2080
2081 if (sk->sk_state != TCP_ESTABLISHED)
2082 return -ENOTCONN;
2083
2084 return unix_dgram_recvmsg(sock, msg, size, flags);
2085}
2086
2087static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2088{
2089	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2090
2091	if (addr) {
2092 msg->msg_namelen = addr->len;
2093 memcpy(msg->msg_name, addr->name, addr->len);
2094	}
2095}
2096
2097static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2098 size_t size, int flags)
2099{
2100 struct scm_cookie scm;
2101 struct sock *sk = sock->sk;
2102 struct unix_sock *u = unix_sk(sk);
2103 struct sk_buff *skb, *last;
2104 long timeo;
2105	int skip;
2106	int err;
2107
2108 err = -EOPNOTSUPP;
2109 if (flags&MSG_OOB)
2110 goto out;
2111
2112 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2113
2114 do {
2115 mutex_lock(&u->iolock);
2116
2117 skip = sk_peek_offset(sk, flags);
2118		skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
2119					      &last);
2120		if (skb)
2121 break;
2122
2123 mutex_unlock(&u->iolock);
2124
2125 if (err != -EAGAIN)
2126 break;
2127 } while (timeo &&
2128 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2129
2130 if (!skb) { /* implies iolock unlocked */
2131 unix_state_lock(sk);
2132 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2133 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2134 (sk->sk_shutdown & RCV_SHUTDOWN))
2135 err = 0;
2136 unix_state_unlock(sk);
2137 goto out;
2138 }
2139
2140 if (wq_has_sleeper(&u->peer_wait))
2141 wake_up_interruptible_sync_poll(&u->peer_wait,
2142 EPOLLOUT | EPOLLWRNORM |
2143 EPOLLWRBAND);
2144
2145 if (msg->msg_name)
2146 unix_copy_addr(msg, skb->sk);
2147
2148 if (size > skb->len - skip)
2149 size = skb->len - skip;
2150 else if (size < skb->len - skip)
2151 msg->msg_flags |= MSG_TRUNC;
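	/* Example: a 100-byte datagram read into a 64-byte buffer copies 64
	 * bytes and sets MSG_TRUNC in msg_flags; if MSG_TRUNC was also passed
	 * in flags, the full 100-byte length is reported below instead, and
	 * the rest of the datagram is discarded on a non-peeking read.
	 */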
2152
2153 err = skb_copy_datagram_msg(skb, skip, msg, size);
2154 if (err)
2155 goto out_free;
2156
2157 if (sock_flag(sk, SOCK_RCVTSTAMP))
2158 __sock_recv_timestamp(msg, sk, skb);
2159
2160 memset(&scm, 0, sizeof(scm));
2161
2162 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2163 unix_set_secdata(&scm, skb);
2164
2165 if (!(flags & MSG_PEEK)) {
2166 if (UNIXCB(skb).fp)
2167 unix_detach_fds(&scm, skb);
2168
2169 sk_peek_offset_bwd(sk, skb->len);
2170 } else {
2171 /* It is questionable: on PEEK we could:
2172 - do not return fds - good, but too simple 8)
2173 - return fds, and do not return them on read (old strategy,
2174 apparently wrong)
2175 - clone fds (I chose it for now, it is the most universal
2176 solution)
2177
2178 POSIX 1003.1g does not actually define this clearly
2179 at all. POSIX 1003.1g doesn't define a lot of things
2180 clearly however!
2181
2182 */
2183
2184 sk_peek_offset_fwd(sk, size);
2185
2186 if (UNIXCB(skb).fp)
2187			unix_peek_fds(&scm, skb);
2188	}
2189 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2190
2191 scm_recv(sock, msg, &scm, flags);
2192
2193out_free:
2194 skb_free_datagram(sk, skb);
2195 mutex_unlock(&u->iolock);
2196out:
2197 return err;
2198}
2199
2200/*
2201 * Sleep until more data has arrived. But check for races..
2202 */
2203static long unix_stream_data_wait(struct sock *sk, long timeo,
2204 struct sk_buff *last, unsigned int last_len,
2205 bool freezable)
2206{
2207 struct sk_buff *tail;
2208 DEFINE_WAIT(wait);
2209
2210 unix_state_lock(sk);
2211
2212 for (;;) {
2213 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2214
2215 tail = skb_peek_tail(&sk->sk_receive_queue);
2216 if (tail != last ||
2217 (tail && tail->len != last_len) ||
2218 sk->sk_err ||
2219 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2220 signal_pending(current) ||
2221 !timeo)
2222 break;
2223
2224 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2225 unix_state_unlock(sk);
2226 if (freezable)
2227 timeo = freezable_schedule_timeout(timeo);
2228 else
2229 timeo = schedule_timeout(timeo);
2230 unix_state_lock(sk);
2231
2232 if (sock_flag(sk, SOCK_DEAD))
2233 break;
2234
2235 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2236 }
2237
2238 finish_wait(sk_sleep(sk), &wait);
2239 unix_state_unlock(sk);
2240 return timeo;
2241}
2242
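/* UNIXCB(skb).consumed counts how many bytes of this skb a stream reader has
 * already copied out, so unix_skb_len() is the payload still pending; the skb
 * is only unlinked from the receive queue once all of it has been consumed.
 */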
2243static unsigned int unix_skb_len(const struct sk_buff *skb)
2244{
2245 return skb->len - UNIXCB(skb).consumed;
2246}
2247
2248struct unix_stream_read_state {
2249 int (*recv_actor)(struct sk_buff *, int, int,
2250 struct unix_stream_read_state *);
2251 struct socket *socket;
2252 struct msghdr *msg;
2253 struct pipe_inode_info *pipe;
2254 size_t size;
2255 int flags;
2256 unsigned int splice_flags;
2257};
2258
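/* unix_stream_read_generic() drives both recvmsg() and splice() reads; the
 * recv_actor callback either copies into state->msg (unix_stream_read_actor)
 * or feeds state->pipe (unix_stream_splice_actor) and returns the number of
 * bytes it consumed, or a negative value on error.
 */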
2259static int unix_stream_read_generic(struct unix_stream_read_state *state,
2260 bool freezable)
2261{
2262 struct scm_cookie scm;
2263 struct socket *sock = state->socket;
2264 struct sock *sk = sock->sk;
2265 struct unix_sock *u = unix_sk(sk);
2266 int copied = 0;
2267 int flags = state->flags;
2268 int noblock = flags & MSG_DONTWAIT;
2269 bool check_creds = false;
2270 int target;
2271 int err = 0;
2272 long timeo;
2273 int skip;
2274 size_t size = state->size;
2275 unsigned int last_len;
2276
2277 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2278 err = -EINVAL;
2279 goto out;
2280 }
2281
2282 if (unlikely(flags & MSG_OOB)) {
2283 err = -EOPNOTSUPP;
2284 goto out;
2285 }
2286
2287 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2288 timeo = sock_rcvtimeo(sk, noblock);
2289
2290 memset(&scm, 0, sizeof(scm));
2291
2292	/* Lock the socket to prevent queue disordering
2293	 * while we sleep copying data out to the msghdr
2294	 */
2295 mutex_lock(&u->iolock);
2296
2297 skip = max(sk_peek_offset(sk, flags), 0);
2298
2299 do {
2300 int chunk;
2301 bool drop_skb;
2302 struct sk_buff *skb, *last;
2303
2304redo:
2305 unix_state_lock(sk);
2306 if (sock_flag(sk, SOCK_DEAD)) {
2307 err = -ECONNRESET;
2308 goto unlock;
2309 }
2310 last = skb = skb_peek(&sk->sk_receive_queue);
2311 last_len = last ? last->len : 0;
2312again:
2313 if (skb == NULL) {
2314 if (copied >= target)
2315 goto unlock;
2316
2317 /*
2318 * POSIX 1003.1g mandates this order.
2319 */
2320
2321 err = sock_error(sk);
2322 if (err)
2323 goto unlock;
2324 if (sk->sk_shutdown & RCV_SHUTDOWN)
2325 goto unlock;
2326
2327 unix_state_unlock(sk);
2328 if (!timeo) {
2329 err = -EAGAIN;
2330 break;
2331 }
2332
2333 mutex_unlock(&u->iolock);
2334
2335 timeo = unix_stream_data_wait(sk, timeo, last,
2336 last_len, freezable);
2337
2338 if (signal_pending(current)) {
2339 err = sock_intr_errno(timeo);
2340 scm_destroy(&scm);
2341 goto out;
2342 }
2343
2344 mutex_lock(&u->iolock);
2345 goto redo;
2346unlock:
2347 unix_state_unlock(sk);
2348 break;
2349 }
2350
2351 while (skip >= unix_skb_len(skb)) {
2352 skip -= unix_skb_len(skb);
2353 last = skb;
2354 last_len = skb->len;
2355 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2356 if (!skb)
2357 goto again;
2358 }
2359
2360 unix_state_unlock(sk);
2361
2362 if (check_creds) {
2363 /* Never glue messages from different writers */
2364 if (!unix_skb_scm_eq(skb, &scm))
2365 break;
2366 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2367 /* Copy credentials */
2368 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2369 unix_set_secdata(&scm, skb);
2370 check_creds = true;
2371 }
2372
2373 /* Copy address just once */
2374 if (state->msg && state->msg->msg_name) {
2375 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2376 state->msg->msg_name);
2377 unix_copy_addr(state->msg, skb->sk);
2378 sunaddr = NULL;
2379 }
2380
2381 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2382 skb_get(skb);
2383 chunk = state->recv_actor(skb, skip, chunk, state);
2384 drop_skb = !unix_skb_len(skb);
2385 /* skb is only safe to use if !drop_skb */
2386 consume_skb(skb);
2387 if (chunk < 0) {
2388 if (copied == 0)
2389 copied = -EFAULT;
2390 break;
2391 }
2392 copied += chunk;
2393 size -= chunk;
2394
2395 if (drop_skb) {
2396 /* the skb was touched by a concurrent reader;
2397 * we should not expect anything from this skb
2398 * anymore and assume it invalid - we can be
2399 * sure it was dropped from the socket queue
2400 *
2401 * let's report a short read
2402 */
2403 err = 0;
2404 break;
2405 }
2406
2407 /* Mark read part of skb as used */
2408 if (!(flags & MSG_PEEK)) {
2409 UNIXCB(skb).consumed += chunk;
2410
2411 sk_peek_offset_bwd(sk, chunk);
2412
2413 if (UNIXCB(skb).fp)
2414 unix_detach_fds(&scm, skb);
2415
2416 if (unix_skb_len(skb))
2417 break;
2418
2419 skb_unlink(skb, &sk->sk_receive_queue);
2420 consume_skb(skb);
2421
2422 if (scm.fp)
2423 break;
2424 } else {
2425 /* It is questionable, see note in unix_dgram_recvmsg.
2426 */
2427 if (UNIXCB(skb).fp)
2428				unix_peek_fds(&scm, skb);
2429
2430 sk_peek_offset_fwd(sk, chunk);
2431
2432 if (UNIXCB(skb).fp)
2433 break;
2434
2435 skip = 0;
2436 last = skb;
2437 last_len = skb->len;
2438 unix_state_lock(sk);
2439 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2440 if (skb)
2441 goto again;
2442 unix_state_unlock(sk);
2443 break;
2444 }
2445 } while (size);
2446
2447 mutex_unlock(&u->iolock);
2448 if (state->msg)
2449 scm_recv(sock, state->msg, &scm, flags);
2450 else
2451 scm_destroy(&scm);
2452out:
2453 return copied ? : err;
2454}
2455
2456static int unix_stream_read_actor(struct sk_buff *skb,
2457 int skip, int chunk,
2458 struct unix_stream_read_state *state)
2459{
2460 int ret;
2461
2462 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2463 state->msg, chunk);
2464 return ret ?: chunk;
2465}
2466
2467static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2468 size_t size, int flags)
2469{
2470 struct unix_stream_read_state state = {
2471 .recv_actor = unix_stream_read_actor,
2472 .socket = sock,
2473 .msg = msg,
2474 .size = size,
2475 .flags = flags
2476 };
2477
2478 return unix_stream_read_generic(&state, true);
2479}
2480
2481static int unix_stream_splice_actor(struct sk_buff *skb,
2482 int skip, int chunk,
2483 struct unix_stream_read_state *state)
2484{
2485 return skb_splice_bits(skb, state->socket->sk,
2486 UNIXCB(skb).consumed + skip,
2487 state->pipe, chunk, state->splice_flags);
2488}
2489
2490static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2491 struct pipe_inode_info *pipe,
2492 size_t size, unsigned int flags)
2493{
2494 struct unix_stream_read_state state = {
2495 .recv_actor = unix_stream_splice_actor,
2496 .socket = sock,
2497 .pipe = pipe,
2498 .size = size,
2499 .splice_flags = flags,
2500 };
2501
2502 if (unlikely(*ppos))
2503 return -ESPIPE;
2504
2505 if (sock->file->f_flags & O_NONBLOCK ||
2506 flags & SPLICE_F_NONBLOCK)
2507 state.flags = MSG_DONTWAIT;
2508
2509 return unix_stream_read_generic(&state, false);
2510}
2511
2512static int unix_shutdown(struct socket *sock, int mode)
2513{
2514 struct sock *sk = sock->sk;
2515 struct sock *other;
2516
2517 if (mode < SHUT_RD || mode > SHUT_RDWR)
2518 return -EINVAL;
2519 /* This maps:
2520 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2521 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2522 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2523 */
2524 ++mode;
2525
2526 unix_state_lock(sk);
2527 sk->sk_shutdown |= mode;
2528 other = unix_peer(sk);
2529 if (other)
2530 sock_hold(other);
2531 unix_state_unlock(sk);
2532 sk->sk_state_change(sk);
2533
2534 if (other &&
2535 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2536
2537 int peer_mode = 0;
2538
2539 if (mode&RCV_SHUTDOWN)
2540 peer_mode |= SEND_SHUTDOWN;
2541 if (mode&SEND_SHUTDOWN)
2542 peer_mode |= RCV_SHUTDOWN;
2543 unix_state_lock(other);
2544 other->sk_shutdown |= peer_mode;
2545 unix_state_unlock(other);
2546 other->sk_state_change(other);
2547 if (peer_mode == SHUTDOWN_MASK)
2548 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2549 else if (peer_mode & RCV_SHUTDOWN)
2550 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2551 }
2552 if (other)
2553 sock_put(other);
2554
2555 return 0;
2556}
2557
2558long unix_inq_len(struct sock *sk)
2559{
2560 struct sk_buff *skb;
2561 long amount = 0;
2562
2563 if (sk->sk_state == TCP_LISTEN)
2564 return -EINVAL;
2565
2566 spin_lock(&sk->sk_receive_queue.lock);
2567 if (sk->sk_type == SOCK_STREAM ||
2568 sk->sk_type == SOCK_SEQPACKET) {
2569 skb_queue_walk(&sk->sk_receive_queue, skb)
2570 amount += unix_skb_len(skb);
2571 } else {
2572 skb = skb_peek(&sk->sk_receive_queue);
2573 if (skb)
2574 amount = skb->len;
2575 }
2576 spin_unlock(&sk->sk_receive_queue.lock);
2577
2578 return amount;
2579}
2580EXPORT_SYMBOL_GPL(unix_inq_len);
2581
2582long unix_outq_len(struct sock *sk)
2583{
2584 return sk_wmem_alloc_get(sk);
2585}
2586EXPORT_SYMBOL_GPL(unix_outq_len);
2587
2588static int unix_open_file(struct sock *sk)
2589{
2590 struct path path;
2591 struct file *f;
2592 int fd;
2593
2594 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2595 return -EPERM;
2596
2597	if (!smp_load_acquire(&unix_sk(sk)->addr))
2598		return -ENOENT;
2599
2600 path = unix_sk(sk)->path;
2601 if (!path.dentry)
2602 return -ENOENT;
2603
2604 path_get(&path);
2605
2606 fd = get_unused_fd_flags(O_CLOEXEC);
2607 if (fd < 0)
2608 goto out;
2609
2610 f = dentry_open(&path, O_PATH, current_cred());
2611 if (IS_ERR(f)) {
2612 put_unused_fd(fd);
2613 fd = PTR_ERR(f);
2614 goto out;
2615 }
2616
2617 fd_install(fd, f);
2618out:
2619 path_put(&path);
2620
2621 return fd;
2622}
2623
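/* Illustrative userspace use of the SIOCUNIXFILE ioctl handled below (needs
 * CAP_NET_ADMIN over the socket's network namespace and a socket bound to a
 * filesystem path): it returns a new O_PATH descriptor for the bound inode.
 * A sketch, with "st" being a struct stat declared by the caller:
 *
 *	int pathfd = ioctl(sockfd, SIOCUNIXFILE, 0);
 *	if (pathfd >= 0)
 *		fstatat(pathfd, "", &st, AT_EMPTY_PATH);
 */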
2624static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2625{
2626 struct sock *sk = sock->sk;
2627 long amount = 0;
2628 int err;
2629
2630 switch (cmd) {
2631 case SIOCOUTQ:
2632 amount = unix_outq_len(sk);
2633 err = put_user(amount, (int __user *)arg);
2634 break;
2635 case SIOCINQ:
2636 amount = unix_inq_len(sk);
2637 if (amount < 0)
2638 err = amount;
2639 else
2640 err = put_user(amount, (int __user *)arg);
2641 break;
2642 case SIOCUNIXFILE:
2643 err = unix_open_file(sk);
2644 break;
2645 default:
2646 err = -ENOIOCTLCMD;
2647 break;
2648 }
2649 return err;
2650}
2651
2652#ifdef CONFIG_COMPAT
2653static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2654{
2655 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
2656}
2657#endif
2658
2659static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2660{
2661 struct sock *sk = sock->sk;
2662 __poll_t mask;
2663
2664 sock_poll_wait(file, sock, wait);
2665 mask = 0;
2666
2667 /* exceptional events? */
2668 if (sk->sk_err)
2669 mask |= EPOLLERR;
2670 if (sk->sk_shutdown == SHUTDOWN_MASK)
2671 mask |= EPOLLHUP;
2672 if (sk->sk_shutdown & RCV_SHUTDOWN)
2673 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2674
2675 /* readable? */
2676	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2677		mask |= EPOLLIN | EPOLLRDNORM;
2678
2679 /* Connection-based need to check for termination and startup */
2680 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2681 sk->sk_state == TCP_CLOSE)
2682 mask |= EPOLLHUP;
2683
2684 /*
2685 * we set writable also when the other side has shut down the
2686 * connection. This prevents stuck sockets.
2687 */
2688 if (unix_writable(sk))
2689 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2690
2691 return mask;
2692}
2693
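/* A connected datagram sender withholds EPOLLOUT while the peer's receive
 * queue is full; unix_dgram_peer_wake_me() hooks this socket onto the peer's
 * peer_wait queue, so the wake_up_interruptible_sync_poll() in
 * unix_dgram_recvmsg() rouses the poller once the peer drains a datagram.
 */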
2694static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2695 poll_table *wait)
2696{
2697 struct sock *sk = sock->sk, *other;
2698 unsigned int writable;
2699 __poll_t mask;
2700
2701 sock_poll_wait(file, sock, wait);
2702 mask = 0;
2703
2704 /* exceptional events? */
2705	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2706		mask |= EPOLLERR |
2707 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2708
2709 if (sk->sk_shutdown & RCV_SHUTDOWN)
2710 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2711 if (sk->sk_shutdown == SHUTDOWN_MASK)
2712 mask |= EPOLLHUP;
2713
2714 /* readable? */
2715	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2716		mask |= EPOLLIN | EPOLLRDNORM;
2717
2718 /* Connection-based need to check for termination and startup */
2719 if (sk->sk_type == SOCK_SEQPACKET) {
2720 if (sk->sk_state == TCP_CLOSE)
2721 mask |= EPOLLHUP;
2722 /* connection hasn't started yet? */
2723 if (sk->sk_state == TCP_SYN_SENT)
2724 return mask;
2725 }
2726
2727 /* No write status requested, avoid expensive OUT tests. */
2728 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2729 return mask;
2730
2731 writable = unix_writable(sk);
2732 if (writable) {
2733 unix_state_lock(sk);
2734
2735 other = unix_peer(sk);
2736 if (other && unix_peer(other) != sk &&
2737		    unix_recvq_full_lockless(other) &&
2738		    unix_dgram_peer_wake_me(sk, other))
2739 writable = 0;
2740
2741 unix_state_unlock(sk);
2742 }
2743
2744 if (writable)
2745 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2746 else
2747 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2748
2749 return mask;
2750}
2751
2752#ifdef CONFIG_PROC_FS
2753
2754#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2755
2756#define get_bucket(x) ((x) >> BUCKET_SPACE)
2757#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2758#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
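/* The seq_file position packs the iterator state: the hash bucket index lives
 * in the bits above BUCKET_SPACE and a 1-based offset within that bucket in
 * the low bits, so the walk can resume at *pos without holding
 * unix_table_lock between reads of /proc/net/unix.
 */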
2759
2760static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2761{
2762 unsigned long offset = get_offset(*pos);
2763 unsigned long bucket = get_bucket(*pos);
2764 struct sock *sk;
2765 unsigned long count = 0;
2766
2767 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2768 if (sock_net(sk) != seq_file_net(seq))
2769 continue;
2770 if (++count == offset)
2771 break;
2772 }
2773
2774 return sk;
2775}
2776
2777static struct sock *unix_next_socket(struct seq_file *seq,
2778 struct sock *sk,
2779 loff_t *pos)
2780{
2781 unsigned long bucket;
2782
2783 while (sk > (struct sock *)SEQ_START_TOKEN) {
2784 sk = sk_next(sk);
2785 if (!sk)
2786 goto next_bucket;
2787 if (sock_net(sk) == seq_file_net(seq))
2788 return sk;
2789 }
2790
2791 do {
2792 sk = unix_from_bucket(seq, pos);
2793 if (sk)
2794 return sk;
2795
2796next_bucket:
2797 bucket = get_bucket(*pos) + 1;
2798 *pos = set_bucket_offset(bucket, 1);
2799 } while (bucket < ARRAY_SIZE(unix_socket_table));
2800
2801 return NULL;
2802}
2803
2804static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2805 __acquires(unix_table_lock)
2806{
2807 spin_lock(&unix_table_lock);
2808
2809 if (!*pos)
2810 return SEQ_START_TOKEN;
2811
2812 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2813 return NULL;
2814
2815 return unix_next_socket(seq, NULL, pos);
2816}
2817
2818static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2819{
2820 ++*pos;
2821 return unix_next_socket(seq, v, pos);
2822}
2823
2824static void unix_seq_stop(struct seq_file *seq, void *v)
2825 __releases(unix_table_lock)
2826{
2827 spin_unlock(&unix_table_lock);
2828}
2829
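/* A /proc/net/unix entry printed below looks roughly like this (illustrative
 * inode and path; the fields are kernel address, refcount, protocol, flags,
 * type, state, inode and the optional bound path, '@' marking abstract names):
 *
 *	0000000000000000: 00000002 00000000 00010000 0001 01 24485 /run/foo.sock
 */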
2830static int unix_seq_show(struct seq_file *seq, void *v)
2831{
2832
2833 if (v == SEQ_START_TOKEN)
2834 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2835 "Inode Path\n");
2836 else {
2837 struct sock *s = v;
2838 struct unix_sock *u = unix_sk(s);
2839 unix_state_lock(s);
2840
2841 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2842 s,
2843 refcount_read(&s->sk_refcnt),
2844 0,
2845 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2846 s->sk_type,
2847 s->sk_socket ?
2848 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2849 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2850 sock_i_ino(s));
2851
2852		if (u->addr) {	// under unix_table_lock here
2853			int i, len;
2854 seq_putc(seq, ' ');
2855
2856 i = 0;
2857 len = u->addr->len - sizeof(short);
2858 if (!UNIX_ABSTRACT(s))
2859 len--;
2860 else {
2861 seq_putc(seq, '@');
2862 i++;
2863 }
2864 for ( ; i < len; i++)
2865 seq_putc(seq, u->addr->name->sun_path[i] ?:
2866 '@');
2867 }
2868 unix_state_unlock(s);
2869 seq_putc(seq, '\n');
2870 }
2871
2872 return 0;
2873}
2874
2875static const struct seq_operations unix_seq_ops = {
2876 .start = unix_seq_start,
2877 .next = unix_seq_next,
2878 .stop = unix_seq_stop,
2879 .show = unix_seq_show,
2880};
2881#endif
2882
2883static const struct net_proto_family unix_family_ops = {
2884 .family = PF_UNIX,
2885 .create = unix_create,
2886 .owner = THIS_MODULE,
2887};
2888
2889
2890static int __net_init unix_net_init(struct net *net)
2891{
2892 int error = -ENOMEM;
2893
2894 net->unx.sysctl_max_dgram_qlen = 10;
2895 if (unix_sysctl_register(net))
2896 goto out;
2897
2898#ifdef CONFIG_PROC_FS
2899 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2900 sizeof(struct seq_net_private))) {
2901 unix_sysctl_unregister(net);
2902 goto out;
2903 }
2904#endif
2905 error = 0;
2906out:
2907 return error;
2908}
2909
2910static void __net_exit unix_net_exit(struct net *net)
2911{
2912 unix_sysctl_unregister(net);
2913 remove_proc_entry("unix", net->proc_net);
2914}
2915
2916static struct pernet_operations unix_net_ops = {
2917 .init = unix_net_init,
2918 .exit = unix_net_exit,
2919};
2920
2921static int __init af_unix_init(void)
2922{
2923 int rc = -1;
2924
2925 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2926
2927 rc = proto_register(&unix_proto, 1);
2928 if (rc != 0) {
2929 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2930 goto out;
2931 }
2932
2933 sock_register(&unix_family_ops);
2934 register_pernet_subsys(&unix_net_ops);
2935out:
2936 return rc;
2937}
2938
2939static void __exit af_unix_exit(void)
2940{
2941 sock_unregister(PF_UNIX);
2942 proto_unregister(&unix_proto);
2943 unregister_pernet_subsys(&unix_net_ops);
2944}
2945
2946/* Earlier than device_initcall() so that other drivers invoking
2947 request_module() don't end up in a loop when modprobe tries
2948 to use a UNIX socket. But later than subsys_initcall() because
2949 we depend on stuff initialised there */
2950fs_initcall(af_unix_init);
2951module_exit(af_unix_exit);
2952
2953MODULE_LICENSE("GPL");
2954MODULE_ALIAS_NETPROTO(PF_UNIX);