Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/kernel/futex.c b/kernel/futex.c
index f89abca..afbf928 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Fast Userspace Mutexes (which I call "Futexes!").
* (C) Rusty Russell, IBM 2002
@@ -29,21 +30,8 @@
*
* "The futexes are also cursed."
* "But they come in a choice of three flavours!"
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/fs.h>
@@ -65,8 +53,9 @@
#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/freezer.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/fault-inject.h>
+#include <linux/refcount.h>
#include <asm/futex.h>
@@ -173,8 +162,10 @@
* double_lock_hb() and double_unlock_hb(), respectively.
*/
-#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
-int __read_mostly futex_cmpxchg_enabled;
+#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
+#define futex_cmpxchg_enabled 1
+#else
+static int __read_mostly futex_cmpxchg_enabled;
#endif
/*
@@ -209,7 +200,7 @@
struct rt_mutex pi_mutex;
struct task_struct *owner;
- atomic_t refcount;
+ refcount_t refcount;
union futex_key key;
} __randomize_layout;
@@ -318,12 +309,8 @@
if (IS_ERR(dir))
return PTR_ERR(dir);
- if (!debugfs_create_bool("ignore-private", mode, dir,
- &fail_futex.ignore_private)) {
- debugfs_remove_recursive(dir);
- return -ENOMEM;
- }
-
+ debugfs_create_bool("ignore-private", mode, dir,
+ &fail_futex.ignore_private);
return 0;
}
@@ -338,6 +325,12 @@
}
#endif /* CONFIG_FAIL_FUTEX */
+#ifdef CONFIG_COMPAT
+static void compat_exit_robust_list(struct task_struct *curr);
+#else
+static inline void compat_exit_robust_list(struct task_struct *curr) { }
+#endif
+
static inline void futex_get_mm(union futex_key *key)
{
mmgrab(key->private.mm);
@@ -478,13 +471,47 @@
}
}
+enum futex_access {
+ FUTEX_READ,
+ FUTEX_WRITE
+};
+
+/**
+ * futex_setup_timer - set up the sleeping hrtimer.
+ * @time: ptr to the given timeout value
+ * @timeout: the hrtimer_sleeper structure to be set up
+ * @flags: futex flags
+ * @range_ns: optional range in ns
+ *
+ * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
+ * value given
+ */
+static inline struct hrtimer_sleeper *
+futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
+ int flags, u64 range_ns)
+{
+ if (!time)
+ return NULL;
+
+ hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
+ CLOCK_REALTIME : CLOCK_MONOTONIC,
+ HRTIMER_MODE_ABS);
+ /*
+ * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
+ * effectively the same as calling hrtimer_set_expires().
+ */
+ hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);
+
+ return timeout;
+}
+
/**
* get_futex_key() - Get parameters which are the keys for a futex
* @uaddr: virtual address of the futex
* @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
* @key: address where result is stored.
- * @rw: mapping needs to be read/write (values: VERIFY_READ,
- * VERIFY_WRITE)
+ * @rw: mapping needs to be read/write (values: FUTEX_READ,
+ * FUTEX_WRITE)
*
* Return: a negative error code or 0
*
@@ -497,7 +524,7 @@
* lock_page() might sleep, the caller should not hold a spinlock.
*/
static int
-get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
+get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_access rw)
{
unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm;
@@ -513,7 +540,7 @@
return -EINVAL;
address -= key->both.offset;
- if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
+ if (unlikely(!access_ok(uaddr, sizeof(u32))))
return -EFAULT;
if (unlikely(should_fail_futex(fshared)))
@@ -538,12 +565,12 @@
if (unlikely(should_fail_futex(fshared)))
return -EFAULT;
- err = get_user_pages_fast(address, 1, 1, &page);
+ err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
/*
* If write access is not required (eg. FUTEX_WAIT), try
* and get read-only access.
*/
- if (err == -EFAULT && rw == VERIFY_READ) {
+ if (err == -EFAULT && rw == FUTEX_READ) {
err = get_user_pages_fast(address, 1, 0, &page);
ro = 1;
}
@@ -795,7 +822,7 @@
INIT_LIST_HEAD(&pi_state->list);
/* pi_mutex gets initialized later */
pi_state->owner = NULL;
- atomic_set(&pi_state->refcount, 1);
+ refcount_set(&pi_state->refcount, 1);
pi_state->key = FUTEX_KEY_INIT;
current->pi_state_cache = pi_state;
@@ -815,7 +842,7 @@
static void get_pi_state(struct futex_pi_state *pi_state)
{
- WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount));
+ WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
}
/*
@@ -827,7 +854,7 @@
if (!pi_state)
return;
- if (!atomic_dec_and_test(&pi_state->refcount))
+ if (!refcount_dec_and_test(&pi_state->refcount))
return;
/*
@@ -857,7 +884,7 @@
* refcount is at 0 - put it back to 1.
*/
pi_state->owner = NULL;
- atomic_set(&pi_state->refcount, 1);
+ refcount_set(&pi_state->refcount, 1);
current->pi_state_cache = pi_state;
}
}
@@ -869,7 +896,7 @@
* Kernel cleans up PI-state, but userspace is likely hosed.
* (Robust-futex cleanup is separate and might save the day for userspace.)
*/
-void exit_pi_state_list(struct task_struct *curr)
+static void exit_pi_state_list(struct task_struct *curr)
{
struct list_head *next, *head = &curr->pi_state_list;
struct futex_pi_state *pi_state;
@@ -900,7 +927,7 @@
* In that case; drop the locks to let put_pi_state() make
* progress and retry the loop.
*/
- if (!atomic_inc_not_zero(&pi_state->refcount)) {
+ if (!refcount_inc_not_zero(&pi_state->refcount)) {
raw_spin_unlock_irq(&curr->pi_lock);
cpu_relax();
raw_spin_lock_irq(&curr->pi_lock);
@@ -939,7 +966,8 @@
}
raw_spin_unlock_irq(&curr->pi_lock);
}
-
+#else
+static inline void exit_pi_state_list(struct task_struct *curr) { }
#endif
/*
@@ -1056,7 +1084,7 @@
* and futex_wait_requeue_pi() as it cannot go to 0 and consequently
* free pi_state before we can take a reference ourselves.
*/
- WARN_ON(!atomic_read(&pi_state->refcount));
+ WARN_ON(!refcount_read(&pi_state->refcount));
/*
* Now that we have a pi_state, we can acquire wait_lock
@@ -1148,16 +1176,47 @@
return ret;
}
+/**
+ * wait_for_owner_exiting - Block until the owner has exited
+ * @exiting: Pointer to the exiting task
+ *
+ * Caller must hold a refcount on @exiting.
+ */
+static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
+{
+ if (ret != -EBUSY) {
+ WARN_ON_ONCE(exiting);
+ return;
+ }
+
+ if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
+ return;
+
+ mutex_lock(&exiting->futex_exit_mutex);
+ /*
+ * No point in doing state checking here. If the waiter got here
+ * while the task was in exec()->exec_futex_release() then it can
+ * have any FUTEX_STATE_* value when the waiter has acquired the
+ * mutex. OK, if running, EXITING or DEAD if it reached exit()
+ * already. Highly unlikely and not a problem. Just one more round
+ * through the futex maze.
+ */
+ mutex_unlock(&exiting->futex_exit_mutex);
+
+ put_task_struct(exiting);
+}
+
static int handle_exit_race(u32 __user *uaddr, u32 uval,
struct task_struct *tsk)
{
u32 uval2;
/*
- * If PF_EXITPIDONE is not yet set, then try again.
+ * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
+ * caller that the alleged owner is busy.
*/
- if (tsk && !(tsk->flags & PF_EXITPIDONE))
- return -EAGAIN;
+ if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
+ return -EBUSY;
/*
* Reread the user space value to handle the following situation:
@@ -1175,8 +1234,9 @@
* *uaddr = 0xC0000000; tsk = get_task(PID);
* } if (!tsk->flags & PF_EXITING) {
* ... attach();
- * tsk->flags |= PF_EXITPIDONE; } else {
- * if (!(tsk->flags & PF_EXITPIDONE))
+ * tsk->futex_state = } else {
+ * FUTEX_STATE_DEAD; if (tsk->futex_state !=
+ * FUTEX_STATE_DEAD)
* return -EAGAIN;
* return -ESRCH; <--- FAIL
* }
@@ -1207,7 +1267,8 @@
* it after doing proper sanity checks.
*/
static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
- struct futex_pi_state **ps)
+ struct futex_pi_state **ps,
+ struct task_struct **exiting)
{
pid_t pid = uval & FUTEX_TID_MASK;
struct futex_pi_state *pi_state;
@@ -1232,22 +1293,33 @@
}
/*
- * We need to look at the task state flags to figure out,
- * whether the task is exiting. To protect against the do_exit
- * change of the task flags, we do this protected by
- * p->pi_lock:
+ * We need to look at the task state to figure out, whether the
+ * task is exiting. To protect against the change of the task state
+ * in futex_exit_release(), we do this protected by p->pi_lock:
*/
raw_spin_lock_irq(&p->pi_lock);
- if (unlikely(p->flags & PF_EXITING)) {
+ if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
/*
- * The task is on the way out. When PF_EXITPIDONE is
- * set, we know that the task has finished the
- * cleanup:
+ * The task is on the way out. When the futex state is
+ * FUTEX_STATE_DEAD, we know that the task has finished
+ * the cleanup:
*/
int ret = handle_exit_race(uaddr, uval, p);
raw_spin_unlock_irq(&p->pi_lock);
- put_task_struct(p);
+ /*
+ * If the owner task is between FUTEX_STATE_EXITING and
+ * FUTEX_STATE_DEAD then store the task pointer and keep
+ * the reference on the task struct. The calling code will
+ * drop all locks, wait for the task to reach
+ * FUTEX_STATE_DEAD and then drop the refcount. This is
+ * required to prevent a live lock when the current task
+ * preempted the exiting task between the two states.
+ */
+ if (ret == -EBUSY)
+ *exiting = p;
+ else
+ put_task_struct(p);
return ret;
}
@@ -1286,7 +1358,8 @@
static int lookup_pi_state(u32 __user *uaddr, u32 uval,
struct futex_hash_bucket *hb,
- union futex_key *key, struct futex_pi_state **ps)
+ union futex_key *key, struct futex_pi_state **ps,
+ struct task_struct **exiting)
{
struct futex_q *top_waiter = futex_top_waiter(hb, key);
@@ -1301,18 +1374,20 @@
* We are the first waiter - try to look up the owner based on
* @uval and attach to it.
*/
- return attach_to_pi_owner(uaddr, uval, key, ps);
+ return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
}
static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
{
+ int err;
u32 uninitialized_var(curval);
if (unlikely(should_fail_futex(true)))
return -EFAULT;
- if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
- return -EFAULT;
+ err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
+ if (unlikely(err))
+ return err;
/* If user space value changed, let the caller retry */
return curval != uval ? -EAGAIN : 0;
@@ -1327,6 +1402,8 @@
* lookup
* @task: the task to perform the atomic lock work for. This will
* be "current" except in the case of requeue pi.
+ * @exiting: Pointer to store the task pointer of the owner task
+ * which is in the middle of exiting
* @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
*
* Return:
@@ -1335,11 +1412,17 @@
* - <0 - error
*
* The hb->lock and futex_key refs shall be held by the caller.
+ *
+ * @exiting is only set when the return value is -EBUSY. If so, this holds
+ * a refcount on the exiting task on return and the caller needs to drop it
+ * after waiting for the exit to complete.
*/
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
union futex_key *key,
struct futex_pi_state **ps,
- struct task_struct *task, int set_waiters)
+ struct task_struct *task,
+ struct task_struct **exiting,
+ int set_waiters)
{
u32 uval, newval, vpid = task_pid_vnr(task);
struct futex_q *top_waiter;
@@ -1409,7 +1492,7 @@
* attach to the owner. If that fails, no harm done, we only
* set the FUTEX_WAITERS bit in the user space variable.
*/
- return attach_to_pi_owner(uaddr, newval, key, ps);
+ return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
}
/**
@@ -1422,9 +1505,9 @@
{
struct futex_hash_bucket *hb;
- if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
- || WARN_ON(plist_node_empty(&q->list)))
+ if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list)))
return;
+ lockdep_assert_held(q->lock_ptr);
hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
plist_del(&q->list, &hb->chain);
@@ -1444,11 +1527,7 @@
if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
return;
- /*
- * Queue the task for later wakeup for after we've released
- * the hb->lock. wake_q_add() grabs reference to p.
- */
- wake_q_add(wake_q, p);
+ get_task_struct(p);
__unqueue_futex(q);
/*
* The waiting task can free the futex_q as soon as q->lock_ptr = NULL
@@ -1458,6 +1537,12 @@
* plist_del in __unqueue_futex().
*/
smp_store_release(&q->lock_ptr, NULL);
+
+ /*
+ * Queue the task for later wakeup for after we've released
+ * the hb->lock. wake_q_add() grabs reference to p.
+ */
+ wake_q_add_safe(wake_q, p);
}
/*
@@ -1495,10 +1580,8 @@
if (unlikely(should_fail_futex(true)))
ret = -EFAULT;
- if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) {
- ret = -EFAULT;
-
- } else if (curval != uval) {
+ ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
+ if (!ret && (curval != uval)) {
/*
* If a unconditional UNLOCK_PI operation (user space did not
* try the TID->0 transition) raced with a waiter setting the
@@ -1580,7 +1663,7 @@
if (!bitset)
return -EINVAL;
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
if (unlikely(ret != 0))
goto out;
@@ -1639,7 +1722,7 @@
oparg = 1 << oparg;
}
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+ if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
@@ -1679,10 +1762,10 @@
DEFINE_WAKE_Q(wake_q);
retry:
- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
if (unlikely(ret != 0))
goto out;
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
if (unlikely(ret != 0))
goto out_put_key1;
@@ -1693,32 +1776,32 @@
double_lock_hb(hb1, hb2);
op_ret = futex_atomic_op_inuser(op, uaddr2);
if (unlikely(op_ret < 0)) {
-
double_unlock_hb(hb1, hb2);
-#ifndef CONFIG_MMU
- /*
- * we don't get EFAULT from MMU faults if we don't have an MMU,
- * but we might get them from range checking
- */
- ret = op_ret;
- goto out_put_keys;
-#endif
-
- if (unlikely(op_ret != -EFAULT)) {
+ if (!IS_ENABLED(CONFIG_MMU) ||
+ unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
+ /*
+ * we don't get EFAULT from MMU faults if we don't have
+ * an MMU, but we might get them from range checking
+ */
ret = op_ret;
goto out_put_keys;
}
- ret = fault_in_user_writeable(uaddr2);
- if (ret)
- goto out_put_keys;
+ if (op_ret == -EFAULT) {
+ ret = fault_in_user_writeable(uaddr2);
+ if (ret)
+ goto out_put_keys;
+ }
- if (!(flags & FLAGS_SHARED))
+ if (!(flags & FLAGS_SHARED)) {
+ cond_resched();
goto retry_private;
+ }
put_futex_key(&key2);
put_futex_key(&key1);
+ cond_resched();
goto retry;
}
@@ -1827,6 +1910,8 @@
* @key1: the from futex key
* @key2: the to futex key
* @ps: address to store the pi_state pointer
+ * @exiting: Pointer to store the task pointer of the owner task
+ * which is in the middle of exiting
* @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
*
* Try and get the lock on behalf of the top waiter if we can do it atomically.
@@ -1834,16 +1919,20 @@
* then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
* hb1 and hb2 must be held by the caller.
*
+ * @exiting is only set when the return value is -EBUSY. If so, this holds
+ * a refcount on the exiting task on return and the caller needs to drop it
+ * after waiting for the exit to complete.
+ *
* Return:
* - 0 - failed to acquire the lock atomically;
* - >0 - acquired the lock, return value is vpid of the top_waiter
* - <0 - error
*/
-static int futex_proxy_trylock_atomic(u32 __user *pifutex,
- struct futex_hash_bucket *hb1,
- struct futex_hash_bucket *hb2,
- union futex_key *key1, union futex_key *key2,
- struct futex_pi_state **ps, int set_waiters)
+static int
+futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
+ struct futex_hash_bucket *hb2, union futex_key *key1,
+ union futex_key *key2, struct futex_pi_state **ps,
+ struct task_struct **exiting, int set_waiters)
{
struct futex_q *top_waiter = NULL;
u32 curval;
@@ -1880,7 +1969,7 @@
*/
vpid = task_pid_vnr(top_waiter->task);
ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
- set_waiters);
+ exiting, set_waiters);
if (ret == 1) {
requeue_pi_wake_futex(top_waiter, key2, hb2);
return vpid;
@@ -1958,11 +2047,11 @@
}
retry:
- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
if (unlikely(ret != 0))
goto out;
ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
- requeue_pi ? VERIFY_WRITE : VERIFY_READ);
+ requeue_pi ? FUTEX_WRITE : FUTEX_READ);
if (unlikely(ret != 0))
goto out_put_key1;
@@ -2009,6 +2098,8 @@
}
if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
+ struct task_struct *exiting = NULL;
+
/*
* Attempt to acquire uaddr2 and wake the top waiter. If we
* intend to requeue waiters, force setting the FUTEX_WAITERS
@@ -2016,7 +2107,8 @@
* faults rather in the requeue loop below.
*/
ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
- &key2, &pi_state, nr_requeue);
+ &key2, &pi_state,
+ &exiting, nr_requeue);
/*
* At this point the top_waiter has either taken uaddr2 or is
@@ -2043,7 +2135,8 @@
* If that call succeeds then we have pi_state and an
* initial refcount on it.
*/
- ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state);
+ ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
+ &pi_state, &exiting);
}
switch (ret) {
@@ -2061,17 +2154,24 @@
if (!ret)
goto retry;
goto out;
+ case -EBUSY:
case -EAGAIN:
/*
* Two reasons for this:
- * - Owner is exiting and we just wait for the
+ * - EBUSY: Owner is exiting and we just wait for the
* exit to complete.
- * - The user space value changed.
+ * - EAGAIN: The user space value changed.
*/
double_unlock_hb(hb1, hb2);
hb_waiters_dec(hb2);
put_futex_key(&key2);
put_futex_key(&key1);
+ /*
+ * Handle the case where the owner is in the middle of
+ * exiting. Wait for the exit to complete otherwise
+ * this task might loop forever, aka. live lock.
+ */
+ wait_for_owner_exiting(ret, exiting);
cond_resched();
goto retry;
default:
@@ -2210,11 +2310,11 @@
* decrement the counter at queue_unlock() when some error has
* occurred and we don't end up adding the task to the list.
*/
- hb_waiters_inc(hb);
+ hb_waiters_inc(hb); /* implies smp_mb(); (A) */
q->lock_ptr = &hb->lock;
- spin_lock(&hb->lock); /* implies smp_mb(); (A) */
+ spin_lock(&hb->lock);
return hb;
}
@@ -2343,7 +2443,7 @@
u32 uval, uninitialized_var(curval), newval;
struct task_struct *oldowner, *newowner;
u32 newtid;
- int ret;
+ int ret, err = 0;
lockdep_assert_held(q->lock_ptr);
@@ -2414,14 +2514,17 @@
if (!pi_state->owner)
newtid |= FUTEX_OWNER_DIED;
- if (get_futex_value_locked(&uval, uaddr))
- goto handle_fault;
+ err = get_futex_value_locked(&uval, uaddr);
+ if (err)
+ goto handle_err;
for (;;) {
newval = (uval & FUTEX_OWNER_DIED) | newtid;
- if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
- goto handle_fault;
+ err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
+ if (err)
+ goto handle_err;
+
if (curval == uval)
break;
uval = curval;
@@ -2449,23 +2552,37 @@
return 0;
/*
- * To handle the page fault we need to drop the locks here. That gives
- * the other task (either the highest priority waiter itself or the
- * task which stole the rtmutex) the chance to try the fixup of the
- * pi_state. So once we are back from handling the fault we need to
- * check the pi_state after reacquiring the locks and before trying to
- * do another fixup. When the fixup has been done already we simply
- * return.
+ * In order to reschedule or handle a page fault, we need to drop the
+ * locks here. In the case of a fault, this gives the other task
+ * (either the highest priority waiter itself or the task which stole
+ * the rtmutex) the chance to try the fixup of the pi_state. So once we
+ * are back from handling the fault we need to check the pi_state after
+ * reacquiring the locks and before trying to do another fixup. When
+ * the fixup has been done already we simply return.
*
* Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
* drop hb->lock since the caller owns the hb -> futex_q relation.
* Dropping the pi_mutex->wait_lock requires the state revalidate.
*/
-handle_fault:
+handle_err:
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
spin_unlock(q->lock_ptr);
- ret = fault_in_user_writeable(uaddr);
+ switch (err) {
+ case -EFAULT:
+ ret = fault_in_user_writeable(uaddr);
+ break;
+
+ case -EAGAIN:
+ cond_resched();
+ ret = 0;
+ break;
+
+ default:
+ WARN_ON_ONCE(1);
+ ret = err;
+ break;
+ }
spin_lock(q->lock_ptr);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
@@ -2571,7 +2688,7 @@
/* Arm the timer */
if (timeout)
- hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
+ hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
/*
* If we have been removed from the hash list, then another task
@@ -2631,7 +2748,7 @@
* while the syscall executes.
*/
retry:
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
if (unlikely(ret != 0))
return ret;
@@ -2668,7 +2785,7 @@
static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
ktime_t *abs_time, u32 bitset)
{
- struct hrtimer_sleeper timeout, *to = NULL;
+ struct hrtimer_sleeper timeout, *to;
struct restart_block *restart;
struct futex_hash_bucket *hb;
struct futex_q q = futex_q_init;
@@ -2678,17 +2795,8 @@
return -EINVAL;
q.bitset = bitset;
- if (abs_time) {
- to = &timeout;
-
- hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
- CLOCK_REALTIME : CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS);
- hrtimer_init_sleeper(to, current);
- hrtimer_set_expires_range_ns(&to->timer, *abs_time,
- current->timer_slack_ns);
- }
-
+ to = futex_setup_timer(abs_time, &timeout, flags,
+ current->timer_slack_ns);
retry:
/*
* Prepare to wait on uaddr. On success, holds hb lock and increments
@@ -2768,8 +2876,9 @@
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
ktime_t *time, int trylock)
{
- struct hrtimer_sleeper timeout, *to = NULL;
+ struct hrtimer_sleeper timeout, *to;
struct futex_pi_state *pi_state = NULL;
+ struct task_struct *exiting = NULL;
struct rt_mutex_waiter rt_waiter;
struct futex_hash_bucket *hb;
struct futex_q q = futex_q_init;
@@ -2781,23 +2890,18 @@
if (refill_pi_state_cache())
return -ENOMEM;
- if (time) {
- to = &timeout;
- hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
- HRTIMER_MODE_ABS);
- hrtimer_init_sleeper(to, current);
- hrtimer_set_expires(&to->timer, *time);
- }
+ to = futex_setup_timer(time, &timeout, FLAGS_CLOCKRT, 0);
retry:
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
if (unlikely(ret != 0))
goto out;
retry_private:
hb = queue_lock(&q);
- ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
+ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
+ &exiting, 0);
if (unlikely(ret)) {
/*
* Atomic work succeeded and we got the lock,
@@ -2810,15 +2914,22 @@
goto out_unlock_put_key;
case -EFAULT:
goto uaddr_faulted;
+ case -EBUSY:
case -EAGAIN:
/*
* Two reasons for this:
- * - Task is exiting and we just wait for the
+ * - EBUSY: Task is exiting and we just wait for the
* exit to complete.
- * - The user space value changed.
+ * - EAGAIN: The user space value changed.
*/
queue_unlock(hb);
put_futex_key(&q.key);
+ /*
+ * Handle the case where the owner is in the middle of
+ * exiting. Wait for the exit to complete otherwise
+ * this task might loop forever, aka. live lock.
+ */
+ wait_for_owner_exiting(ret, exiting);
cond_resched();
goto retry;
default:
@@ -2850,35 +2961,39 @@
* and BUG when futex_unlock_pi() interleaves with this.
*
* Therefore acquire wait_lock while holding hb->lock, but drop the
- * latter before calling rt_mutex_start_proxy_lock(). This still fully
- * serializes against futex_unlock_pi() as that does the exact same
- * lock handoff sequence.
+ * latter before calling __rt_mutex_start_proxy_lock(). This
+ * interleaves with futex_unlock_pi() -- which does a similar lock
+ * handoff -- such that the latter can observe the futex_q::pi_state
+ * before __rt_mutex_start_proxy_lock() is done.
*/
raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
spin_unlock(q.lock_ptr);
+ /*
+ * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
+ * such that futex_unlock_pi() is guaranteed to observe the waiter when
+ * it sees the futex_q::pi_state.
+ */
ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
if (ret) {
if (ret == 1)
ret = 0;
-
- spin_lock(q.lock_ptr);
- goto no_block;
+ goto cleanup;
}
-
if (unlikely(to))
- hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
+ hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
+cleanup:
spin_lock(q.lock_ptr);
/*
- * If we failed to acquire the lock (signal/timeout), we must
+ * If we failed to acquire the lock (deadlock/signal/timeout), we must
* first acquire the hb->lock before removing the lock from the
- * rt_mutex waitqueue, such that we can keep the hb and rt_mutex
- * wait lists consistent.
+ * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
+ * lists consistent.
*
* In particular; it is important that futex_unlock_pi() can not
* observe this inconsistency.
@@ -2969,7 +3084,7 @@
if ((uval & FUTEX_TID_MASK) != vpid)
return -EPERM;
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
if (ret)
return ret;
@@ -3002,6 +3117,10 @@
* there is no point where we hold neither; and therefore
* wake_futex_pi() must observe a state consistent with what we
* observed.
+ *
+ * In particular; this forces __rt_mutex_start_proxy() to
+ * complete such that we're guaranteed to observe the
+ * rt_waiter. Also see the WARN in wake_futex_pi().
*/
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
spin_unlock(&hb->lock);
@@ -3026,10 +3145,8 @@
* A unconditional UNLOCK_PI op raced against a waiter
* setting the FUTEX_WAITERS bit. Try again.
*/
- if (ret == -EAGAIN) {
- put_futex_key(&key);
- goto retry;
- }
+ if (ret == -EAGAIN)
+ goto pi_retry;
/*
* wake_futex_pi has detected invalid state. Tell user
* space.
@@ -3044,9 +3161,19 @@
* preserve the WAITERS bit not the OWNER_DIED one. We are the
* owner.
*/
- if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) {
+ if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) {
spin_unlock(&hb->lock);
- goto pi_faulted;
+ switch (ret) {
+ case -EFAULT:
+ goto pi_faulted;
+
+ case -EAGAIN:
+ goto pi_retry;
+
+ default:
+ WARN_ON_ONCE(1);
+ goto out_putkey;
+ }
}
/*
@@ -3060,6 +3187,11 @@
put_futex_key(&key);
return ret;
+pi_retry:
+ put_futex_key(&key);
+ cond_resched();
+ goto retry;
+
pi_faulted:
put_futex_key(&key);
@@ -3163,7 +3295,7 @@
u32 val, ktime_t *abs_time, u32 bitset,
u32 __user *uaddr2)
{
- struct hrtimer_sleeper timeout, *to = NULL;
+ struct hrtimer_sleeper timeout, *to;
struct futex_pi_state *pi_state = NULL;
struct rt_mutex_waiter rt_waiter;
struct futex_hash_bucket *hb;
@@ -3180,15 +3312,8 @@
if (!bitset)
return -EINVAL;
- if (abs_time) {
- to = &timeout;
- hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
- CLOCK_REALTIME : CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS);
- hrtimer_init_sleeper(to, current);
- hrtimer_set_expires_range_ns(&to->timer, *abs_time,
- current->timer_slack_ns);
- }
+ to = futex_setup_timer(abs_time, &timeout, flags,
+ current->timer_slack_ns);
/*
* The waiter is allocated on our stack, manipulated by the requeue
@@ -3196,7 +3321,7 @@
*/
rt_mutex_init_waiter(&rt_waiter);
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
if (unlikely(ret != 0))
goto out;
@@ -3413,54 +3538,115 @@
return ret;
}
+/* Constants for the pending_op argument of handle_futex_death */
+#define HANDLE_DEATH_PENDING true
+#define HANDLE_DEATH_LIST false
+
/*
* Process a futex-list entry, check whether it's owned by the
* dying task, and do notification if so:
*/
-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
+static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
+ bool pi, bool pending_op)
{
u32 uval, uninitialized_var(nval), mval;
+ int err;
+
+ /* Futex address must be 32bit aligned */
+ if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
+ return -1;
retry:
if (get_user(uval, uaddr))
return -1;
- if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
- /*
- * Ok, this dying thread is truly holding a futex
- * of interest. Set the OWNER_DIED bit atomically
- * via cmpxchg, and if the value had FUTEX_WAITERS
- * set, wake up a waiter (if any). (We have to do a
- * futex_wake() even if OWNER_DIED is already set -
- * to handle the rare but possible case of recursive
- * thread-death.) The rest of the cleanup is done in
- * userspace.
- */
- mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
- /*
- * We are not holding a lock here, but we want to have
- * the pagefault_disable/enable() protection because
- * we want to handle the fault gracefully. If the
- * access fails we try to fault in the futex with R/W
- * verification via get_user_pages. get_user() above
- * does not guarantee R/W access. If that fails we
- * give up and leave the futex locked.
- */
- if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
+ /*
+ * Special case for regular (non PI) futexes. The unlock path in
+ * user space has two race scenarios:
+ *
+ * 1. The unlock path releases the user space futex value and
+ * before it can execute the futex() syscall to wake up
+ * waiters it is killed.
+ *
+ * 2. A woken up waiter is killed before it can acquire the
+ * futex in user space.
+ *
+ * In both cases the TID validation below prevents a wakeup of
+ * potential waiters which can cause these waiters to block
+ * forever.
+ *
+ * In both cases the following conditions are met:
+ *
+ * 1) task->robust_list->list_op_pending != NULL
+ * @pending_op == true
+ * 2) User space futex value == 0
+ * 3) Regular futex: @pi == false
+ *
+ * If these conditions are met, it is safe to attempt waking up a
+ * potential waiter without touching the user space futex value and
+ * trying to set the OWNER_DIED bit. The user space futex value is
+ * uncontended and the rest of the user space mutex state is
+ * consistent, so a woken waiter will just take over the
+ * uncontended futex. Setting the OWNER_DIED bit would create
+ * inconsistent state and malfunction of the user space owner died
+ * handling.
+ */
+ if (pending_op && !pi && !uval) {
+ futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+ return 0;
+ }
+
+ if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
+ return 0;
+
+ /*
+ * Ok, this dying thread is truly holding a futex
+ * of interest. Set the OWNER_DIED bit atomically
+ * via cmpxchg, and if the value had FUTEX_WAITERS
+ * set, wake up a waiter (if any). (We have to do a
+ * futex_wake() even if OWNER_DIED is already set -
+ * to handle the rare but possible case of recursive
+ * thread-death.) The rest of the cleanup is done in
+ * userspace.
+ */
+ mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+
+ /*
+ * We are not holding a lock here, but we want to have
+ * the pagefault_disable/enable() protection because
+ * we want to handle the fault gracefully. If the
+ * access fails we try to fault in the futex with R/W
+ * verification via get_user_pages. get_user() above
+ * does not guarantee R/W access. If that fails we
+ * give up and leave the futex locked.
+ */
+ if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) {
+ switch (err) {
+ case -EFAULT:
if (fault_in_user_writeable(uaddr))
return -1;
goto retry;
- }
- if (nval != uval)
+
+ case -EAGAIN:
+ cond_resched();
goto retry;
- /*
- * Wake robust non-PI futexes here. The wakeup of
- * PI futexes happens in exit_pi_state():
- */
- if (!pi && (uval & FUTEX_WAITERS))
- futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+ default:
+ WARN_ON_ONCE(1);
+ return err;
+ }
}
+
+ if (nval != uval)
+ goto retry;
+
+ /*
+ * Wake robust non-PI futexes here. The wakeup of
+ * PI futexes happens in exit_pi_state():
+ */
+ if (!pi && (uval & FUTEX_WAITERS))
+ futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+
return 0;
}
@@ -3488,7 +3674,7 @@
*
* We silently return on any sign of list-walking problem.
*/
-void exit_robust_list(struct task_struct *curr)
+static void exit_robust_list(struct task_struct *curr)
{
struct robust_list_head __user *head = curr->robust_list;
struct robust_list __user *entry, *next_entry, *pending;
@@ -3529,10 +3715,11 @@
* A pending lock might already be on the list, so
* don't process it twice:
*/
- if (entry != pending)
+ if (entry != pending) {
if (handle_futex_death((void __user *)entry + futex_offset,
- curr, pi))
+ curr, pi, HANDLE_DEATH_LIST))
return;
+ }
if (rc)
return;
entry = next_entry;
@@ -3546,9 +3733,118 @@
cond_resched();
}
- if (pending)
+ if (pending) {
handle_futex_death((void __user *)pending + futex_offset,
- curr, pip);
+ curr, pip, HANDLE_DEATH_PENDING);
+ }
+}
+
+static void futex_cleanup(struct task_struct *tsk)
+{
+ if (unlikely(tsk->robust_list)) {
+ exit_robust_list(tsk);
+ tsk->robust_list = NULL;
+ }
+
+#ifdef CONFIG_COMPAT
+ if (unlikely(tsk->compat_robust_list)) {
+ compat_exit_robust_list(tsk);
+ tsk->compat_robust_list = NULL;
+ }
+#endif
+
+ if (unlikely(!list_empty(&tsk->pi_state_list)))
+ exit_pi_state_list(tsk);
+}
+
+/**
+ * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
+ * @tsk: task to set the state on
+ *
+ * Set the futex exit state of the task lockless. The futex waiter code
+ * observes that state when a task is exiting and loops until the task has
+ * actually finished the futex cleanup. The worst case for this is that the
+ * waiter runs through the wait loop until the state becomes visible.
+ *
+ * This is called from the recursive fault handling path in do_exit().
+ *
+ * This is best effort. Either the futex exit code has run already or
+ * not. If the OWNER_DIED bit has been set on the futex then the waiter can
+ * take it over. If not, the problem is pushed back to user space. If the
+ * futex exit code did not run yet, then an already queued waiter might
+ * block forever, but there is nothing which can be done about that.
+ */
+void futex_exit_recursive(struct task_struct *tsk)
+{
+ /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
+ if (tsk->futex_state == FUTEX_STATE_EXITING)
+ mutex_unlock(&tsk->futex_exit_mutex);
+ tsk->futex_state = FUTEX_STATE_DEAD;
+}
+
+static void futex_cleanup_begin(struct task_struct *tsk)
+{
+ /*
+ * Prevent various race issues against a concurrent incoming waiter
+ * including live locks by forcing the waiter to block on
+ * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
+ * attach_to_pi_owner().
+ */
+ mutex_lock(&tsk->futex_exit_mutex);
+
+ /*
+ * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
+ *
+ * This ensures that all subsequent checks of tsk->futex_state in
+ * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
+ * tsk->pi_lock held.
+ *
+ * It guarantees also that a pi_state which was queued right before
+ * the state change under tsk->pi_lock by a concurrent waiter must
+ * be observed in exit_pi_state_list().
+ */
+ raw_spin_lock_irq(&tsk->pi_lock);
+ tsk->futex_state = FUTEX_STATE_EXITING;
+ raw_spin_unlock_irq(&tsk->pi_lock);
+}
+
+static void futex_cleanup_end(struct task_struct *tsk, int state)
+{
+ /*
+ * Lockless store. The only side effect is that an observer might
+ * take another loop until it becomes visible.
+ */
+ tsk->futex_state = state;
+ /*
+ * Drop the exit protection. This unblocks waiters which observed
+ * FUTEX_STATE_EXITING to reevaluate the state.
+ */
+ mutex_unlock(&tsk->futex_exit_mutex);
+}
+
+void futex_exec_release(struct task_struct *tsk)
+{
+ /*
+ * The state handling is done for consistency, but in the case of
+ * exec() there is no way to prevent futher damage as the PID stays
+ * the same. But for the unlikely and arguably buggy case that a
+ * futex is held on exec(), this provides at least as much state
+ * consistency protection which is possible.
+ */
+ futex_cleanup_begin(tsk);
+ futex_cleanup(tsk);
+ /*
+ * Reset the state to FUTEX_STATE_OK. The task is alive and about
+ * exec a new binary.
+ */
+ futex_cleanup_end(tsk, FUTEX_STATE_OK);
+}
+
+void futex_exit_release(struct task_struct *tsk)
+{
+ futex_cleanup_begin(tsk);
+ futex_cleanup(tsk);
+ futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
}
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
@@ -3612,10 +3908,10 @@
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
- struct timespec __user *, utime, u32 __user *, uaddr2,
+ struct __kernel_timespec __user *, utime, u32 __user *, uaddr2,
u32, val3)
{
- struct timespec ts;
+ struct timespec64 ts;
ktime_t t, *tp = NULL;
u32 val2 = 0;
int cmd = op & FUTEX_CMD_MASK;
@@ -3625,12 +3921,12 @@
cmd == FUTEX_WAIT_REQUEUE_PI)) {
if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
return -EFAULT;
- if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
+ if (get_timespec64(&ts, utime))
return -EFAULT;
- if (!timespec_valid(&ts))
+ if (!timespec64_valid(&ts))
return -EINVAL;
- t = timespec_to_ktime(ts);
+ t = timespec64_to_ktime(ts);
if (cmd == FUTEX_WAIT)
t = ktime_add_safe(ktime_get(), t);
tp = &t;
@@ -3646,6 +3942,195 @@
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
+#ifdef CONFIG_COMPAT
+/*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int
+compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
+ compat_uptr_t __user *head, unsigned int *pi)
+{
+ if (get_user(*uentry, head))
+ return -EFAULT;
+
+ *entry = compat_ptr((*uentry) & ~1);
+ *pi = (unsigned int)(*uentry) & 1;
+
+ return 0;
+}
+
+static void __user *futex_uaddr(struct robust_list __user *entry,
+ compat_long_t futex_offset)
+{
+ compat_uptr_t base = ptr_to_compat(entry);
+ void __user *uaddr = compat_ptr(base + futex_offset);
+
+ return uaddr;
+}
+
+/*
+ * Walk curr->robust_list (very carefully, it's a userspace list!)
+ * and mark any locks found there dead, and notify any waiters.
+ *
+ * We silently return on any sign of list-walking problem.
+ */
+static void compat_exit_robust_list(struct task_struct *curr)
+{
+ struct compat_robust_list_head __user *head = curr->compat_robust_list;
+ struct robust_list __user *entry, *next_entry, *pending;
+ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+ unsigned int uninitialized_var(next_pi);
+ compat_uptr_t uentry, next_uentry, upending;
+ compat_long_t futex_offset;
+ int rc;
+
+ if (!futex_cmpxchg_enabled)
+ return;
+
+ /*
+ * Fetch the list head (which was registered earlier, via
+ * sys_set_robust_list()):
+ */
+ if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
+ return;
+ /*
+ * Fetch the relative futex offset:
+ */
+ if (get_user(futex_offset, &head->futex_offset))
+ return;
+ /*
+ * Fetch any possibly pending lock-add first, and handle it
+ * if it exists:
+ */
+ if (compat_fetch_robust_entry(&upending, &pending,
+ &head->list_op_pending, &pip))
+ return;
+
+ next_entry = NULL; /* avoid warning with gcc */
+ while (entry != (struct robust_list __user *) &head->list) {
+ /*
+ * Fetch the next entry in the list before calling
+ * handle_futex_death:
+ */
+ rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
+ (compat_uptr_t __user *)&entry->next, &next_pi);
+ /*
+ * A pending lock might already be on the list, so
+ * dont process it twice:
+ */
+ if (entry != pending) {
+ void __user *uaddr = futex_uaddr(entry, futex_offset);
+
+ if (handle_futex_death(uaddr, curr, pi,
+ HANDLE_DEATH_LIST))
+ return;
+ }
+ if (rc)
+ return;
+ uentry = next_uentry;
+ entry = next_entry;
+ pi = next_pi;
+ /*
+ * Avoid excessively long or circular lists:
+ */
+ if (!--limit)
+ break;
+
+ cond_resched();
+ }
+ if (pending) {
+ void __user *uaddr = futex_uaddr(pending, futex_offset);
+
+ handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
+ }
+}
+
+COMPAT_SYSCALL_DEFINE2(set_robust_list,
+ struct compat_robust_list_head __user *, head,
+ compat_size_t, len)
+{
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+
+ if (unlikely(len != sizeof(*head)))
+ return -EINVAL;
+
+ current->compat_robust_list = head;
+
+ return 0;
+}
+
+COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
+ compat_uptr_t __user *, head_ptr,
+ compat_size_t __user *, len_ptr)
+{
+ struct compat_robust_list_head __user *head;
+ unsigned long ret;
+ struct task_struct *p;
+
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+
+ rcu_read_lock();
+
+ ret = -ESRCH;
+ if (!pid)
+ p = current;
+ else {
+ p = find_task_by_vpid(pid);
+ if (!p)
+ goto err_unlock;
+ }
+
+ ret = -EPERM;
+ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
+ goto err_unlock;
+
+ head = p->compat_robust_list;
+ rcu_read_unlock();
+
+ if (put_user(sizeof(*head), len_ptr))
+ return -EFAULT;
+ return put_user(ptr_to_compat(head), head_ptr);
+
+err_unlock:
+ rcu_read_unlock();
+
+ return ret;
+}
+#endif /* CONFIG_COMPAT */
+
+#ifdef CONFIG_COMPAT_32BIT_TIME
+SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
+ struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
+ u32, val3)
+{
+ struct timespec64 ts;
+ ktime_t t, *tp = NULL;
+ int val2 = 0;
+ int cmd = op & FUTEX_CMD_MASK;
+
+ if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
+ cmd == FUTEX_WAIT_BITSET ||
+ cmd == FUTEX_WAIT_REQUEUE_PI)) {
+ if (get_old_timespec32(&ts, utime))
+ return -EFAULT;
+ if (!timespec64_valid(&ts))
+ return -EINVAL;
+
+ t = timespec64_to_ktime(ts);
+ if (cmd == FUTEX_WAIT)
+ t = ktime_add_safe(ktime_get(), t);
+ tp = &t;
+ }
+ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+ cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
+ val2 = (int) (unsigned long) utime;
+
+ return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+}
+#endif /* CONFIG_COMPAT_32BIT_TIME */
+
static void __init futex_detect_cmpxchg(void)
{
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG