Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ee4eecc..de94881 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -89,9 +89,12 @@
unsigned int may_swap:1;
/*
- * Cgroups are not reclaimed below their configured memory.low,
- * unless we threaten to OOM. If any cgroups are skipped due to
- * memory.low and nothing was reclaimed, go back for memory.low.
+ * Cgroup memory below memory.low is protected as long as we
+ * don't threaten to OOM. If any cgroup is reclaimed at
+ * reduced force or passed over entirely due to its memory.low
+ * setting (memcg_low_skipped), and nothing is reclaimed as a
+ * result, then go back for one more cycle that reclaims the protected
+ * memory (memcg_low_reclaim) to avert OOM.
*/
unsigned int memcg_low_reclaim:1;
unsigned int memcg_low_skipped:1;
@@ -422,7 +425,7 @@
{
down_write(&shrinker_rwsem);
list_add_tail(&shrinker->list, &shrinker_list);
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
if (shrinker->flags & SHRINKER_MEMCG_AWARE)
idr_replace(&shrinker_idr, shrinker, shrinker->id);
#endif
@@ -2458,14 +2461,14 @@
for_each_evictable_lru(lru) {
int file = is_file_lru(lru);
unsigned long lruvec_size;
+ unsigned long low, min;
unsigned long scan;
- unsigned long protection;
lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
- protection = mem_cgroup_protection(memcg,
- sc->memcg_low_reclaim);
+ mem_cgroup_protection(sc->target_mem_cgroup, memcg,
+ &min, &low);
- if (protection) {
+ if (min || low) {
/*
* Scale a cgroup's reclaim pressure by proportioning
* its current usage to its memory.low or memory.min
@@ -2496,12 +2499,21 @@
* hard protection.
*/
unsigned long cgroup_size = mem_cgroup_size(memcg);
+ unsigned long protection;
+
+ /* memory.low scaling, make sure we retry before OOM */
+ if (!sc->memcg_low_reclaim && low > min) {
+ protection = low;
+ sc->memcg_low_skipped = 1;
+ } else {
+ protection = min;
+ }
/* Avoid TOCTOU with earlier protection check */
cgroup_size = max(cgroup_size, protection);
scan = lruvec_size - lruvec_size * protection /
- cgroup_size;
+ (cgroup_size + 1);
/*
* Minimally target SWAP_CLUSTER_MAX pages to keep
@@ -2530,10 +2542,13 @@
/*
* Scan types proportional to swappiness and
* their relative recent reclaim efficiency.
- * Make sure we don't miss the last page
- * because of a round-off error.
+ * Make sure we don't miss the last page on
+ * the offlined memory cgroups because of a
+ * round-off error.
*/
- scan = DIV64_U64_ROUND_UP(scan * fraction[file],
+ scan = mem_cgroup_online(memcg) ?
+ div64_u64(scan * fraction[file], denominator) :
+ DIV64_U64_ROUND_UP(scan * fraction[file],
denominator);
break;
case SCAN_FILE:
@@ -2772,6 +2787,14 @@
unsigned long reclaimed;
unsigned long scanned;
+ /*
+ * This loop can become CPU-bound when target memcgs
+ * aren't eligible for reclaim - either because they
+ * don't have any reclaimable pages, or because their
+ * memory is explicitly protected. Avoid soft lockups.
+ */
+ cond_resched();
+
switch (mem_cgroup_protected(root, memcg)) {
case MEMCG_PROT_MIN:
/*
@@ -3157,8 +3180,9 @@
/* kswapd must be awake if processes are being throttled */
if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) {
- pgdat->kswapd_classzone_idx = min(pgdat->kswapd_classzone_idx,
- (enum zone_type)ZONE_NORMAL);
+ if (READ_ONCE(pgdat->kswapd_classzone_idx) > ZONE_NORMAL)
+ WRITE_ONCE(pgdat->kswapd_classzone_idx, ZONE_NORMAL);
+
wake_up_interruptible(&pgdat->kswapd_wait);
}
@@ -3790,9 +3814,9 @@
static enum zone_type kswapd_classzone_idx(pg_data_t *pgdat,
enum zone_type prev_classzone_idx)
{
- if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES)
- return prev_classzone_idx;
- return pgdat->kswapd_classzone_idx;
+ enum zone_type curr_idx = READ_ONCE(pgdat->kswapd_classzone_idx);
+
+ return curr_idx == MAX_NR_ZONES ? prev_classzone_idx : curr_idx;
}
static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
@@ -3836,8 +3860,11 @@
* the previous request that slept prematurely.
*/
if (remaining) {
- pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
- pgdat->kswapd_order = max(pgdat->kswapd_order, reclaim_order);
+ WRITE_ONCE(pgdat->kswapd_classzone_idx,
+ kswapd_classzone_idx(pgdat, classzone_idx));
+
+ if (READ_ONCE(pgdat->kswapd_order) < reclaim_order)
+ WRITE_ONCE(pgdat->kswapd_order, reclaim_order);
}
finish_wait(&pgdat->kswapd_wait, &wait);
@@ -3914,12 +3941,12 @@
tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
set_freezable();
- pgdat->kswapd_order = 0;
- pgdat->kswapd_classzone_idx = MAX_NR_ZONES;
+ WRITE_ONCE(pgdat->kswapd_order, 0);
+ WRITE_ONCE(pgdat->kswapd_classzone_idx, MAX_NR_ZONES);
for ( ; ; ) {
bool ret;
- alloc_order = reclaim_order = pgdat->kswapd_order;
+ alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order);
classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
kswapd_try_sleep:
@@ -3927,10 +3954,10 @@
classzone_idx);
/* Read the new order and classzone_idx */
- alloc_order = reclaim_order = pgdat->kswapd_order;
+ alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order);
classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
- pgdat->kswapd_order = 0;
- pgdat->kswapd_classzone_idx = MAX_NR_ZONES;
+ WRITE_ONCE(pgdat->kswapd_order, 0);
+ WRITE_ONCE(pgdat->kswapd_classzone_idx, MAX_NR_ZONES);
ret = try_to_freeze();
if (kthread_should_stop())
@@ -3974,20 +4001,23 @@
enum zone_type classzone_idx)
{
pg_data_t *pgdat;
+ enum zone_type curr_idx;
if (!managed_zone(zone))
return;
if (!cpuset_zone_allowed(zone, gfp_flags))
return;
- pgdat = zone->zone_pgdat;
- if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES)
- pgdat->kswapd_classzone_idx = classzone_idx;
- else
- pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx,
- classzone_idx);
- pgdat->kswapd_order = max(pgdat->kswapd_order, order);
+ pgdat = zone->zone_pgdat;
+ curr_idx = READ_ONCE(pgdat->kswapd_classzone_idx);
+
+ if (curr_idx == MAX_NR_ZONES || curr_idx < classzone_idx)
+ WRITE_ONCE(pgdat->kswapd_classzone_idx, classzone_idx);
+
+ if (READ_ONCE(pgdat->kswapd_order) < order)
+ WRITE_ONCE(pgdat->kswapd_order, order);
+
if (!waitqueue_active(&pgdat->kswapd_wait))
return;