Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/mm/slub.c b/mm/slub.c
index ca7143f..1384dc9 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -94,9 +94,7 @@
* minimal so we rely on the page allocators per cpu caches for
* fast frees and allocs.
*
- * Overloading of page flags that are otherwise used for LRU management.
- *
- * PageActive The slab is frozen and exempt from list processing.
+ * page->frozen The slab is frozen and exempt from list processing.
* This means that the slab is dedicated to a purpose
* such as satisfying allocations for a specific
* processor. Objects may be freed in the slab while
@@ -112,23 +110,27 @@
* free objects in addition to the regular freelist
* that requires the slab lock.
*
- * PageError Slab requires special handling due to debug
+ * SLAB_DEBUG_FLAGS Slab requires special handling due to debug
* options set. This moves slab handling out of
* the fast path and disables lockless freelists.
*/
-static inline int kmem_cache_debug(struct kmem_cache *s)
-{
#ifdef CONFIG_SLUB_DEBUG
- return unlikely(s->flags & SLAB_DEBUG_FLAGS);
+#ifdef CONFIG_SLUB_DEBUG_ON
+DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
#else
- return 0;
+DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
#endif
+#endif
+
+static inline bool kmem_cache_debug(struct kmem_cache *s)
+{
+ return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
}
void *fixup_red_left(struct kmem_cache *s, void *p)
{
- if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
+ if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
p += s->red_left_pad;
return p;
@@ -217,14 +219,10 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
#ifdef CONFIG_SYSFS
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
-static void memcg_propagate_slab_attrs(struct kmem_cache *s);
-static void sysfs_slab_remove(struct kmem_cache *s);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
{ return 0; }
-static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
-static inline void sysfs_slab_remove(struct kmem_cache *s) { }
#endif
static inline void stat(const struct kmem_cache *s, enum stat_item si)
@@ -295,7 +293,7 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
return get_freepointer(s, object);
freepointer_addr = (unsigned long)object + s->offset;
- probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p));
+ copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p));
return freelist_ptr(s, p, freepointer_addr);
}
@@ -316,12 +314,6 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
__p < (__addr) + (__objects) * (__s)->size; \
__p += (__s)->size)
-/* Determine object index from a given position */
-static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr)
-{
- return (kasan_reset_tag(p) - addr) / s->size;
-}
-
static inline unsigned int order_objects(unsigned int order, unsigned int size)
{
return ((unsigned int)PAGE_SIZE << order) / size;
@@ -442,19 +434,37 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
}
#ifdef CONFIG_SLUB_DEBUG
+static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
+static DEFINE_SPINLOCK(object_map_lock);
+
/*
* Determine a map of object in use on a page.
*
* Node listlock must be held to guarantee that the page does
* not vanish from under us.
*/
-static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
+static unsigned long *get_map(struct kmem_cache *s, struct page *page)
+ __acquires(&object_map_lock)
{
void *p;
void *addr = page_address(page);
+ VM_BUG_ON(!irqs_disabled());
+
+ spin_lock(&object_map_lock);
+
+ bitmap_zero(object_map, page->objects);
+
for (p = page->freelist; p; p = get_freepointer(s, p))
- set_bit(slab_index(p, s, addr), map);
+ set_bit(__obj_to_index(s, addr, p), object_map);
+
+ return object_map;
+}
+
+static void put_map(unsigned long *map) __releases(&object_map_lock)
+{
+ VM_BUG_ON(map != object_map);
+ spin_unlock(&object_map_lock);
}
static inline unsigned int size_from_object(struct kmem_cache *s)
@@ -482,7 +492,7 @@ static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
static slab_flags_t slub_debug;
#endif
-static char *slub_debug_slabs;
+static char *slub_debug_string;
static int disable_higher_order_debug;
/*
@@ -617,7 +627,7 @@ static void print_track(const char *s, struct track *t, unsigned long pr_time)
#endif
}
-static void print_tracking(struct kmem_cache *s, void *object)
+void print_tracking(struct kmem_cache *s, void *object)
{
unsigned long pr_time = jiffies;
if (!(s->flags & SLAB_STORE_USER))
@@ -765,6 +775,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
{
u8 *fault;
u8 *end;
+ u8 *addr = page_address(page);
metadata_access_enable();
fault = memchr_inv(start, value, bytes);
@@ -777,8 +788,9 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
end--;
slab_bug(s, "%s overwritten", what);
- pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
- fault, end - 1, fault[0], value);
+ pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
+ fault, end - 1, fault - addr,
+ fault[0], value);
print_trailer(s, page, object);
restore_bytes(s, what, value, fault, end);
@@ -869,7 +881,8 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
while (end > fault && end[-1] == POISON_INUSE)
end--;
- slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
+ slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu",
+ fault, end - 1, fault - start);
print_section(KERN_ERR, "Padding ", pad, remainder);
restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
@@ -1092,7 +1105,7 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
static void setup_object_debug(struct kmem_cache *s, struct page *page,
void *object)
{
- if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
+ if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
return;
init_object(s, object, SLUB_RED_INACTIVE);
@@ -1102,7 +1115,7 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page,
static
void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
{
- if (!(s->flags & SLAB_POISON))
+ if (!kmem_cache_debug_flags(s, SLAB_POISON))
return;
metadata_access_enable();
@@ -1198,7 +1211,7 @@ static noinline int free_debug_processing(
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
void *object = head;
int cnt = 0;
- unsigned long uninitialized_var(flags);
+ unsigned long flags;
int ret = 0;
spin_lock_irqsave(&n->list_lock, flags);
@@ -1242,8 +1255,102 @@ static noinline int free_debug_processing(
return ret;
}
+/*
+ * Parse a block of slub_debug options. Blocks are delimited by ';'
+ *
+ * @str: start of block
+ * @flags: returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified
+ * @slabs: return start of list of slabs, or NULL when there's no list
+ * @init: assume this is initial parsing and not per-kmem-create parsing
+ *
+ * returns the start of next block if there's any, or NULL
+ */
+static char *
+parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
+{
+ bool higher_order_disable = false;
+
+ /* Skip any completely empty blocks */
+ while (*str && *str == ';')
+ str++;
+
+ if (*str == ',') {
+ /*
+ * No options but restriction on slabs. This means full
+ * debugging for slabs matching a pattern.
+ */
+ *flags = DEBUG_DEFAULT_FLAGS;
+ goto check_slabs;
+ }
+ *flags = 0;
+
+ /* Determine which debug features should be switched on */
+ for (; *str && *str != ',' && *str != ';'; str++) {
+ switch (tolower(*str)) {
+ case '-':
+ *flags = 0;
+ break;
+ case 'f':
+ *flags |= SLAB_CONSISTENCY_CHECKS;
+ break;
+ case 'z':
+ *flags |= SLAB_RED_ZONE;
+ break;
+ case 'p':
+ *flags |= SLAB_POISON;
+ break;
+ case 'u':
+ *flags |= SLAB_STORE_USER;
+ break;
+ case 't':
+ *flags |= SLAB_TRACE;
+ break;
+ case 'a':
+ *flags |= SLAB_FAILSLAB;
+ break;
+ case 'o':
+ /*
+ * Avoid enabling debugging on caches if its minimum
+ * order would increase as a result.
+ */
+ higher_order_disable = true;
+ break;
+ default:
+ if (init)
+ pr_err("slub_debug option '%c' unknown. skipped\n", *str);
+ }
+ }
+check_slabs:
+ if (*str == ',')
+ *slabs = ++str;
+ else
+ *slabs = NULL;
+
+ /* Skip over the slab list */
+ while (*str && *str != ';')
+ str++;
+
+ /* Skip any completely empty blocks */
+ while (*str && *str == ';')
+ str++;
+
+ if (init && higher_order_disable)
+ disable_higher_order_debug = 1;
+
+ if (*str)
+ return str;
+ else
+ return NULL;
+}
+
static int __init setup_slub_debug(char *str)
{
+ slab_flags_t flags;
+ char *saved_str;
+ char *slab_list;
+ bool global_slub_debug_changed = false;
+ bool slab_list_specified = false;
+
slub_debug = DEBUG_DEFAULT_FLAGS;
if (*str++ != '=' || !*str)
/*
@@ -1251,60 +1358,32 @@ static int __init setup_slub_debug(char *str)
*/
goto out;
- if (*str == ',')
- /*
- * No options but restriction on slabs. This means full
- * debugging for slabs matching a pattern.
- */
- goto check_slabs;
+ saved_str = str;
+ while (str) {
+ str = parse_slub_debug_flags(str, &flags, &slab_list, true);
- slub_debug = 0;
- if (*str == '-')
- /*
- * Switch off all debugging measures.
- */
- goto out;
-
- /*
- * Determine which debug features should be switched on
- */
- for (; *str && *str != ','; str++) {
- switch (tolower(*str)) {
- case 'f':
- slub_debug |= SLAB_CONSISTENCY_CHECKS;
- break;
- case 'z':
- slub_debug |= SLAB_RED_ZONE;
- break;
- case 'p':
- slub_debug |= SLAB_POISON;
- break;
- case 'u':
- slub_debug |= SLAB_STORE_USER;
- break;
- case 't':
- slub_debug |= SLAB_TRACE;
- break;
- case 'a':
- slub_debug |= SLAB_FAILSLAB;
- break;
- case 'o':
- /*
- * Avoid enabling debugging on caches if its minimum
- * order would increase as a result.
- */
- disable_higher_order_debug = 1;
- break;
- default:
- pr_err("slub_debug option '%c' unknown. skipped\n",
- *str);
+ if (!slab_list) {
+ slub_debug = flags;
+ global_slub_debug_changed = true;
+ } else {
+ slab_list_specified = true;
}
}
-check_slabs:
- if (*str == ',')
- slub_debug_slabs = str + 1;
+ /*
+ * For backwards compatibility, a single list of flags with list of
+ * slabs means debugging is only enabled for those slabs, so the global
+ * slub_debug should be 0. We can extended that to multiple lists as
+ * long as there is no option specifying flags without a slab list.
+ */
+ if (slab_list_specified) {
+ if (!global_slub_debug_changed)
+ slub_debug = 0;
+ slub_debug_string = saved_str;
+ }
out:
+ if (slub_debug != 0 || slub_debug_string)
+ static_branch_enable(&slub_debug_enabled);
if ((static_branch_unlikely(&init_on_alloc) ||
static_branch_unlikely(&init_on_free)) &&
(slub_debug & SLAB_POISON))
@@ -1319,7 +1398,6 @@ __setup("slub_debug", setup_slub_debug);
* @object_size: the size of an object without meta data
* @flags: flags to set
* @name: name of the cache
- * @ctor: constructor function
*
* Debug option(s) are applied to @flags. In addition to the debug
* option(s), if a slab name (or multiple) is specified i.e.
@@ -1327,41 +1405,47 @@ __setup("slub_debug", setup_slub_debug);
* then only the select slabs will receive the debug option(s).
*/
slab_flags_t kmem_cache_flags(unsigned int object_size,
- slab_flags_t flags, const char *name,
- void (*ctor)(void *))
+ slab_flags_t flags, const char *name)
{
char *iter;
size_t len;
-
- /* If slub_debug = 0, it folds into the if conditional. */
- if (!slub_debug_slabs)
- return flags | slub_debug;
+ char *next_block;
+ slab_flags_t block_flags;
len = strlen(name);
- iter = slub_debug_slabs;
- while (*iter) {
- char *end, *glob;
- size_t cmplen;
+ next_block = slub_debug_string;
+ /* Go through all blocks of debug options, see if any matches our slab's name */
+ while (next_block) {
+ next_block = parse_slub_debug_flags(next_block, &block_flags, &iter, false);
+ if (!iter)
+ continue;
+ /* Found a block that has a slab list, search it */
+ while (*iter) {
+ char *end, *glob;
+ size_t cmplen;
- end = strchrnul(iter, ',');
+ end = strchrnul(iter, ',');
+ if (next_block && next_block < end)
+ end = next_block - 1;
- glob = strnchr(iter, end - iter, '*');
- if (glob)
- cmplen = glob - iter;
- else
- cmplen = max_t(size_t, len, (end - iter));
+ glob = strnchr(iter, end - iter, '*');
+ if (glob)
+ cmplen = glob - iter;
+ else
+ cmplen = max_t(size_t, len, (end - iter));
- if (!strncmp(name, iter, cmplen)) {
- flags |= slub_debug;
- break;
+ if (!strncmp(name, iter, cmplen)) {
+ flags |= block_flags;
+ return flags;
+ }
+
+ if (!*end || *end == ';')
+ break;
+ iter = end + 1;
}
-
- if (!*end)
- break;
- iter = end + 1;
}
- return flags;
+ return flags | slub_debug;
}
#else /* !CONFIG_SLUB_DEBUG */
static inline void setup_object_debug(struct kmem_cache *s,
@@ -1386,8 +1470,7 @@ static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
struct page *page) {}
slab_flags_t kmem_cache_flags(unsigned int object_size,
- slab_flags_t flags, const char *name,
- void (*ctor)(void *))
+ slab_flags_t flags, const char *name)
{
return flags;
}
@@ -1450,12 +1533,18 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
if (!(s->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(x, s->object_size);
+ /* Use KCSAN to help debug racy use-after-free. */
+ if (!(s->flags & SLAB_TYPESAFE_BY_RCU))
+ __kcsan_check_access(x, s->object_size,
+ KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
+
/* KASAN might put x into memory quarantine, delaying its reuse */
return kasan_slab_free(s, x, _RET_IP_);
}
static inline bool slab_free_freelist_hook(struct kmem_cache *s,
- void **head, void **tail)
+ void **head, void **tail,
+ int *cnt)
{
void *object;
@@ -1490,6 +1579,12 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
*head = object;
if (!*tail)
*tail = object;
+ } else {
+ /*
+ * Adjust the reconstructed freelist depth
+ * accordingly if object's reuse is delayed.
+ */
+ --(*cnt);
}
} while (object != old_tail);
@@ -1526,10 +1621,8 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,
else
page = __alloc_pages_node(node, flags, order);
- if (page && charge_slab_page(page, flags, order, s)) {
- __free_pages(page, order);
- page = NULL;
- }
+ if (page)
+ account_slab_page(page, order, s);
return page;
}
@@ -1725,13 +1818,8 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
- if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
- gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
- flags &= ~GFP_SLAB_BUG_MASK;
- pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
- invalid_mask, &invalid_mask, flags, &flags);
- dump_stack();
- }
+ if (unlikely(flags & GFP_SLAB_BUG_MASK))
+ flags = kmalloc_fix_flags(flags);
return allocate_slab(s,
flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
@@ -1742,7 +1830,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
int order = compound_order(page);
int pages = 1 << order;
- if (s->flags & SLAB_CONSISTENCY_CHECKS) {
+ if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
void *p;
slab_pad_check(s, page);
@@ -1757,7 +1845,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
page->mapping = NULL;
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += pages;
- uncharge_slab_page(page, order, s);
+ unaccount_slab_page(page, order, s);
__free_pages(page, order);
}
@@ -1873,7 +1961,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
/*
* Racy check. If we mistakenly see no partial slabs then we
* just allocate an empty slab. If we mistakenly try to get a
- * partial slab and there is none available then get_partials()
+ * partial slab and there is none available then get_partial()
* will return NULL.
*/
if (!n || !n->nr_partial)
@@ -1918,7 +2006,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
struct zonelist *zonelist;
struct zoneref *z;
struct zone *zone;
- enum zone_type high_zoneidx = gfp_zone(flags);
+ enum zone_type highest_zoneidx = gfp_zone(flags);
void *object;
unsigned int cpuset_mems_cookie;
@@ -1947,7 +2035,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
do {
cpuset_mems_cookie = read_mems_allowed_begin();
zonelist = node_zonelist(mempolicy_slab_node(), flags);
- for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+ for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
struct kmem_cache_node *n;
n = get_node(s, zone_to_nid(zone));
@@ -1991,9 +2079,9 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
return get_any_partial(s, flags, c);
}
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
/*
- * Calculate the next globally unique transaction for disambiguiation
+ * Calculate the next globally unique transaction for disambiguation
* during cmpxchg. The transactions start with the cpu number and are then
* incremented by CONFIG_NR_CPUS.
*/
@@ -2036,7 +2124,7 @@ static inline void note_cmpxchg_failure(const char *n,
pr_info("%s %s: cmpxchg redo ", n, s->name);
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
pr_warn("due to cpu change %d -> %d\n",
tid_to_cpu(tid), tid_to_cpu(actual_tid));
@@ -2162,7 +2250,8 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
}
} else {
m = M_FULL;
- if (kmem_cache_debug(s) && !lock) {
+#ifdef CONFIG_SLUB_DEBUG
+ if ((s->flags & SLAB_STORE_USER) && !lock) {
lock = 1;
/*
* This also ensures that the scanning of full
@@ -2171,6 +2260,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
*/
spin_lock(&n->list_lock);
}
+#endif
}
if (l != m) {
@@ -2223,11 +2313,11 @@ static void unfreeze_partials(struct kmem_cache *s,
struct kmem_cache_node *n = NULL, *n2 = NULL;
struct page *page, *discard_page = NULL;
- while ((page = c->partial)) {
+ while ((page = slub_percpu_partial(c))) {
struct page new;
struct page old;
- c->partial = page->next;
+ slub_set_percpu_partial(c, page);
n2 = get_node(s, page_to_nid(page));
if (n != n2) {
@@ -2300,7 +2390,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
if (oldpage) {
pobjects = oldpage->pobjects;
pages = oldpage->pages;
- if (drain && pobjects > s->cpu_partial) {
+ if (drain && pobjects > slub_cpu_partial(s)) {
unsigned long flags;
/*
* partial array is full. Move the existing
@@ -2325,7 +2415,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
!= oldpage);
- if (unlikely(!s->cpu_partial)) {
+ if (unlikely(!slub_cpu_partial(s))) {
unsigned long flags;
local_irq_save(flags);
@@ -2376,7 +2466,7 @@ static bool has_cpu_slab(int cpu, void *info)
static void flush_all(struct kmem_cache *s)
{
- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
+ on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
}
/*
@@ -2578,6 +2668,8 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
void *freelist;
struct page *page;
+ stat(s, ALLOC_SLOWPATH);
+
page = c->page;
if (!page) {
/*
@@ -2682,7 +2774,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long flags;
local_irq_save(flags);
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
/*
* We may have been preempted and rescheduled on a different
* cpu before disabling interrupts. Need to reload cpu area
@@ -2724,8 +2816,9 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
struct kmem_cache_cpu *c;
struct page *page;
unsigned long tid;
+ struct obj_cgroup *objcg = NULL;
- s = slab_pre_alloc_hook(s, gfpflags);
+ s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
if (!s)
return NULL;
redo:
@@ -2736,13 +2829,13 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
* as we end up on the original cpu again when doing the cmpxchg.
*
* We should guarantee that tid and kmem_cache are retrieved on
- * the same cpu. It could be different if CONFIG_PREEMPT so we need
+ * the same cpu. It could be different if CONFIG_PREEMPTION so we need
* to check if it is matched or not.
*/
do {
tid = this_cpu_read(s->cpu_slab->tid);
c = raw_cpu_ptr(s->cpu_slab);
- } while (IS_ENABLED(CONFIG_PREEMPT) &&
+ } while (IS_ENABLED(CONFIG_PREEMPTION) &&
unlikely(tid != READ_ONCE(c->tid)));
/*
@@ -2766,7 +2859,6 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
page = c->page;
if (unlikely(!object || !page || !node_match(page, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c);
- stat(s, ALLOC_SLOWPATH);
} else {
void *next_object = get_freepointer_safe(s, object);
@@ -2801,7 +2893,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
memset(object, 0, s->object_size);
- slab_post_alloc_hook(s, gfpflags, 1, &object);
+ slab_post_alloc_hook(s, objcg, gfpflags, 1, &object);
return object;
}
@@ -2881,7 +2973,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
struct page new;
unsigned long counters;
struct kmem_cache_node *n = NULL;
- unsigned long uninitialized_var(flags);
+ unsigned long flags;
stat(s, FREE_SLOWPATH);
@@ -2935,20 +3027,21 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
if (likely(!n)) {
- /*
- * If we just froze the page then put it onto the
- * per cpu partial list.
- */
- if (new.frozen && !was_frozen) {
+ if (likely(was_frozen)) {
+ /*
+ * The list lock was not taken therefore no list
+ * activity can be necessary.
+ */
+ stat(s, FREE_FROZEN);
+ } else if (new.frozen) {
+ /*
+ * If we just froze the page then put it onto the
+ * per cpu partial list.
+ */
put_cpu_partial(s, page, 1);
stat(s, CPU_PARTIAL_FREE);
}
- /*
- * The list lock was not taken therefore no list
- * activity can be necessary.
- */
- if (was_frozen)
- stat(s, FREE_FROZEN);
+
return;
}
@@ -3006,6 +3099,10 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
void *tail_obj = tail ? : head;
struct kmem_cache_cpu *c;
unsigned long tid;
+
+ /* memcg_slab_free_hook() is already called for bulk free. */
+ if (!tail)
+ memcg_slab_free_hook(s, &head, 1);
redo:
/*
* Determine the currently cpus per cpu slab.
@@ -3016,7 +3113,7 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
do {
tid = this_cpu_read(s->cpu_slab->tid);
c = raw_cpu_ptr(s->cpu_slab);
- } while (IS_ENABLED(CONFIG_PREEMPT) &&
+ } while (IS_ENABLED(CONFIG_PREEMPTION) &&
unlikely(tid != READ_ONCE(c->tid)));
/* Same with comment on barrier() in slab_alloc_node() */
@@ -3049,7 +3146,7 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
* With KASAN enabled slab_free_freelist_hook modifies the freelist
* to remove objects, whose reuse must be delayed.
*/
- if (slab_free_freelist_hook(s, &head, &tail))
+ if (slab_free_freelist_hook(s, &head, &tail, &cnt))
do_slab_free(s, page, head, tail, cnt, addr);
}
@@ -3167,6 +3264,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
if (WARN_ON(!size))
return;
+ memcg_slab_free_hook(s, p, size);
do {
struct detached_freelist df;
@@ -3185,9 +3283,10 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
{
struct kmem_cache_cpu *c;
int i;
+ struct obj_cgroup *objcg = NULL;
/* memcg and kmem_cache debug support */
- s = slab_pre_alloc_hook(s, flags);
+ s = slab_pre_alloc_hook(s, &objcg, size, flags);
if (unlikely(!s))
return false;
/*
@@ -3241,11 +3340,11 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
}
/* memcg and kmem_cache debug support */
- slab_post_alloc_hook(s, flags, size, p);
+ slab_post_alloc_hook(s, objcg, flags, size, p);
return i;
error:
local_irq_enable();
- slab_post_alloc_hook(s, flags, i, p);
+ slab_post_alloc_hook(s, objcg, flags, i, p);
__kmem_cache_free_bulk(s, i, p);
return 0;
}
@@ -3530,15 +3629,15 @@ static void set_cpu_partial(struct kmem_cache *s)
* 50% to keep some capacity around for frees.
*/
if (!kmem_cache_has_cpu_partial(s))
- s->cpu_partial = 0;
+ slub_set_cpu_partial(s, 0);
else if (s->size >= PAGE_SIZE)
- s->cpu_partial = 2;
+ slub_set_cpu_partial(s, 2);
else if (s->size >= 1024)
- s->cpu_partial = 6;
+ slub_set_cpu_partial(s, 6);
else if (s->size >= 256)
- s->cpu_partial = 13;
+ slub_set_cpu_partial(s, 13);
else
- s->cpu_partial = 30;
+ slub_set_cpu_partial(s, 30);
#endif
}
@@ -3583,7 +3682,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
/*
* With that we have determined the number of bytes in actual use
- * by the object. This is the potential offset to the free pointer.
+ * by the object and redzoning.
*/
s->inuse = size;
@@ -3606,6 +3705,13 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
*/
s->offset = size;
size += sizeof(void *);
+ } else {
+ /*
+ * Store freelist pointer near middle of object to keep
+ * it away from the edges of the object to avoid small
+ * sized over/underflows from neighboring allocations.
+ */
+ s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *));
}
#ifdef CONFIG_SLUB_DEBUG
@@ -3642,6 +3748,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
*/
size = ALIGN(size, s->align);
s->size = size;
+ s->reciprocal_size = reciprocal_value(size);
if (forced_order >= 0)
order = forced_order;
else
@@ -3676,7 +3783,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
{
- s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
+ s->flags = kmem_cache_flags(s->size, flags, s->name);
#ifdef CONFIG_SLAB_FREELIST_HARDENED
s->random = get_random_long();
#endif
@@ -3727,33 +3834,32 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
if (alloc_kmem_cache_cpus(s))
return 0;
- free_kmem_cache_nodes(s);
error:
+ __kmem_cache_release(s);
return -EINVAL;
}
static void list_slab_objects(struct kmem_cache *s, struct page *page,
- const char *text)
+ const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
void *addr = page_address(page);
+ unsigned long *map;
void *p;
- unsigned long *map = bitmap_zalloc(page->objects, GFP_ATOMIC);
- if (!map)
- return;
+
slab_err(s, page, text, s->name);
slab_lock(page);
- get_map(s, page, map);
+ map = get_map(s, page);
for_each_object(p, s, addr, page->objects) {
- if (!test_bit(slab_index(p, s, addr), map)) {
+ if (!test_bit(__obj_to_index(s, addr, p), map)) {
pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
print_tracking(s, p);
}
}
+ put_map(map);
slab_unlock(page);
- bitmap_free(map);
#endif
}
@@ -3775,7 +3881,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
list_add(&page->slab_list, &discard);
} else {
list_slab_objects(s, page,
- "Objects remaining in %s on __kmem_cache_shutdown()");
+ "Objects remaining in %s on __kmem_cache_shutdown()");
}
}
spin_unlock_irq(&n->list_lock);
@@ -3810,7 +3916,6 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
if (n->nr_partial || slabs_node(s, node))
return 1;
}
- sysfs_slab_remove(s);
return 0;
}
@@ -3880,8 +3985,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
page = alloc_pages_node(node, flags, order);
if (page) {
ptr = page_address(page);
- mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
- 1 << order);
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+ PAGE_SIZE << order);
}
return kmalloc_large_node_hook(ptr, size, flags);
@@ -3948,7 +4053,7 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
offset = (ptr - page_address(page)) % s->size;
/* Adjust for redzone and reject if within the redzone. */
- if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
+ if (kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
if (offset < s->red_left_pad)
usercopy_abort("SLUB object in left red zone",
s->name, to_user, offset, n);
@@ -4012,8 +4117,8 @@ void kfree(const void *x)
BUG_ON(!PageCompound(page));
kfree_hook(object);
- mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
- -(1 << order));
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+ -(PAGE_SIZE << order));
__free_pages(page, order);
return;
}
@@ -4094,36 +4199,6 @@ int __kmem_cache_shrink(struct kmem_cache *s)
return ret;
}
-#ifdef CONFIG_MEMCG
-void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s)
-{
- /*
- * Called with all the locks held after a sched RCU grace period.
- * Even if @s becomes empty after shrinking, we can't know that @s
- * doesn't have allocations already in-flight and thus can't
- * destroy @s until the associated memcg is released.
- *
- * However, let's remove the sysfs files for empty caches here.
- * Each cache has a lot of interface files which aren't
- * particularly useful for empty draining caches; otherwise, we can
- * easily end up with millions of unnecessary sysfs files on
- * systems which have a lot of memory and transient cgroups.
- */
- if (!__kmem_cache_shrink(s))
- sysfs_slab_remove(s);
-}
-
-void __kmemcg_cache_deactivate(struct kmem_cache *s)
-{
- /*
- * Disable empty slabs caching. Used to avoid pinning offline
- * memory cgroups by kmem pages that can be freed.
- */
- slub_set_cpu_partial(s, 0);
- s->min_partial = 0;
-}
-#endif /* CONFIG_MEMCG */
-
static int slab_mem_going_offline_callback(void *arg)
{
struct kmem_cache *s;
@@ -4278,9 +4353,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
p->slab_cache = s;
#endif
}
- slab_init_memcg_params(s);
list_add(&s->list, &slab_caches);
- memcg_link_cache(s, NULL);
return s;
}
@@ -4335,7 +4408,7 @@ struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
slab_flags_t flags, void (*ctor)(void *))
{
- struct kmem_cache *s, *c;
+ struct kmem_cache *s;
s = find_mergeable(size, align, flags, name, ctor);
if (s) {
@@ -4348,11 +4421,6 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
s->object_size = max(s->object_size, size);
s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
- for_each_memcg_cache(c, s) {
- c->object_size = s->object_size;
- c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
- }
-
if (sysfs_slab_alias(s, name)) {
s->refcount--;
s = NULL;
@@ -4374,7 +4442,6 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
if (slab_state <= UP)
return 0;
- memcg_propagate_slab_attrs(s);
err = sysfs_slab_add(s);
if (err)
__kmem_cache_release(s);
@@ -4402,6 +4469,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
return ret;
}
+EXPORT_SYMBOL(__kmalloc_track_caller);
#ifdef CONFIG_NUMA
void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
@@ -4432,6 +4500,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
return ret;
}
+EXPORT_SYMBOL(__kmalloc_node_track_caller);
#endif
#ifdef CONFIG_SYSFS
@@ -4447,43 +4516,33 @@ static int count_total(struct page *page)
#endif
#ifdef CONFIG_SLUB_DEBUG
-static int validate_slab(struct kmem_cache *s, struct page *page,
- unsigned long *map)
+static void validate_slab(struct kmem_cache *s, struct page *page)
{
void *p;
void *addr = page_address(page);
+ unsigned long *map;
- if (!check_slab(s, page) ||
- !on_freelist(s, page, NULL))
- return 0;
+ slab_lock(page);
+
+ if (!check_slab(s, page) || !on_freelist(s, page, NULL))
+ goto unlock;
/* Now we know that a valid freelist exists */
- bitmap_zero(map, page->objects);
-
- get_map(s, page, map);
+ map = get_map(s, page);
for_each_object(p, s, addr, page->objects) {
- if (test_bit(slab_index(p, s, addr), map))
- if (!check_object(s, page, p, SLUB_RED_INACTIVE))
- return 0;
+ u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
+ SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
+
+ if (!check_object(s, page, p, val))
+ break;
}
-
- for_each_object(p, s, addr, page->objects)
- if (!test_bit(slab_index(p, s, addr), map))
- if (!check_object(s, page, p, SLUB_RED_ACTIVE))
- return 0;
- return 1;
-}
-
-static void validate_slab_slab(struct kmem_cache *s, struct page *page,
- unsigned long *map)
-{
- slab_lock(page);
- validate_slab(s, page, map);
+ put_map(map);
+unlock:
slab_unlock(page);
}
static int validate_slab_node(struct kmem_cache *s,
- struct kmem_cache_node *n, unsigned long *map)
+ struct kmem_cache_node *n)
{
unsigned long count = 0;
struct page *page;
@@ -4492,7 +4551,7 @@ static int validate_slab_node(struct kmem_cache *s,
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, slab_list) {
- validate_slab_slab(s, page, map);
+ validate_slab(s, page);
count++;
}
if (count != n->nr_partial)
@@ -4503,7 +4562,7 @@ static int validate_slab_node(struct kmem_cache *s,
goto out;
list_for_each_entry(page, &n->full, slab_list) {
- validate_slab_slab(s, page, map);
+ validate_slab(s, page);
count++;
}
if (count != atomic_long_read(&n->nr_slabs))
@@ -4520,15 +4579,11 @@ static long validate_slab_cache(struct kmem_cache *s)
int node;
unsigned long count = 0;
struct kmem_cache_node *n;
- unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
-
- if (!map)
- return -ENOMEM;
flush_all(s);
for_each_kmem_cache_node(s, node, n)
- count += validate_slab_node(s, n, map);
- bitmap_free(map);
+ count += validate_slab_node(s, n);
+
return count;
}
/*
@@ -4658,18 +4713,17 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
}
static void process_slab(struct loc_track *t, struct kmem_cache *s,
- struct page *page, enum track_item alloc,
- unsigned long *map)
+ struct page *page, enum track_item alloc)
{
void *addr = page_address(page);
void *p;
+ unsigned long *map;
- bitmap_zero(map, page->objects);
- get_map(s, page, map);
-
+ map = get_map(s, page);
for_each_object(p, s, addr, page->objects)
- if (!test_bit(slab_index(p, s, addr), map))
+ if (!test_bit(__obj_to_index(s, addr, p), map))
add_location(t, s, get_track(s, p, alloc));
+ put_map(map);
}
static int list_locations(struct kmem_cache *s, char *buf,
@@ -4680,11 +4734,9 @@ static int list_locations(struct kmem_cache *s, char *buf,
struct loc_track t = { 0, 0, NULL };
int node;
struct kmem_cache_node *n;
- unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
- if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
- GFP_KERNEL)) {
- bitmap_free(map);
+ if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
+ GFP_KERNEL)) {
return sprintf(buf, "Out of memory\n");
}
/* Push back cpu slabs */
@@ -4699,9 +4751,9 @@ static int list_locations(struct kmem_cache *s, char *buf,
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, slab_list)
- process_slab(&t, s, page, alloc, map);
+ process_slab(&t, s, page, alloc);
list_for_each_entry(page, &n->full, slab_list)
- process_slab(&t, s, page, alloc, map);
+ process_slab(&t, s, page, alloc);
spin_unlock_irqrestore(&n->list_lock, flags);
}
@@ -4750,7 +4802,6 @@ static int list_locations(struct kmem_cache *s, char *buf,
}
free_loc_track(&t);
- bitmap_free(map);
if (!t.count)
len += sprintf(buf, "No data\n");
return len;
@@ -4954,20 +5005,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
return x + sprintf(buf + x, "\n");
}
-#ifdef CONFIG_SLUB_DEBUG
-static int any_slab_objects(struct kmem_cache *s)
-{
- int node;
- struct kmem_cache_node *n;
-
- for_each_kmem_cache_node(s, node, n)
- if (atomic_long_read(&n->total_objects))
- return 1;
-
- return 0;
-}
-#endif
-
#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
#define to_slab(n) container_of(n, struct kmem_cache, kobj)
@@ -5009,28 +5046,11 @@ static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
}
SLAB_ATTR_RO(objs_per_slab);
-static ssize_t order_store(struct kmem_cache *s,
- const char *buf, size_t length)
-{
- unsigned int order;
- int err;
-
- err = kstrtouint(buf, 10, &order);
- if (err)
- return err;
-
- if (order > slub_max_order || order < slub_min_order)
- return -EINVAL;
-
- calculate_sizes(s, order);
- return length;
-}
-
static ssize_t order_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%u\n", oo_order(s->oo));
}
-SLAB_ATTR(order);
+SLAB_ATTR_RO(order);
static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
{
@@ -5152,16 +5172,7 @@ static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
}
-
-static ssize_t reclaim_account_store(struct kmem_cache *s,
- const char *buf, size_t length)
-{
- s->flags &= ~SLAB_RECLAIM_ACCOUNT;
- if (buf[0] == '1')
- s->flags |= SLAB_RECLAIM_ACCOUNT;
- return length;
-}
-SLAB_ATTR(reclaim_account);
+SLAB_ATTR_RO(reclaim_account);
static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
{
@@ -5206,104 +5217,34 @@ static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
}
-
-static ssize_t sanity_checks_store(struct kmem_cache *s,
- const char *buf, size_t length)
-{
- s->flags &= ~SLAB_CONSISTENCY_CHECKS;
- if (buf[0] == '1') {
- s->flags &= ~__CMPXCHG_DOUBLE;
- s->flags |= SLAB_CONSISTENCY_CHECKS;
- }
- return length;
-}
-SLAB_ATTR(sanity_checks);
+SLAB_ATTR_RO(sanity_checks);
static ssize_t trace_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
}
-
-static ssize_t trace_store(struct kmem_cache *s, const char *buf,
- size_t length)
-{
- /*
- * Tracing a merged cache is going to give confusing results
- * as well as cause other issues like converting a mergeable
- * cache into an umergeable one.
- */
- if (s->refcount > 1)
- return -EINVAL;
-
- s->flags &= ~SLAB_TRACE;
- if (buf[0] == '1') {
- s->flags &= ~__CMPXCHG_DOUBLE;
- s->flags |= SLAB_TRACE;
- }
- return length;
-}
-SLAB_ATTR(trace);
+SLAB_ATTR_RO(trace);
static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
}
-static ssize_t red_zone_store(struct kmem_cache *s,
- const char *buf, size_t length)
-{
- if (any_slab_objects(s))
- return -EBUSY;
-
- s->flags &= ~SLAB_RED_ZONE;
- if (buf[0] == '1') {
- s->flags |= SLAB_RED_ZONE;
- }
- calculate_sizes(s, -1);
- return length;
-}
-SLAB_ATTR(red_zone);
+SLAB_ATTR_RO(red_zone);
static ssize_t poison_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
}
-static ssize_t poison_store(struct kmem_cache *s,
- const char *buf, size_t length)
-{
- if (any_slab_objects(s))
- return -EBUSY;
-
- s->flags &= ~SLAB_POISON;
- if (buf[0] == '1') {
- s->flags |= SLAB_POISON;
- }
- calculate_sizes(s, -1);
- return length;
-}
-SLAB_ATTR(poison);
+SLAB_ATTR_RO(poison);
static ssize_t store_user_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
}
-static ssize_t store_user_store(struct kmem_cache *s,
- const char *buf, size_t length)
-{
- if (any_slab_objects(s))
- return -EBUSY;
-
- s->flags &= ~SLAB_STORE_USER;
- if (buf[0] == '1') {
- s->flags &= ~__CMPXCHG_DOUBLE;
- s->flags |= SLAB_STORE_USER;
- }
- calculate_sizes(s, -1);
- return length;
-}
-SLAB_ATTR(store_user);
+SLAB_ATTR_RO(store_user);
static ssize_t validate_show(struct kmem_cache *s, char *buf)
{
@@ -5346,19 +5287,7 @@ static ssize_t failslab_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
}
-
-static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
- size_t length)
-{
- if (s->refcount > 1)
- return -EINVAL;
-
- s->flags &= ~SLAB_FAILSLAB;
- if (buf[0] == '1')
- s->flags |= SLAB_FAILSLAB;
- return length;
-}
-SLAB_ATTR(failslab);
+SLAB_ATTR_RO(failslab);
#endif
static ssize_t shrink_show(struct kmem_cache *s, char *buf)
@@ -5370,7 +5299,7 @@ static ssize_t shrink_store(struct kmem_cache *s,
const char *buf, size_t length)
{
if (buf[0] == '1')
- kmem_cache_shrink_all(s);
+ kmem_cache_shrink(s);
else
return -EINVAL;
return length;
@@ -5594,98 +5523,9 @@ static ssize_t slab_attr_store(struct kobject *kobj,
return -EIO;
err = attribute->store(s, buf, len);
-#ifdef CONFIG_MEMCG
- if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
- struct kmem_cache *c;
-
- mutex_lock(&slab_mutex);
- if (s->max_attr_size < len)
- s->max_attr_size = len;
-
- /*
- * This is a best effort propagation, so this function's return
- * value will be determined by the parent cache only. This is
- * basically because not all attributes will have a well
- * defined semantics for rollbacks - most of the actions will
- * have permanent effects.
- *
- * Returning the error value of any of the children that fail
- * is not 100 % defined, in the sense that users seeing the
- * error code won't be able to know anything about the state of
- * the cache.
- *
- * Only returning the error code for the parent cache at least
- * has well defined semantics. The cache being written to
- * directly either failed or succeeded, in which case we loop
- * through the descendants with best-effort propagation.
- */
- for_each_memcg_cache(c, s)
- attribute->store(c, buf, len);
- mutex_unlock(&slab_mutex);
- }
-#endif
return err;
}
-static void memcg_propagate_slab_attrs(struct kmem_cache *s)
-{
-#ifdef CONFIG_MEMCG
- int i;
- char *buffer = NULL;
- struct kmem_cache *root_cache;
-
- if (is_root_cache(s))
- return;
-
- root_cache = s->memcg_params.root_cache;
-
- /*
- * This mean this cache had no attribute written. Therefore, no point
- * in copying default values around
- */
- if (!root_cache->max_attr_size)
- return;
-
- for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
- char mbuf[64];
- char *buf;
- struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
- ssize_t len;
-
- if (!attr || !attr->store || !attr->show)
- continue;
-
- /*
- * It is really bad that we have to allocate here, so we will
- * do it only as a fallback. If we actually allocate, though,
- * we can just use the allocated buffer until the end.
- *
- * Most of the slub attributes will tend to be very small in
- * size, but sysfs allows buffers up to a page, so they can
- * theoretically happen.
- */
- if (buffer)
- buf = buffer;
- else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) &&
- !IS_ENABLED(CONFIG_SLUB_STATS))
- buf = mbuf;
- else {
- buffer = (char *) get_zeroed_page(GFP_KERNEL);
- if (WARN_ON(!buffer))
- continue;
- buf = buffer;
- }
-
- len = attr->show(root_cache, buf);
- if (len > 0)
- attr->store(s, buf, len);
- }
-
- if (buffer)
- free_page((unsigned long)buffer);
-#endif /* CONFIG_MEMCG */
-}
-
static void kmem_cache_release(struct kobject *k)
{
slab_kmem_cache_release(to_slab(k));
@@ -5701,27 +5541,10 @@ static struct kobj_type slab_ktype = {
.release = kmem_cache_release,
};
-static int uevent_filter(struct kset *kset, struct kobject *kobj)
-{
- struct kobj_type *ktype = get_ktype(kobj);
-
- if (ktype == &slab_ktype)
- return 1;
- return 0;
-}
-
-static const struct kset_uevent_ops slab_uevent_ops = {
- .filter = uevent_filter,
-};
-
static struct kset *slab_kset;
static inline struct kset *cache_kset(struct kmem_cache *s)
{
-#ifdef CONFIG_MEMCG
- if (!is_root_cache(s))
- return s->memcg_params.root_cache->memcg_kset;
-#endif
return slab_kset;
}
@@ -5764,28 +5587,6 @@ static char *create_unique_id(struct kmem_cache *s)
return name;
}
-static void sysfs_slab_remove_workfn(struct work_struct *work)
-{
- struct kmem_cache *s =
- container_of(work, struct kmem_cache, kobj_remove_work);
-
- if (!s->kobj.state_in_sysfs)
- /*
- * For a memcg cache, this may be called during
- * deactivation and again on shutdown. Remove only once.
- * A cache is never shut down before deactivation is
- * complete, so no need to worry about synchronization.
- */
- goto out;
-
-#ifdef CONFIG_MEMCG
- kset_unregister(s->memcg_kset);
-#endif
- kobject_uevent(&s->kobj, KOBJ_REMOVE);
-out:
- kobject_put(&s->kobj);
-}
-
static int sysfs_slab_add(struct kmem_cache *s)
{
int err;
@@ -5793,8 +5594,6 @@ static int sysfs_slab_add(struct kmem_cache *s)
struct kset *kset = cache_kset(s);
int unmergeable = slab_unmergeable(s);
- INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
-
if (!kset) {
kobject_init(&s->kobj, &slab_ktype);
return 0;
@@ -5829,17 +5628,6 @@ static int sysfs_slab_add(struct kmem_cache *s)
if (err)
goto out_del_kobj;
-#ifdef CONFIG_MEMCG
- if (is_root_cache(s) && memcg_sysfs_enabled) {
- s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
- if (!s->memcg_kset) {
- err = -ENOMEM;
- goto out_del_kobj;
- }
- }
-#endif
-
- kobject_uevent(&s->kobj, KOBJ_ADD);
if (!unmergeable) {
/* Setup first alias */
sysfs_slab_alias(s, s->name);
@@ -5853,19 +5641,6 @@ static int sysfs_slab_add(struct kmem_cache *s)
goto out;
}
-static void sysfs_slab_remove(struct kmem_cache *s)
-{
- if (slab_state < FULL)
- /*
- * Sysfs has not been setup yet so no need to remove the
- * cache from sysfs.
- */
- return;
-
- kobject_get(&s->kobj);
- schedule_work(&s->kobj_remove_work);
-}
-
void sysfs_slab_unlink(struct kmem_cache *s)
{
if (slab_state >= FULL)
@@ -5920,7 +5695,7 @@ static int __init slab_sysfs_init(void)
mutex_lock(&slab_mutex);
- slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
+ slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
if (!slab_kset) {
mutex_unlock(&slab_mutex);
pr_err("Cannot register slab subsystem.\n");