Skip to content

Commit 148f95f

Browse files
committed
Merge tag 'slab-for-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
Pull slab updates from Vlastimil Babka:

 - The percpu sheaves caching layer was introduced as opt-in in 6.18 and
   now we enable it for all caches and remove the previous cpu (partial)
   slab caching mechanism. Besides the lower locking overhead and much
   more likely fastpath when freeing, this removes the rather complicated
   code related to the cpu slab lockless fastpaths (using
   this_cpu_try_cmpxchg128/64) and all its complications for PREEMPT_RT
   or kmalloc_nolock(). The lockless slab freelist+counters update
   operation using try_cmpxchg128/64 remains and is crucial for freeing
   remote NUMA objects, and to allow flushing objects from sheaves to
   slabs mostly without the node list_lock (Vlastimil Babka)

 - Eliminate slabobj_ext metadata overhead when possible. Instead of
   using kmalloc() to allocate the array for memcg and/or allocation
   profiling tag pointers, use leftover space in a slab or per-object
   padding due to alignment (Harry Yoo)

 - Various followup improvements to the above (Hao Li)

* tag 'slab-for-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab: (39 commits)
  slub: let need_slab_obj_exts() return false if SLAB_NO_OBJ_EXT is set
  mm/slab: only allow SLAB_OBJ_EXT_IN_OBJ for unmergeable caches
  mm/slab: place slabobj_ext metadata in unused space within s->size
  mm/slab: move [__]ksize and slab_ksize() to mm/slub.c
  mm/slab: save memory by allocating slabobj_ext array from leftover
  mm/memcontrol,alloc_tag: handle slabobj_ext access under KASAN poison
  mm/slab: use stride to access slabobj_ext
  mm/slab: abstract slabobj_ext access via new slab_obj_ext() helper
  ext4: specify the free pointer offset for ext4_inode_cache
  mm/slab: allow specifying free pointer offset when using constructor
  mm/slab: use unsigned long for orig_size to ensure proper metadata align
  slub: clarify object field layout comments
  mm/slab: avoid allocating slabobj_ext array from its own slab
  slub: avoid list_lock contention from __refill_objects_any()
  mm/slub: cleanup and repurpose some stat items
  mm/slub: remove DEACTIVATE_TO_* stat items
  slab: remove frozen slab checks from __slab_free()
  slab: update overview comments
  slab: refill sheaves from all nodes
  slab: remove unused PREEMPT_RT specific macros
  ...
2 parents 41f1a08 + 815c8e3 commit 148f95f

9 files changed

Lines changed: 1691 additions & 1997 deletions

File tree

fs/ext4/super.c

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1496,12 +1496,19 @@ static void init_once(void *foo)
14961496

14971497
static int __init init_inodecache(void)
14981498
{
1499-
ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
1500-
sizeof(struct ext4_inode_info), 0,
1501-
SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
1502-
offsetof(struct ext4_inode_info, i_data),
1503-
sizeof_field(struct ext4_inode_info, i_data),
1504-
init_once);
1499+
struct kmem_cache_args args = {
1500+
.useroffset = offsetof(struct ext4_inode_info, i_data),
1501+
.usersize = sizeof_field(struct ext4_inode_info, i_data),
1502+
.use_freeptr_offset = true,
1503+
.freeptr_offset = offsetof(struct ext4_inode_info, i_flags),
1504+
.ctor = init_once,
1505+
};
1506+
1507+
ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
1508+
sizeof(struct ext4_inode_info),
1509+
&args,
1510+
SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT);
1511+
15051512
if (ext4_inode_cachep == NULL)
15061513
return -ENOMEM;
15071514
return 0;

include/linux/slab.h

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,9 @@ enum _slab_flag_bits {
5858
#endif
5959
_SLAB_OBJECT_POISON,
6060
_SLAB_CMPXCHG_DOUBLE,
61-
#ifdef CONFIG_SLAB_OBJ_EXT
6261
_SLAB_NO_OBJ_EXT,
62+
#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
63+
_SLAB_OBJ_EXT_IN_OBJ,
6364
#endif
6465
_SLAB_FLAGS_LAST_BIT
6566
};
@@ -239,10 +240,12 @@ enum _slab_flag_bits {
239240
#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */
240241

241242
/* Slab created using create_boot_cache */
242-
#ifdef CONFIG_SLAB_OBJ_EXT
243243
#define SLAB_NO_OBJ_EXT __SLAB_FLAG_BIT(_SLAB_NO_OBJ_EXT)
244+
245+
#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
246+
#define SLAB_OBJ_EXT_IN_OBJ __SLAB_FLAG_BIT(_SLAB_OBJ_EXT_IN_OBJ)
244247
#else
245-
#define SLAB_NO_OBJ_EXT __SLAB_FLAG_UNUSED
248+
#define SLAB_OBJ_EXT_IN_OBJ __SLAB_FLAG_UNUSED
246249
#endif
247250

248251
/*
@@ -300,24 +303,26 @@ struct kmem_cache_args {
300303
unsigned int usersize;
301304
/**
302305
* @freeptr_offset: Custom offset for the free pointer
303-
* in &SLAB_TYPESAFE_BY_RCU caches
306+
* in caches with &SLAB_TYPESAFE_BY_RCU or @ctor
304307
*
305-
* By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
306-
* outside of the object. This might cause the object to grow in size.
307-
* Cache creators that have a reason to avoid this can specify a custom
308-
* free pointer offset in their struct where the free pointer will be
309-
* placed.
308+
* By default, &SLAB_TYPESAFE_BY_RCU and @ctor caches place the free
309+
* pointer outside of the object. This might cause the object to grow
310+
* in size. Cache creators that have a reason to avoid this can specify
311+
* a custom free pointer offset in their data structure where the free
312+
* pointer will be placed.
310313
*
311-
* Note that placing the free pointer inside the object requires the
312-
* caller to ensure that no fields are invalidated that are required to
313-
* guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
314-
* details).
314+
* For caches with &SLAB_TYPESAFE_BY_RCU, the caller must ensure that
315+
* the free pointer does not overlay fields required to guard against
316+
* object recycling (See &SLAB_TYPESAFE_BY_RCU for details).
315317
*
316-
* Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
317-
* is specified, %use_freeptr_offset must be set %true.
318+
* For caches with @ctor, the caller must ensure that the free pointer
319+
* does not overlay fields initialized by the constructor.
318320
*
319-
* Note that @ctor currently isn't supported with custom free pointers
320-
* as a @ctor requires an external free pointer.
321+
* Currently, only caches with &SLAB_TYPESAFE_BY_RCU or @ctor
322+
* may specify @freeptr_offset.
323+
*
324+
* Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
325+
* is specified, @use_freeptr_offset must be set %true.
321326
*/
322327
unsigned int freeptr_offset;
323328
/**
@@ -508,7 +513,6 @@ void * __must_check krealloc_node_align_noprof(const void *objp, size_t new_size
508513
void kfree(const void *objp);
509514
void kfree_nolock(const void *objp);
510515
void kfree_sensitive(const void *objp);
511-
size_t __ksize(const void *objp);
512516

513517
DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T))
514518
DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T))

mm/Kconfig

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -247,17 +247,6 @@ config SLUB_STATS
247247
out which slabs are relevant to a particular load.
248248
Try running: slabinfo -DA
249249

250-
config SLUB_CPU_PARTIAL
251-
default y
252-
depends on SMP && !SLUB_TINY
253-
bool "Enable per cpu partial caches"
254-
help
255-
Per cpu partial caches accelerate objects allocation and freeing
256-
that is local to a processor at the price of more indeterminism
257-
in the latency of the free. On overflow these caches will be cleared
258-
which requires the taking of locks that may cause latency spikes.
259-
Typically one would choose no for a realtime system.
260-
261250
config RANDOM_KMALLOC_CACHES
262251
default n
263252
depends on !SLUB_TINY

mm/internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -838,6 +838,7 @@ static inline struct page *alloc_frozen_pages_noprof(gfp_t gfp, unsigned int ord
838838
struct page *alloc_frozen_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order);
839839
#define alloc_frozen_pages_nolock(...) \
840840
alloc_hooks(alloc_frozen_pages_nolock_noprof(__VA_ARGS__))
841+
void free_frozen_pages_nolock(struct page *page, unsigned int order);
841842

842843
extern void zone_pcp_reset(struct zone *zone);
843844
extern void zone_pcp_disable(struct zone *zone);

mm/memcontrol.c

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2627,16 +2627,24 @@ struct mem_cgroup *mem_cgroup_from_obj_slab(struct slab *slab, void *p)
26272627
* Memcg membership data for each individual object is saved in
26282628
* slab->obj_exts.
26292629
*/
2630-
struct slabobj_ext *obj_exts;
2630+
unsigned long obj_exts;
2631+
struct slabobj_ext *obj_ext;
26312632
unsigned int off;
26322633

26332634
obj_exts = slab_obj_exts(slab);
26342635
if (!obj_exts)
26352636
return NULL;
26362637

2638+
get_slab_obj_exts(obj_exts);
26372639
off = obj_to_index(slab->slab_cache, slab, p);
2638-
if (obj_exts[off].objcg)
2639-
return obj_cgroup_memcg(obj_exts[off].objcg);
2640+
obj_ext = slab_obj_ext(slab, obj_exts, off);
2641+
if (obj_ext->objcg) {
2642+
struct obj_cgroup *objcg = obj_ext->objcg;
2643+
2644+
put_slab_obj_exts(obj_exts);
2645+
return obj_cgroup_memcg(objcg);
2646+
}
2647+
put_slab_obj_exts(obj_exts);
26402648

26412649
return NULL;
26422650
}
@@ -3222,6 +3230,9 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
32223230
}
32233231

32243232
for (i = 0; i < size; i++) {
3233+
unsigned long obj_exts;
3234+
struct slabobj_ext *obj_ext;
3235+
32253236
slab = virt_to_slab(p[i]);
32263237

32273238
if (!slab_obj_exts(slab) &&
@@ -3244,29 +3255,35 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
32443255
slab_pgdat(slab), cache_vmstat_idx(s)))
32453256
return false;
32463257

3258+
obj_exts = slab_obj_exts(slab);
3259+
get_slab_obj_exts(obj_exts);
32473260
off = obj_to_index(s, slab, p[i]);
3261+
obj_ext = slab_obj_ext(slab, obj_exts, off);
32483262
obj_cgroup_get(objcg);
3249-
slab_obj_exts(slab)[off].objcg = objcg;
3263+
obj_ext->objcg = objcg;
3264+
put_slab_obj_exts(obj_exts);
32503265
}
32513266

32523267
return true;
32533268
}
32543269

32553270
void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
3256-
void **p, int objects, struct slabobj_ext *obj_exts)
3271+
void **p, int objects, unsigned long obj_exts)
32573272
{
32583273
size_t obj_size = obj_full_size(s);
32593274

32603275
for (int i = 0; i < objects; i++) {
32613276
struct obj_cgroup *objcg;
3277+
struct slabobj_ext *obj_ext;
32623278
unsigned int off;
32633279

32643280
off = obj_to_index(s, slab, p[i]);
3265-
objcg = obj_exts[off].objcg;
3281+
obj_ext = slab_obj_ext(slab, obj_exts, off);
3282+
objcg = obj_ext->objcg;
32663283
if (!objcg)
32673284
continue;
32683285

3269-
obj_exts[off].objcg = NULL;
3286+
obj_ext->objcg = NULL;
32703287
refill_obj_stock(objcg, obj_size, true, -obj_size,
32713288
slab_pgdat(slab), cache_vmstat_idx(s));
32723289
obj_cgroup_put(objcg);

mm/page_alloc.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3011,6 +3011,11 @@ void free_frozen_pages(struct page *page, unsigned int order)
30113011
__free_frozen_pages(page, order, FPI_NONE);
30123012
}
30133013

3014+
void free_frozen_pages_nolock(struct page *page, unsigned int order)
3015+
{
3016+
__free_frozen_pages(page, order, FPI_TRYLOCK);
3017+
}
3018+
30143019
/*
30153020
* Free a batch of folios
30163021
*/

0 commit comments

Comments (0)