Skip to content

Commit 8186919

Browse files
Revert "Use a kmem_cache for homa_rpc allocation"
Performance measurements show that the kmem_cache actually hurts performance: average homa_rpc allocation time increased from 378 cycles without the kmem_cache to 400 cycles with it (see perf.txt for the individual measurements).
1 parent 8cc45d9 commit 8186919

6 files changed

Lines changed: 23 additions & 76 deletions

File tree

homa_impl.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,11 +138,6 @@ struct homa {
138138
*/
139139
struct homa_socktab *socktab;
140140

141-
/**
142-
* @rpc_kmem_cache: Used to allocate homa_rpc structs efficiently.
143-
*/
144-
struct kmem_cache *rpc_kmem_cache;
145-
146141
#ifndef __STRIP__ /* See strip.py */
147142
/**
148143
* @page_pool_mutex: Synchronizes access to any/all of the page_pools

homa_rpc.c

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ struct homa_rpc *homa_rpc_alloc_client(struct homa_sock *hsk,
3636
struct homa_rpc *crpc;
3737
int err;
3838

39-
crpc = kmem_cache_zalloc(hsk->homa->rpc_kmem_cache, GFP_KERNEL);
39+
crpc = kzalloc(sizeof(*crpc), GFP_KERNEL);
4040
if (unlikely(!crpc)) {
4141
hsk->error_msg = "couldn't allocate memory for client RPC";
4242
return ERR_PTR(-ENOMEM);
@@ -95,7 +95,7 @@ struct homa_rpc *homa_rpc_alloc_client(struct homa_sock *hsk,
9595
error:
9696
if (crpc->peer)
9797
homa_peer_release(crpc->peer);
98-
kmem_cache_free(hsk->homa->rpc_kmem_cache, crpc);
98+
kfree(crpc);
9999
return ERR_PTR(err);
100100
}
101101

@@ -146,7 +146,7 @@ struct homa_rpc *homa_rpc_alloc_server(struct homa_sock *hsk,
146146
}
147147

148148
/* Initialize fields that don't require the socket lock. */
149-
srpc = kmem_cache_zalloc(hsk->homa->rpc_kmem_cache, GFP_ATOMIC);
149+
srpc = kzalloc(sizeof(*srpc), GFP_ATOMIC);
150150
if (!srpc) {
151151
err = -ENOMEM;
152152
goto error;
@@ -209,11 +209,9 @@ struct homa_rpc *homa_rpc_alloc_server(struct homa_sock *hsk,
209209

210210
error:
211211
homa_bucket_unlock(bucket, id);
212-
if (srpc) {
213-
if (srpc->peer)
214-
homa_peer_release(srpc->peer);
215-
kmem_cache_free(hsk->homa->rpc_kmem_cache, srpc);
216-
}
212+
if (srpc && srpc->peer)
213+
homa_peer_release(srpc->peer);
214+
kfree(srpc);
217215
return ERR_PTR(err);
218216
}
219217

@@ -654,7 +652,7 @@ int homa_rpc_reap(struct homa_sock *hsk, bool reap_all)
654652
#endif /* See strip.py */
655653
rpc->state = 0;
656654
rpc->magic = 0;
657-
kmem_cache_free(hsk->homa->rpc_kmem_cache, rpc);
655+
kfree(rpc);
658656
}
659657
homa_sock_wakeup_wmem(hsk);
660658
tt_record4("reaped %d skbs, %d rpcs; %d skbs remain for port %d",

homa_utils.c

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,6 @@ int homa_init(struct homa *homa)
6565
if (!homa->socktab)
6666
return -ENOMEM;
6767
homa_socktab_init(homa->socktab);
68-
homa->rpc_kmem_cache = kmem_cache_create("homa_rpc",
69-
sizeof(struct homa_rpc),
70-
0, SLAB_HWCACHE_ALIGN, NULL);
71-
if (!homa->rpc_kmem_cache) {
72-
pr_err("Couldn't initialize rpc_kmem_cache\n");
73-
return -ENOMEM;
74-
}
7568
#ifndef __STRIP__ /* See strip.py */
7669
err = homa_skb_init(homa);
7770
if (err) {
@@ -164,10 +157,6 @@ void homa_destroy(struct homa *homa)
164157
homa->peertab = NULL;
165158
}
166159
#ifndef __STRIP__ /* See strip.py */
167-
if (homa->rpc_kmem_cache) {
168-
kmem_cache_destroy(homa->rpc_kmem_cache);
169-
homa->rpc_kmem_cache = NULL;
170-
}
171160

172161
homa_skb_cleanup(homa);
173162
#endif /* See strip.py */

perf.txt

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,21 @@ This file contains various notes and lessons learned concerning performance
22
of the Homa Linux kernel module. The notes are in reverse chronological
33
order.
44

5-
68. (January 2025) Performance snapshot with and without pacer, using
5+
69. (March 2026) Use a kmem_cache for homa_rpcs? Tried this, but took it out
6+
because it made performance slightly worse. Here are measurements of
7+
cycles/allocation for a 6-node cp_vs_tcp experiment on xl170s:
8+
9+
kmem_cache    no kmem_cache
10+
416           395
11+
432           412
12+
401           381
13+
357           335
14+
405           376
15+
391           368
16+
-------------------
17+
400           378 (avg. of avgs)
18+
19+
68. (January 2026) Performance snapshot with and without pacer, using
620
c6620 CloudLab nodes, "-w w4 -b 80 -s 20 -n 6". cp_vs_tcp is used unless
721
cp_both is indicated.
822

test/mock.c

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,6 @@ extern void *malloc(size_t size);
2828
#endif
2929
extern void *memcpy(void *dest, const void *src, size_t n);
3030

31-
struct kmem_cache {
32-
unsigned int size;
33-
int num;
34-
};
35-
3631
/* The variables below can be set to non-zero values by unit tests in order
3732
* to simulate error returns from various functions. If bit 0 is set to 1,
3833
* the next call to the function will fail; bit 1 corresponds to the next
@@ -985,41 +980,6 @@ void *__kmalloc_noprof(size_t size, gfp_t flags)
985980
return mock_kmalloc(size, flags);
986981
}
987982

988-
void *kmem_cache_alloc_noprof(struct kmem_cache *cachep, gfp_t flags)
989-
{
990-
void *result = mock_kmalloc(cachep->size, flags);
991-
992-
if (result)
993-
cachep->num++;
994-
return result;
995-
}
996-
997-
struct kmem_cache *__kmem_cache_create_args(const char *name,
998-
unsigned int object_size,
999-
struct kmem_cache_args *args,
1000-
slab_flags_t flags)
1001-
{
1002-
struct kmem_cache *cache;
1003-
1004-
cache = mock_kmalloc(sizeof(*cache), GFP_KERNEL | __GFP_ZERO);
1005-
if (cache)
1006-
cache->size = object_size;
1007-
return cache;
1008-
}
1009-
1010-
void kmem_cache_destroy(struct kmem_cache *cache)
1011-
{
1012-
if (cache->num != 0)
1013-
FAIL(" kmem_cache destroyed with %d live objects", cache->num);
1014-
kfree(cache);
1015-
}
1016-
1017-
void kmem_cache_free(struct kmem_cache *cache, void *objp)
1018-
{
1019-
cache->num--;
1020-
kfree(objp);
1021-
}
1022-
1023983
void kvfree(const void *addr)
1024984
{
1025985
kfree(addr);

test/unit_homa_utils.c

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -106,21 +106,12 @@ TEST_F(homa_utils, homa_init__cant_allocate_port_map)
106106
EXPECT_EQ(NULL, homa2.socktab);
107107
homa_destroy(&homa2);
108108
}
109-
TEST_F(homa_utils, homa_init__cant_create_rpc_kmem_cache)
110-
{
111-
struct homa homa2;
112-
113-
mock_kmalloc_errors = 0x40;
114-
EXPECT_EQ(ENOMEM, -homa_init(&homa2));
115-
EXPECT_SUBSTR("Couldn't initialize rpc_kmem_cache", mock_printk_output);
116-
homa_destroy(&homa2);
117-
}
118109
#ifndef __STRIP__ /* See strip.py */
119110
TEST_F(homa_utils, homa_init__homa_skb_init_failure)
120111
{
121112
struct homa homa2;
122113

123-
mock_kmalloc_errors = 0x80;
114+
mock_kmalloc_errors = 0x40;
124115
EXPECT_EQ(ENOMEM, -homa_init(&homa2));
125116
EXPECT_SUBSTR("Couldn't initialize skb management (errno 12)",
126117
mock_printk_output);

0 commit comments

Comments
 (0)