Skip to content

Commit 8776858

Browse files
committed
Merge tag 'dma-mapping-7.1-2026-04-16' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux
Pull dma-mapping updates from Marek Szyprowski: - added support for batched cache sync, which improves performance of dma_map/unmap_sg() operations on ARM64 architecture (Barry Song) - introduced DMA_ATTR_CC_SHARED attribute for explicitly shared memory used in confidential computing (Jiri Pirko) - refactored spaghetti-like code in drivers/of/of_reserved_mem.c and its clients (Marek Szyprowski, shared branch with device-tree updates to avoid merge conflicts) - prepared Contiguous Memory Allocator related code for making dma-buf drivers modularized (Maxime Ripard) - added support for benchmarking dma_map_sg() calls to tools/dma utility (Qinxin Xia) * tag 'dma-mapping-7.1-2026-04-16' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux: (24 commits) dma-buf: heaps: system: document system_cc_shared heap dma-buf: heaps: system: add system_cc_shared heap for explicitly shared memory dma-mapping: introduce DMA_ATTR_CC_SHARED for shared memory mm: cma: Export cma_alloc(), cma_release() and cma_get_name() dma: contiguous: Export dev_get_cma_area() dma: contiguous: Make dma_contiguous_default_area static dma: contiguous: Make dev_get_cma_area() a proper function dma: contiguous: Turn heap registration logic around of: reserved_mem: rework fdt_init_reserved_mem_node() of: reserved_mem: clarify fdt_scan_reserved_mem*() functions of: reserved_mem: rearrange code a bit of: reserved_mem: replace CMA quirks by generic methods of: reserved_mem: switch to ops based OF_DECLARE() of: reserved_mem: use -ENODEV instead of -ENOENT of: reserved_mem: remove fdt node from the structure dma-mapping: fix false kernel-doc comment marker dma-mapping: Support batch mode for dma_direct_{map,unmap}_sg dma-mapping: Separate DMA sync issuing and completion waiting arm64: Provide dcache_inval_poc_nosync helper arm64: Provide dcache_clean_poc_nosync helper ...
2 parents d662a71 + 15818b2 commit 8776858

33 files changed

Lines changed: 917 additions & 345 deletions

File tree

Documentation/userspace-api/dma-buf-heaps.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,13 @@ following heaps:
1616

1717
- The ``system`` heap allocates virtually contiguous, cacheable, buffers.
1818

19+
- The ``system_cc_shared`` heap allocates virtually contiguous, cacheable,
20+
buffers using shared (decrypted) memory. It is only present on
21+
confidential computing (CoCo) VMs where memory encryption is active
22+
(e.g., AMD SEV, Intel TDX). The allocated pages have the encryption
23+
bit cleared, making them accessible for device DMA without TDISP
24+
support. On non-CoCo VM configurations, this heap is not registered.
25+
1926
- The ``default_cma_region`` heap allocates physically contiguous,
2027
cacheable, buffers. Only present if a CMA region is present. Such a
2128
region is usually created either through the kernel commandline

arch/arm64/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ config ARM64
5454
select ARCH_HAS_STRICT_MODULE_RWX
5555
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
5656
select ARCH_HAS_SYNC_DMA_FOR_CPU
57+
select ARCH_HAS_BATCHED_DMA_SYNC
5758
select ARCH_HAS_SYSCALL_WRAPPER
5859
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
5960
select ARCH_HAS_ZONE_DMA_SET if EXPERT

arch/arm64/include/asm/assembler.h

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -371,14 +371,13 @@ alternative_endif
371371
* [start, end) with dcache line size explicitly provided.
372372
*
373373
* op: operation passed to dc instruction
374-
* domain: domain used in dsb instruction
375374
* start: starting virtual address of the region
376375
* end: end virtual address of the region
377376
* linesz: dcache line size
378377
* fixup: optional label to branch to on user fault
379378
* Corrupts: start, end, tmp
380379
*/
381-
.macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup
380+
.macro dcache_by_myline_op_nosync op, start, end, linesz, tmp, fixup
382381
sub \tmp, \linesz, #1
383382
bic \start, \start, \tmp
384383
alternative_if ARM64_WORKAROUND_4311569
@@ -412,14 +411,28 @@ alternative_if ARM64_WORKAROUND_4311569
412411
cbnz \start, .Ldcache_op\@
413412
.endif
414413
alternative_else_nop_endif
415-
dsb \domain
416414

417415
_cond_uaccess_extable .Ldcache_op\@, \fixup
418416
.endm
419417

420418
/*
421419
* Macro to perform a data cache maintenance for the interval
422-
* [start, end)
420+
* [start, end) without waiting for completion
421+
*
422+
* op: operation passed to dc instruction
423+
* start: starting virtual address of the region
424+
* end: end virtual address of the region
425+
* fixup: optional label to branch to on user fault
426+
* Corrupts: start, end, tmp1, tmp2
427+
*/
428+
.macro dcache_by_line_op_nosync op, start, end, tmp1, tmp2, fixup
429+
dcache_line_size \tmp1, \tmp2
430+
dcache_by_myline_op_nosync \op, \start, \end, \tmp1, \tmp2, \fixup
431+
.endm
432+
433+
/*
434+
* Macro to perform a data cache maintenance for the interval
435+
* [start, end) and wait for completion
423436
*
424437
* op: operation passed to dc instruction
425438
* domain: domain used in dsb instruction
@@ -429,8 +442,8 @@ alternative_else_nop_endif
429442
* Corrupts: start, end, tmp1, tmp2
430443
*/
431444
.macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
432-
dcache_line_size \tmp1, \tmp2
433-
dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup
445+
dcache_by_line_op_nosync \op, \start, \end, \tmp1, \tmp2, \fixup
446+
dsb \domain
434447
.endm
435448

436449
/*

arch/arm64/include/asm/cache.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,11 @@ int cache_line_size(void);
8787

8888
#define dma_get_cache_alignment cache_line_size
8989

90+
static inline void arch_sync_dma_flush(void)
91+
{
92+
dsb(sy);
93+
}
94+
9095
/* Compress a u64 MPIDR value into 32 bits. */
9196
static inline u64 arch_compact_of_hwid(u64 id)
9297
{

arch/arm64/include/asm/cacheflush.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ extern void icache_inval_pou(unsigned long start, unsigned long end);
7474
extern void dcache_clean_inval_poc(unsigned long start, unsigned long end);
7575
extern void dcache_inval_poc(unsigned long start, unsigned long end);
7676
extern void dcache_clean_poc(unsigned long start, unsigned long end);
77+
extern void dcache_inval_poc_nosync(unsigned long start, unsigned long end);
78+
extern void dcache_clean_poc_nosync(unsigned long start, unsigned long end);
7779
extern void dcache_clean_pop(unsigned long start, unsigned long end);
7880
extern void dcache_clean_pou(unsigned long start, unsigned long end);
7981
extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end);

arch/arm64/kernel/relocate_kernel.S

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ SYM_CODE_START(arm64_relocate_new_kernel)
6464
mov x19, x13
6565
copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8
6666
add x1, x19, #PAGE_SIZE
67-
dcache_by_myline_op civac, sy, x19, x1, x15, x20
67+
dcache_by_myline_op_nosync civac, x19, x1, x15, x20
68+
dsb sy
6869
b .Lnext
6970
.Ltest_indirection:
7071
tbz x16, IND_INDIRECTION_BIT, .Ltest_destination

arch/arm64/mm/cache.S

Lines changed: 46 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -132,17 +132,7 @@ alternative_else_nop_endif
132132
ret
133133
SYM_FUNC_END(dcache_clean_pou)
134134

135-
/*
136-
* dcache_inval_poc(start, end)
137-
*
138-
* Ensure that any D-cache lines for the interval [start, end)
139-
* are invalidated. Any partial lines at the ends of the interval are
140-
* also cleaned to PoC to prevent data loss.
141-
*
142-
* - start - kernel start address of region
143-
* - end - kernel end address of region
144-
*/
145-
SYM_FUNC_START(__pi_dcache_inval_poc)
135+
.macro __dcache_inval_poc_nosync
146136
dcache_line_size x2, x3
147137
sub x3, x2, #1
148138
tst x1, x3 // end cache line aligned?
@@ -158,11 +148,41 @@ SYM_FUNC_START(__pi_dcache_inval_poc)
158148
3: add x0, x0, x2
159149
cmp x0, x1
160150
b.lo 2b
151+
.endm
152+
153+
/*
154+
* dcache_inval_poc(start, end)
155+
*
156+
* Ensure that any D-cache lines for the interval [start, end)
157+
* are invalidated. Any partial lines at the ends of the interval are
158+
* also cleaned to PoC to prevent data loss.
159+
*
160+
* - start - kernel start address of region
161+
* - end - kernel end address of region
162+
*/
163+
SYM_FUNC_START(__pi_dcache_inval_poc)
164+
__dcache_inval_poc_nosync
161165
dsb sy
162166
ret
163167
SYM_FUNC_END(__pi_dcache_inval_poc)
164168
SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc)
165169

170+
/*
171+
* dcache_inval_poc_nosync(start, end)
172+
*
173+
* Issue the instructions of D-cache lines for the interval [start, end)
172+
* for invalidation. Not necessarily invalidated to PoC until an explicit
173+
* dsb sy is issued later.
176+
*
177+
* - start - kernel start address of region
178+
* - end - kernel end address of region
179+
*/
180+
SYM_FUNC_START(__pi_dcache_inval_poc_nosync)
181+
__dcache_inval_poc_nosync
182+
ret
183+
SYM_FUNC_END(__pi_dcache_inval_poc_nosync)
184+
SYM_FUNC_ALIAS(dcache_inval_poc_nosync, __pi_dcache_inval_poc_nosync)
185+
166186
/*
167187
* dcache_clean_poc(start, end)
168188
*
@@ -178,6 +198,21 @@ SYM_FUNC_START(__pi_dcache_clean_poc)
178198
SYM_FUNC_END(__pi_dcache_clean_poc)
179199
SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc)
180200

201+
/*
202+
* dcache_clean_poc_nosync(start, end)
203+
*
204+
* Issue the instructions of D-cache lines for the interval [start, end).
205+
* Not necessarily cleaned to the PoC until an explicit dsb sy afterward.
206+
*
207+
* - start - virtual start address of region
208+
* - end - virtual end address of region
209+
*/
210+
SYM_FUNC_START(__pi_dcache_clean_poc_nosync)
211+
dcache_by_line_op_nosync cvac, x0, x1, x2, x3
212+
ret
213+
SYM_FUNC_END(__pi_dcache_clean_poc_nosync)
214+
SYM_FUNC_ALIAS(dcache_clean_poc_nosync, __pi_dcache_clean_poc_nosync)
215+
181216
/*
182217
* dcache_clean_pop(start, end)
183218
*

arch/arm64/mm/dma-mapping.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
1717
{
1818
unsigned long start = (unsigned long)phys_to_virt(paddr);
1919

20-
dcache_clean_poc(start, start + size);
20+
dcache_clean_poc_nosync(start, start + size);
2121
}
2222

2323
void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
@@ -28,7 +28,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
2828
if (dir == DMA_TO_DEVICE)
2929
return;
3030

31-
dcache_inval_poc(start, start + size);
31+
dcache_inval_poc_nosync(start, start + size);
3232
}
3333

3434
void arch_dma_prep_coherent(struct page *page, size_t size)

drivers/dma-buf/heaps/cma_heap.c

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
#include <linux/cma.h>
1616
#include <linux/dma-buf.h>
17-
#include <linux/dma-buf/heaps/cma.h>
1817
#include <linux/dma-heap.h>
1918
#include <linux/dma-map-ops.h>
2019
#include <linux/err.h>
@@ -30,19 +29,6 @@
3029

3130
#define DEFAULT_CMA_NAME "default_cma_region"
3231

33-
static struct cma *dma_areas[MAX_CMA_AREAS] __initdata;
34-
static unsigned int dma_areas_num __initdata;
35-
36-
int __init dma_heap_cma_register_heap(struct cma *cma)
37-
{
38-
if (dma_areas_num >= ARRAY_SIZE(dma_areas))
39-
return -EINVAL;
40-
41-
dma_areas[dma_areas_num++] = cma;
42-
43-
return 0;
44-
}
45-
4632
struct cma_heap {
4733
struct dma_heap *heap;
4834
struct cma *cma;
@@ -411,6 +397,7 @@ static int __init __add_cma_heap(struct cma *cma, const char *name)
411397
static int __init add_cma_heaps(void)
412398
{
413399
struct cma *default_cma = dev_get_cma_area(NULL);
400+
struct cma *cma;
414401
unsigned int i;
415402
int ret;
416403

@@ -420,9 +407,7 @@ static int __init add_cma_heaps(void)
420407
return ret;
421408
}
422409

423-
for (i = 0; i < dma_areas_num; i++) {
424-
struct cma *cma = dma_areas[i];
425-
410+
for (i = 0; (cma = dma_contiguous_get_area_by_idx(i)) != NULL; i++) {
426411
ret = __add_cma_heap(cma, cma_get_name(cma));
427412
if (ret) {
428413
pr_warn("Failed to add CMA heap %s", cma_get_name(cma));

0 commit comments

Comments
 (0)