Skip to content

Commit 15d6dd1

Browse files
committed
Merge tag 'dma-mapping-7.0-2026-03-25' into dma-mapping-for-next
dma-mapping fixes for Linux 7.0 A set of fixes for DMA-mapping subsystem, which resolve false-positive warnings from KMSAN and DMA-API debug (Shigeru Yoshida and Leon Romanovsky) as well as a simple build fix (Miguel Ojeda). Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
2 parents d9794c0 + 2cdaff2 commit 15d6dd1

13 files changed

Lines changed: 112 additions & 38 deletions

File tree

Documentation/core-api/dma-attributes.rst

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -149,11 +149,33 @@ For architectures that require cache flushing for DMA coherence
149149
DMA_ATTR_MMIO will not perform any cache flushing. The address
150150
provided must never be mapped cacheable into the CPU.
151151

152-
DMA_ATTR_CPU_CACHE_CLEAN
153-
------------------------
154-
155-
This attribute indicates the CPU will not dirty any cacheline overlapping this
156-
DMA_FROM_DEVICE/DMA_BIDIRECTIONAL buffer while it is mapped. This allows
157-
multiple small buffers to safely share a cacheline without risk of data
158-
corruption, suppressing DMA debug warnings about overlapping mappings.
159-
All mappings sharing a cacheline should have this attribute.
152+
DMA_ATTR_DEBUGGING_IGNORE_CACHELINES
153+
------------------------------------
154+
155+
This attribute indicates that CPU cache lines may overlap for buffers mapped
156+
with DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
157+
158+
Such overlap may occur when callers map multiple small buffers that reside
159+
within the same cache line. In this case, callers must guarantee that the CPU
160+
will not dirty these cache lines after the mappings are established. When this
161+
condition is met, multiple buffers can safely share a cache line without risking
162+
data corruption.
163+
164+
All mappings that share a cache line must set this attribute to suppress DMA
165+
debug warnings about overlapping mappings.
166+
167+
DMA_ATTR_REQUIRE_COHERENT
168+
-------------------------
169+
170+
DMA mapping requests with the DMA_ATTR_REQUIRE_COHERENT attribute fail on any
171+
system where SWIOTLB or cache management is required. This should only
172+
be used to support uAPI designs that require continuous HW DMA
173+
coherence with userspace processes, for example RDMA and DRM. At a
174+
minimum the memory being mapped must be userspace memory from
175+
pin_user_pages() or similar.
176+
177+
Drivers should consider using dma_mmap_pages() instead of this
178+
interface when building their uAPIs, when possible.
179+
180+
It must never be used in an in-kernel driver that only works with
181+
kernel memory.

arch/sparc/kernel/iommu.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,8 @@ static dma_addr_t dma_4u_map_phys(struct device *dev, phys_addr_t phys,
312312
if (direction != DMA_TO_DEVICE)
313313
iopte_protection |= IOPTE_WRITE;
314314

315+
phys &= IO_PAGE_MASK;
316+
315317
for (i = 0; i < npages; i++, base++, phys += IO_PAGE_SIZE)
316318
iopte_val(*base) = iopte_protection | phys;
317319

arch/sparc/kernel/pci_sun4v.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,8 @@ static dma_addr_t dma_4v_map_phys(struct device *dev, phys_addr_t phys,
410410

411411
iommu_batch_start(dev, prot, entry);
412412

413+
phys &= IO_PAGE_MASK;
414+
413415
for (i = 0; i < npages; i++, phys += IO_PAGE_SIZE) {
414416
long err = iommu_batch_add(phys, mask);
415417
if (unlikely(err < 0L))

drivers/infiniband/core/umem.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
5555

5656
if (dirty)
5757
ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt,
58-
DMA_BIDIRECTIONAL, 0);
58+
DMA_BIDIRECTIONAL,
59+
DMA_ATTR_REQUIRE_COHERENT);
5960

6061
for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) {
6162
unpin_user_page_range_dirty_lock(sg_page(sg),
@@ -169,7 +170,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
169170
unsigned long lock_limit;
170171
unsigned long new_pinned;
171172
unsigned long cur_base;
172-
unsigned long dma_attr = 0;
173+
unsigned long dma_attr = DMA_ATTR_REQUIRE_COHERENT;
173174
struct mm_struct *mm;
174175
unsigned long npages;
175176
int pinned, ret;

drivers/iommu/dma-iommu.c

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1219,7 +1219,7 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
12191219
*/
12201220
if (dev_use_swiotlb(dev, size, dir) &&
12211221
iova_unaligned(iovad, phys, size)) {
1222-
if (attrs & DMA_ATTR_MMIO)
1222+
if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
12231223
return DMA_MAPPING_ERROR;
12241224

12251225
phys = iommu_dma_map_swiotlb(dev, phys, size, dir, attrs);
@@ -1233,7 +1233,8 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
12331233
}
12341234

12351235
iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
1236-
if (iova == DMA_MAPPING_ERROR && !(attrs & DMA_ATTR_MMIO))
1236+
if (iova == DMA_MAPPING_ERROR &&
1237+
!(attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)))
12371238
swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
12381239
return iova;
12391240
}
@@ -1243,7 +1244,7 @@ void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle,
12431244
{
12441245
phys_addr_t phys;
12451246

1246-
if (attrs & DMA_ATTR_MMIO) {
1247+
if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) {
12471248
__iommu_dma_unmap(dev, dma_handle, size);
12481249
return;
12491250
}
@@ -1957,9 +1958,21 @@ int dma_iova_link(struct device *dev, struct dma_iova_state *state,
19571958
if (WARN_ON_ONCE(iova_start_pad && offset > 0))
19581959
return -EIO;
19591960

1961+
/*
1962+
* DMA_IOVA_USE_SWIOTLB is set on state after some entry
1963+
* took the SWIOTLB path, which we were supposed to prevent
1964+
* for the DMA_ATTR_REQUIRE_COHERENT attribute.
1965+
*/
1966+
if (WARN_ON_ONCE((state->__size & DMA_IOVA_USE_SWIOTLB) &&
1967+
(attrs & DMA_ATTR_REQUIRE_COHERENT)))
1968+
return -EOPNOTSUPP;
1969+
1970+
if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT))
1971+
return -EOPNOTSUPP;
1972+
19601973
if (dev_use_swiotlb(dev, size, dir) &&
19611974
iova_unaligned(iovad, phys, size)) {
1962-
if (attrs & DMA_ATTR_MMIO)
1975+
if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
19631976
return -EPERM;
19641977

19651978
return iommu_dma_iova_link_swiotlb(dev, state, phys, offset,

drivers/virtio/virtio_ring.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2912,10 +2912,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
29122912
* @data: the token identifying the buffer.
29132913
* @gfp: how to do memory allocations (if necessary).
29142914
*
2915-
* Same as virtqueue_add_inbuf but passes DMA_ATTR_CPU_CACHE_CLEAN to indicate
2916-
* that the CPU will not dirty any cacheline overlapping this buffer while it
2917-
* is available, and to suppress overlapping cacheline warnings in DMA debug
2918-
* builds.
2915+
* Same as virtqueue_add_inbuf but passes DMA_ATTR_DEBUGGING_IGNORE_CACHELINES
2916+
* to indicate that the CPU will not dirty any cacheline overlapping this buffer
2917+
* while it is available, and to suppress overlapping cacheline warnings in DMA
2918+
* debug builds.
29192919
*
29202920
* Caller must ensure we don't call this with other virtqueue operations
29212921
* at the same time (except where noted).
@@ -2928,7 +2928,7 @@ int virtqueue_add_inbuf_cache_clean(struct virtqueue *vq,
29282928
gfp_t gfp)
29292929
{
29302930
return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp,
2931-
DMA_ATTR_CPU_CACHE_CLEAN);
2931+
DMA_ATTR_DEBUGGING_IGNORE_CACHELINES);
29322932
}
29332933
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_cache_clean);
29342934

include/linux/dma-mapping.h

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,18 @@
8080
#define DMA_ATTR_MMIO (1UL << 10)
8181

8282
/*
83-
* DMA_ATTR_CPU_CACHE_CLEAN: Indicates the CPU will not dirty any cacheline
84-
* overlapping this buffer while it is mapped for DMA. All mappings sharing
85-
* a cacheline must have this attribute for this to be considered safe.
83+
* DMA_ATTR_DEBUGGING_IGNORE_CACHELINES: Indicates the CPU cache line can be
84+
* overlapped. All mappings sharing a cacheline must have this attribute for
85+
* this to be considered safe.
8686
*/
87-
#define DMA_ATTR_CPU_CACHE_CLEAN (1UL << 11)
87+
#define DMA_ATTR_DEBUGGING_IGNORE_CACHELINES (1UL << 11)
88+
89+
/*
90+
* DMA_ATTR_REQUIRE_COHERENT: Indicates that DMA coherency is required.
91+
* All mappings that carry this attribute can't work with SWIOTLB and cache
92+
* flushing.
93+
*/
94+
#define DMA_ATTR_REQUIRE_COHERENT (1UL << 12)
8895

8996
/*
9097
* A dma_addr_t can hold any valid DMA or bus address for the platform. It can
@@ -248,8 +255,8 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size,
248255
{
249256
return NULL;
250257
}
251-
static void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
252-
dma_addr_t dma_handle, unsigned long attrs)
258+
static inline void dma_free_attrs(struct device *dev, size_t size,
259+
void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
253260
{
254261
}
255262
static inline void *dmam_alloc_attrs(struct device *dev, size_t size,

include/trace/events/dma.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ TRACE_DEFINE_ENUM(DMA_NONE);
3232
{ DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \
3333
{ DMA_ATTR_NO_WARN, "NO_WARN" }, \
3434
{ DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \
35-
{ DMA_ATTR_MMIO, "MMIO" })
35+
{ DMA_ATTR_MMIO, "MMIO" }, \
36+
{ DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" }, \
37+
{ DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" })
3638

3739
DECLARE_EVENT_CLASS(dma_map,
3840
TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,

kernel/dma/debug.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,7 @@ static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
453453
return overlap;
454454
}
455455

456-
static void active_cacheline_inc_overlap(phys_addr_t cln)
456+
static void active_cacheline_inc_overlap(phys_addr_t cln, bool is_cache_clean)
457457
{
458458
int overlap = active_cacheline_read_overlap(cln);
459459

@@ -462,7 +462,7 @@ static void active_cacheline_inc_overlap(phys_addr_t cln)
462462
/* If we overflowed the overlap counter then we're potentially
463463
* leaking dma-mappings.
464464
*/
465-
WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
465+
WARN_ONCE(!is_cache_clean && overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
466466
pr_fmt("exceeded %d overlapping mappings of cacheline %pa\n"),
467467
ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
468468
}
@@ -495,7 +495,7 @@ static int active_cacheline_insert(struct dma_debug_entry *entry,
495495
if (rc == -EEXIST) {
496496
struct dma_debug_entry *existing;
497497

498-
active_cacheline_inc_overlap(cln);
498+
active_cacheline_inc_overlap(cln, entry->is_cache_clean);
499499
existing = radix_tree_lookup(&dma_active_cacheline, cln);
500500
/* A lookup failure here after we got -EEXIST is unexpected. */
501501
WARN_ON(!existing);
@@ -601,7 +601,8 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs)
601601
unsigned long flags;
602602
int rc;
603603

604-
entry->is_cache_clean = !!(attrs & DMA_ATTR_CPU_CACHE_CLEAN);
604+
entry->is_cache_clean = attrs & (DMA_ATTR_DEBUGGING_IGNORE_CACHELINES |
605+
DMA_ATTR_REQUIRE_COHERENT);
605606

606607
bucket = get_hash_bucket(entry, &flags);
607608
hash_bucket_add(bucket, entry);

kernel/dma/direct.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
8989
dma_addr_t dma_addr;
9090

9191
if (is_swiotlb_force_bounce(dev)) {
92-
if (attrs & DMA_ATTR_MMIO)
93-
goto err_overflow;
92+
if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
93+
return DMA_MAPPING_ERROR;
9494

9595
return swiotlb_map(dev, phys, size, dir, attrs);
9696
}
@@ -103,7 +103,8 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
103103
dma_addr = phys_to_dma(dev, phys);
104104
if (unlikely(!dma_capable(dev, dma_addr, size, true)) ||
105105
dma_kmalloc_needs_bounce(dev, size, dir)) {
106-
if (is_swiotlb_active(dev))
106+
if (is_swiotlb_active(dev) &&
107+
!(attrs & DMA_ATTR_REQUIRE_COHERENT))
107108
return swiotlb_map(dev, phys, size, dir, attrs);
108109

109110
goto err_overflow;
@@ -132,7 +133,7 @@ static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr,
132133
{
133134
phys_addr_t phys;
134135

135-
if (attrs & DMA_ATTR_MMIO)
136+
if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
136137
/* nothing to do: uncached and no swiotlb */
137138
return;
138139

0 commit comments

Comments
 (0)