
Commit 78b30c5

Jiri Pirko authored and Marek Szyprowski committed
dma-buf: heaps: system: add system_cc_shared heap for explicitly shared memory
Add a new "system_cc_shared" dma-buf heap to allow userspace to allocate shared (decrypted) memory for confidential computing (CoCo) VMs. On CoCo VMs, guest memory is private by default. The hardware uses an encryption bit in page table entries (C-bit on AMD SEV, "shared" bit on Intel TDX) to control whether a given memory access is private or shared. The kernel's direct map is set up as private, so pages returned by alloc_pages() are private in the direct map by default. To make this memory usable for devices that do not support DMA to private memory (no TDISP support), it has to be explicitly shared. A couple of things are needed to properly handle shared memory for the dma-buf use case: - set_memory_decrypted() on the direct map after allocation: Besides clearing the encryption bit in the direct map PTEs, this also notifies the hypervisor about the page state change. On free, the inverse set_memory_encrypted() must be called before returning pages to the allocator. If re-encryption fails, pages are intentionally leaked to prevent shared memory from being reused as private. - pgprot_decrypted() for userspace and kernel virtual mappings: Any new mapping of the shared pages, be it to userspace via mmap or to kernel vmalloc space via vmap, creates PTEs independent of the direct map. These must also have the encryption bit cleared, otherwise accesses through them would see encrypted (garbage) data. - DMA_ATTR_CC_SHARED for DMA mapping: Since the pages are already shared, the DMA API needs to be informed via DMA_ATTR_CC_SHARED so it can map them correctly as unencrypted for device access. On non-CoCo VMs, the system_cc_shared heap is not registered to prevent misuse by userspace that does not understand the security implications of explicitly shared memory. Signed-off-by: Jiri Pirko <jiri@nvidia.com> Reviewed-by: T.J. Mercier <tjmercier@google.com> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Acked-by: Sumit Semwal <sumit.semwal@linaro.org> Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com> Link: https://lore.kernel.org/r/20260325192352.437608-3-jiri@resnulli.us
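For illustration, a minimal userspace sketch (not part of this commit) that allocates from the new heap through the standard dma-heap uapi in <linux/dma-heap.h> and maps the buffer; the mmap path exercises the pgprot_decrypted() handling described above. The program itself is hypothetical, but the device-node path, ioctl, and struct are the stock dma-heap uapi; the node only exists on CoCo guests, since the heap is not registered elsewhere.

/* Hypothetical usage sketch, assuming the standard dma-heap uapi. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/dma-heap.h>

int main(void)
{
	struct dma_heap_allocation_data alloc = {
		.len = 4096,
		.fd_flags = O_RDWR | O_CLOEXEC,
	};
	void *p;
	int heap_fd;

	/* Present only on CoCo guests; absent elsewhere by design. */
	heap_fd = open("/dev/dma_heap/system_cc_shared", O_RDONLY | O_CLOEXEC);
	if (heap_fd < 0) {
		perror("open");
		return 1;
	}

	if (ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &alloc) < 0) {
		perror("DMA_HEAP_IOCTL_ALLOC");
		return 1;
	}

	/* mmap goes through system_heap_mmap(), which applies pgprot_decrypted(). */
	p = mmap(NULL, alloc.len, PROT_READ | PROT_WRITE, MAP_SHARED, alloc.fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(p, 0, alloc.len);

	munmap(p, alloc.len);
	close(alloc.fd);
	close(heap_fd);
	return 0;
}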
1 parent f054804 commit 78b30c5

1 file changed

Lines changed: 98 additions & 5 deletions

File tree

drivers/dma-buf/heaps/system_heap.c

@@ -10,17 +10,25 @@
  * Andrew F. Davis <afd@ti.com>
  */
 
+#include <linux/cc_platform.h>
 #include <linux/dma-buf.h>
 #include <linux/dma-mapping.h>
 #include <linux/dma-heap.h>
 #include <linux/err.h>
 #include <linux/highmem.h>
+#include <linux/mem_encrypt.h>
 #include <linux/mm.h>
+#include <linux/set_memory.h>
 #include <linux/module.h>
+#include <linux/pgtable.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
+struct system_heap_priv {
+	bool cc_shared;
+};
+
 struct system_heap_buffer {
 	struct dma_heap *heap;
 	struct list_head attachments;
@@ -29,13 +37,15 @@ struct system_heap_buffer {
 	struct sg_table sg_table;
 	int vmap_cnt;
 	void *vaddr;
+	bool cc_shared;
 };
 
 struct dma_heap_attachment {
 	struct device *dev;
 	struct sg_table table;
 	struct list_head list;
 	bool mapped;
+	bool cc_shared;
 };
 
 #define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO)
@@ -52,6 +62,34 @@ static gfp_t order_flags[] = {HIGH_ORDER_GFP, HIGH_ORDER_GFP, LOW_ORDER_GFP};
 static const unsigned int orders[] = {8, 4, 0};
 #define NUM_ORDERS ARRAY_SIZE(orders)
 
+static int system_heap_set_page_decrypted(struct page *page)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+	unsigned int nr_pages = 1 << compound_order(page);
+	int ret;
+
+	ret = set_memory_decrypted(addr, nr_pages);
+	if (ret)
+		pr_warn_ratelimited("dma-buf system heap: failed to decrypt page at %p\n",
+				    page_address(page));
+
+	return ret;
+}
+
+static int system_heap_set_page_encrypted(struct page *page)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+	unsigned int nr_pages = 1 << compound_order(page);
+	int ret;
+
+	ret = set_memory_encrypted(addr, nr_pages);
+	if (ret)
+		pr_warn_ratelimited("dma-buf system heap: failed to re-encrypt page at %p, leaking memory\n",
+				    page_address(page));
+
+	return ret;
+}
+
 static int dup_sg_table(struct sg_table *from, struct sg_table *to)
 {
 	struct scatterlist *sg, *new_sg;
@@ -90,6 +128,7 @@ static int system_heap_attach(struct dma_buf *dmabuf,
 	a->dev = attachment->dev;
 	INIT_LIST_HEAD(&a->list);
 	a->mapped = false;
+	a->cc_shared = buffer->cc_shared;
 
 	attachment->priv = a;
 
@@ -119,9 +158,11 @@ static struct sg_table *system_heap_map_dma_buf(struct dma_buf_attachment *attac
 {
 	struct dma_heap_attachment *a = attachment->priv;
 	struct sg_table *table = &a->table;
+	unsigned long attrs;
 	int ret;
 
-	ret = dma_map_sgtable(attachment->dev, table, direction, 0);
+	attrs = a->cc_shared ? DMA_ATTR_CC_SHARED : 0;
+	ret = dma_map_sgtable(attachment->dev, table, direction, attrs);
 	if (ret)
 		return ERR_PTR(ret);
 
@@ -188,8 +229,13 @@ static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
 	unsigned long addr = vma->vm_start;
 	unsigned long pgoff = vma->vm_pgoff;
 	struct scatterlist *sg;
+	pgprot_t prot;
 	int i, ret;
 
+	prot = vma->vm_page_prot;
+	if (buffer->cc_shared)
+		prot = pgprot_decrypted(prot);
+
 	for_each_sgtable_sg(table, sg, i) {
 		unsigned long n = sg->length >> PAGE_SHIFT;
 
@@ -206,8 +252,7 @@ static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
 		if (addr + size > vma->vm_end)
 			size = vma->vm_end - addr;
 
-		ret = remap_pfn_range(vma, addr, page_to_pfn(page),
-				      size, vma->vm_page_prot);
+		ret = remap_pfn_range(vma, addr, page_to_pfn(page), size, prot);
 		if (ret)
 			return ret;
 
@@ -225,6 +270,7 @@ static void *system_heap_do_vmap(struct system_heap_buffer *buffer)
 	struct page **pages = vmalloc(sizeof(struct page *) * npages);
 	struct page **tmp = pages;
 	struct sg_page_iter piter;
+	pgprot_t prot;
 	void *vaddr;
 
 	if (!pages)
@@ -235,7 +281,10 @@ static void *system_heap_do_vmap(struct system_heap_buffer *buffer)
 		*tmp++ = sg_page_iter_page(&piter);
 	}
 
-	vaddr = vmap(pages, npages, VM_MAP, PAGE_KERNEL);
+	prot = PAGE_KERNEL;
+	if (buffer->cc_shared)
+		prot = pgprot_decrypted(prot);
+	vaddr = vmap(pages, npages, VM_MAP, prot);
 	vfree(pages);
 
 	if (!vaddr)
@@ -296,6 +345,14 @@ static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
 	for_each_sgtable_sg(table, sg, i) {
 		struct page *page = sg_page(sg);
 
+		/*
+		 * Intentionally leak pages that cannot be re-encrypted
+		 * to prevent shared memory from being reused.
+		 */
+		if (buffer->cc_shared &&
+		    system_heap_set_page_encrypted(page))
+			continue;
+
 		__free_pages(page, compound_order(page));
 	}
 	sg_free_table(table);
@@ -347,6 +404,8 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap,
 	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
 	unsigned long size_remaining = len;
 	unsigned int max_order = orders[0];
+	struct system_heap_priv *priv = dma_heap_get_drvdata(heap);
+	bool cc_shared = priv->cc_shared;
 	struct dma_buf *dmabuf;
 	struct sg_table *table;
 	struct scatterlist *sg;
@@ -362,6 +421,7 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap,
 	mutex_init(&buffer->lock);
 	buffer->heap = heap;
 	buffer->len = len;
+	buffer->cc_shared = cc_shared;
 
 	INIT_LIST_HEAD(&pages);
 	i = 0;
@@ -396,6 +456,14 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap,
 		list_del(&page->lru);
 	}
 
+	if (cc_shared) {
+		for_each_sgtable_sg(table, sg, i) {
+			ret = system_heap_set_page_decrypted(sg_page(sg));
+			if (ret)
+				goto free_pages;
+		}
+	}
+
 	/* create the dmabuf */
 	exp_info.exp_name = dma_heap_get_name(heap);
 	exp_info.ops = &system_heap_buf_ops;
@@ -413,6 +481,13 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap,
 	for_each_sgtable_sg(table, sg, i) {
 		struct page *p = sg_page(sg);
 
+		/*
+		 * Intentionally leak pages that cannot be re-encrypted
+		 * to prevent shared memory from being reused.
+		 */
+		if (buffer->cc_shared &&
+		    system_heap_set_page_encrypted(p))
+			continue;
 		__free_pages(p, compound_order(p));
 	}
 	sg_free_table(table);
@@ -428,15 +503,33 @@ static const struct dma_heap_ops system_heap_ops = {
 	.allocate = system_heap_allocate,
 };
 
+static struct system_heap_priv system_heap_priv = {
+	.cc_shared = false,
+};
+
+static struct system_heap_priv system_heap_cc_shared_priv = {
+	.cc_shared = true,
+};
+
 static int __init system_heap_create(void)
{
 	struct dma_heap_export_info exp_info;
 	struct dma_heap *sys_heap;
 
 	exp_info.name = "system";
 	exp_info.ops = &system_heap_ops;
-	exp_info.priv = NULL;
+	exp_info.priv = &system_heap_priv;
+
+	sys_heap = dma_heap_add(&exp_info);
+	if (IS_ERR(sys_heap))
+		return PTR_ERR(sys_heap);
+
+	if (IS_ENABLED(CONFIG_HIGHMEM) ||
+	    !cc_platform_has(CC_ATTR_MEM_ENCRYPT))
+		return 0;
 
+	exp_info.name = "system_cc_shared";
+	exp_info.priv = &system_heap_cc_shared_priv;
 	sys_heap = dma_heap_add(&exp_info);
 	if (IS_ERR(sys_heap))
 		return PTR_ERR(sys_heap);
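For context, a hedged sketch of how a kernel driver might import such a buffer. The example_import() helper is hypothetical and not part of this commit; the point it illustrates is that importers use the generic dma-buf API unchanged, because system_heap_map_dma_buf() above applies DMA_ATTR_CC_SHARED internally whenever the buffer came from the cc_shared heap.

/* Hypothetical importer sketch; not part of this commit. */
#include <linux/dma-buf.h>
#include <linux/dma-direction.h>

static int example_import(struct device *dev, int fd)
{
	struct dma_buf *dmabuf;
	struct dma_buf_attachment *attach;
	struct sg_table *sgt;

	dmabuf = dma_buf_get(fd);
	if (IS_ERR(dmabuf))
		return PTR_ERR(dmabuf);

	attach = dma_buf_attach(dmabuf, dev);
	if (IS_ERR(attach)) {
		dma_buf_put(dmabuf);
		return PTR_ERR(attach);
	}

	/* The heap maps the pages with DMA_ATTR_CC_SHARED when cc_shared is set. */
	sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL);
	if (IS_ERR(sgt)) {
		dma_buf_detach(dmabuf, attach);
		dma_buf_put(dmabuf);
		return PTR_ERR(sgt);
	}

	/* ... program the device with sgt ... */

	dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL);
	dma_buf_detach(dmabuf, attach);
	dma_buf_put(dmabuf);
	return 0;
}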
