Skip to content

Commit e5ef321

Browse files
jgunthorpe authored and joergroedel committed
iommu/riscv: Use the generic iommu page table
This is a fairly straightforward conversion of the RISC-V iommu driver to use the generic iommu page table code. Invalidation stays as it is now with the driver pretending to implement simple range based invalidation even though the HW is more like ARM SMMUv3 than AMD where the HW implements a single-PTE based invalidation. Future work to extend the generic invalidate mechanism to support more ARM-like semantics would benefit this driver as well. Delete the existing page table code. Tested-by: Vincent Chen <vincent.chen@sifive.com> Acked-by: Paul Walmsley <pjw@kernel.org> # arch/riscv Reviewed-by: Tomasz Jeznach <tjeznach@rivosinc.com> Tested-by: Tomasz Jeznach <tjeznach@rivosinc.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
1 parent e93e4a6 commit e5ef321

2 files changed

Lines changed: 39 additions & 251 deletions

File tree

drivers/iommu/riscv/Kconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ config RISCV_IOMMU
66
depends on RISCV && 64BIT
77
default y
88
select IOMMU_API
9+
select GENERIC_PT
10+
select IOMMU_PT
11+
select IOMMU_PT_RISCV64
912
help
1013
Support for implementations of the RISC-V IOMMU architecture that
1114
complements the RISC-V MMU capabilities, providing similar address

drivers/iommu/riscv/iommu.c

Lines changed: 36 additions & 251 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <linux/iopoll.h>
2222
#include <linux/kernel.h>
2323
#include <linux/pci.h>
24+
#include <linux/generic_pt/iommu.h>
2425

2526
#include "../iommu-pages.h"
2627
#include "iommu-bits.h"
@@ -806,14 +807,15 @@ static int riscv_iommu_iodir_set_mode(struct riscv_iommu_device *iommu,
806807

807808
/* This struct contains protection domain specific IOMMU driver data. */
808809
struct riscv_iommu_domain {
809-
struct iommu_domain domain;
810+
union {
811+
struct iommu_domain domain;
812+
struct pt_iommu_riscv_64 riscvpt;
813+
};
810814
struct list_head bonds;
811815
spinlock_t lock; /* protect bonds list updates. */
812816
int pscid;
813-
int numa_node;
814-
unsigned int pgd_mode;
815-
unsigned long *pgd_root;
816817
};
818+
PT_IOMMU_CHECK_DOMAIN(struct riscv_iommu_domain, riscvpt.iommu, domain);
817819

818820
#define iommu_domain_to_riscv(iommu_domain) \
819821
container_of(iommu_domain, struct riscv_iommu_domain, domain)
@@ -1076,156 +1078,9 @@ static void riscv_iommu_iotlb_sync(struct iommu_domain *iommu_domain,
10761078
{
10771079
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
10781080

1079-
riscv_iommu_iotlb_inval(domain, gather->start, gather->end);
1080-
}
1081-
1082-
#define PT_SHIFT (PAGE_SHIFT - ilog2(sizeof(pte_t)))
1083-
1084-
#define _io_pte_present(pte) ((pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE))
1085-
#define _io_pte_leaf(pte) ((pte) & _PAGE_LEAF)
1086-
#define _io_pte_none(pte) ((pte) == 0)
1087-
#define _io_pte_entry(pn, prot) ((_PAGE_PFN_MASK & ((pn) << _PAGE_PFN_SHIFT)) | (prot))
1088-
1089-
static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain,
1090-
unsigned long pte,
1091-
struct iommu_pages_list *freelist)
1092-
{
1093-
unsigned long *ptr;
1094-
int i;
1095-
1096-
if (!_io_pte_present(pte) || _io_pte_leaf(pte))
1097-
return;
1098-
1099-
ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
1100-
1101-
/* Recursively free all sub page table pages */
1102-
for (i = 0; i < PTRS_PER_PTE; i++) {
1103-
pte = READ_ONCE(ptr[i]);
1104-
if (!_io_pte_none(pte) && cmpxchg_relaxed(ptr + i, pte, 0) == pte)
1105-
riscv_iommu_pte_free(domain, pte, freelist);
1106-
}
1107-
1108-
if (freelist)
1109-
iommu_pages_list_add(freelist, ptr);
1110-
else
1111-
iommu_free_pages(ptr);
1112-
}
1113-
1114-
static unsigned long *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain,
1115-
unsigned long iova, size_t pgsize,
1116-
gfp_t gfp)
1117-
{
1118-
unsigned long *ptr = domain->pgd_root;
1119-
unsigned long pte, old;
1120-
int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;
1121-
void *addr;
1122-
1123-
do {
1124-
const int shift = PAGE_SHIFT + PT_SHIFT * level;
1125-
1126-
ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));
1127-
/*
1128-
* Note: returned entry might be a non-leaf if there was
1129-
* existing mapping with smaller granularity. Up to the caller
1130-
* to replace and invalidate.
1131-
*/
1132-
if (((size_t)1 << shift) == pgsize)
1133-
return ptr;
1134-
pte_retry:
1135-
pte = READ_ONCE(*ptr);
1136-
/*
1137-
* This is very likely incorrect as we should not be adding
1138-
* new mapping with smaller granularity on top
1139-
* of existing 2M/1G mapping. Fail.
1140-
*/
1141-
if (_io_pte_present(pte) && _io_pte_leaf(pte))
1142-
return NULL;
1143-
/*
1144-
* Non-leaf entry is missing, allocate and try to add to the
1145-
* page table. This might race with other mappings, retry.
1146-
*/
1147-
if (_io_pte_none(pte)) {
1148-
addr = iommu_alloc_pages_node_sz(domain->numa_node, gfp,
1149-
SZ_4K);
1150-
if (!addr)
1151-
return NULL;
1152-
old = pte;
1153-
pte = _io_pte_entry(virt_to_pfn(addr), _PAGE_TABLE);
1154-
if (cmpxchg_relaxed(ptr, old, pte) != old) {
1155-
iommu_free_pages(addr);
1156-
goto pte_retry;
1157-
}
1158-
}
1159-
ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
1160-
} while (level-- > 0);
1161-
1162-
return NULL;
1163-
}
1164-
1165-
static unsigned long *riscv_iommu_pte_fetch(struct riscv_iommu_domain *domain,
1166-
unsigned long iova, size_t *pte_pgsize)
1167-
{
1168-
unsigned long *ptr = domain->pgd_root;
1169-
unsigned long pte;
1170-
int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;
1171-
1172-
do {
1173-
const int shift = PAGE_SHIFT + PT_SHIFT * level;
1174-
1175-
ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));
1176-
pte = READ_ONCE(*ptr);
1177-
if (_io_pte_present(pte) && _io_pte_leaf(pte)) {
1178-
*pte_pgsize = (size_t)1 << shift;
1179-
return ptr;
1180-
}
1181-
if (_io_pte_none(pte))
1182-
return NULL;
1183-
ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
1184-
} while (level-- > 0);
1185-
1186-
return NULL;
1187-
}
1188-
1189-
static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
1190-
unsigned long iova, phys_addr_t phys,
1191-
size_t pgsize, size_t pgcount, int prot,
1192-
gfp_t gfp, size_t *mapped)
1193-
{
1194-
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
1195-
size_t size = 0;
1196-
unsigned long *ptr;
1197-
unsigned long pte, old, pte_prot;
1198-
int rc = 0;
1199-
struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
1200-
1201-
if (!(prot & IOMMU_WRITE))
1202-
pte_prot = _PAGE_BASE | _PAGE_READ;
1203-
else
1204-
pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY;
1205-
1206-
while (pgcount) {
1207-
ptr = riscv_iommu_pte_alloc(domain, iova, pgsize, gfp);
1208-
if (!ptr) {
1209-
rc = -ENOMEM;
1210-
break;
1211-
}
1212-
1213-
old = READ_ONCE(*ptr);
1214-
pte = _io_pte_entry(phys_to_pfn(phys), pte_prot);
1215-
if (cmpxchg_relaxed(ptr, old, pte) != old)
1216-
continue;
1217-
1218-
riscv_iommu_pte_free(domain, old, &freelist);
1219-
1220-
size += pgsize;
1221-
iova += pgsize;
1222-
phys += pgsize;
1223-
--pgcount;
1224-
}
1225-
1226-
*mapped = size;
1227-
1228-
if (!iommu_pages_list_empty(&freelist)) {
1081+
if (iommu_pages_list_empty(&gather->freelist)) {
1082+
riscv_iommu_iotlb_inval(domain, gather->start, gather->end);
1083+
} else {
12291084
/*
12301085
* In 1.0 spec version, the smallest scope we can use to
12311086
* invalidate all levels of page table (i.e. leaf and non-leaf)
@@ -1234,71 +1089,20 @@ static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
12341089
* capability.NL (non-leaf) IOTINVAL command.
12351090
*/
12361091
riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX);
1237-
iommu_put_pages_list(&freelist);
1238-
}
1239-
1240-
return rc;
1241-
}
1242-
1243-
static size_t riscv_iommu_unmap_pages(struct iommu_domain *iommu_domain,
1244-
unsigned long iova, size_t pgsize,
1245-
size_t pgcount,
1246-
struct iommu_iotlb_gather *gather)
1247-
{
1248-
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
1249-
size_t size = pgcount << __ffs(pgsize);
1250-
unsigned long *ptr, old;
1251-
size_t unmapped = 0;
1252-
size_t pte_size;
1253-
1254-
while (unmapped < size) {
1255-
ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);
1256-
if (!ptr)
1257-
return unmapped;
1258-
1259-
/* partial unmap is not allowed, fail. */
1260-
if (iova & (pte_size - 1))
1261-
return unmapped;
1262-
1263-
old = READ_ONCE(*ptr);
1264-
if (cmpxchg_relaxed(ptr, old, 0) != old)
1265-
continue;
1266-
1267-
iommu_iotlb_gather_add_page(&domain->domain, gather, iova,
1268-
pte_size);
1269-
1270-
iova += pte_size;
1271-
unmapped += pte_size;
1092+
iommu_put_pages_list(&gather->freelist);
12721093
}
1273-
1274-
return unmapped;
1275-
}
1276-
1277-
static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain,
1278-
dma_addr_t iova)
1279-
{
1280-
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
1281-
size_t pte_size;
1282-
unsigned long *ptr;
1283-
1284-
ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);
1285-
if (!ptr)
1286-
return 0;
1287-
1288-
return pfn_to_phys(__page_val_to_pfn(*ptr)) | (iova & (pte_size - 1));
12891094
}
12901095

12911096
static void riscv_iommu_free_paging_domain(struct iommu_domain *iommu_domain)
12921097
{
12931098
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
1294-
const unsigned long pfn = virt_to_pfn(domain->pgd_root);
12951099

12961100
WARN_ON(!list_empty(&domain->bonds));
12971101

12981102
if ((int)domain->pscid > 0)
12991103
ida_free(&riscv_iommu_pscids, domain->pscid);
13001104

1301-
riscv_iommu_pte_free(domain, _io_pte_entry(pfn, _PAGE_TABLE), NULL);
1105+
pt_iommu_deinit(&domain->riscvpt.iommu);
13021106
kfree(domain);
13031107
}
13041108

@@ -1324,13 +1128,16 @@ static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain,
13241128
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
13251129
struct riscv_iommu_device *iommu = dev_to_iommu(dev);
13261130
struct riscv_iommu_info *info = dev_iommu_priv_get(dev);
1131+
struct pt_iommu_riscv_64_hw_info pt_info;
13271132
u64 fsc, ta;
13281133

1329-
if (!riscv_iommu_pt_supported(iommu, domain->pgd_mode))
1134+
pt_iommu_riscv_64_hw_info(&domain->riscvpt, &pt_info);
1135+
1136+
if (!riscv_iommu_pt_supported(iommu, pt_info.fsc_iosatp_mode))
13301137
return -ENODEV;
13311138

1332-
fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, domain->pgd_mode) |
1333-
FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, virt_to_pfn(domain->pgd_root));
1139+
fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, pt_info.fsc_iosatp_mode) |
1140+
FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, pt_info.ppn);
13341141
ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, domain->pscid) |
13351142
RISCV_IOMMU_PC_TA_V;
13361143

@@ -1345,80 +1152,56 @@ static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain,
13451152
}
13461153

13471154
static const struct iommu_domain_ops riscv_iommu_paging_domain_ops = {
1155+
IOMMU_PT_DOMAIN_OPS(riscv_64),
13481156
.attach_dev = riscv_iommu_attach_paging_domain,
13491157
.free = riscv_iommu_free_paging_domain,
1350-
.map_pages = riscv_iommu_map_pages,
1351-
.unmap_pages = riscv_iommu_unmap_pages,
1352-
.iova_to_phys = riscv_iommu_iova_to_phys,
13531158
.iotlb_sync = riscv_iommu_iotlb_sync,
13541159
.flush_iotlb_all = riscv_iommu_iotlb_flush_all,
13551160
};
13561161

13571162
static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
13581163
{
1164+
struct pt_iommu_riscv_64_cfg cfg = {};
13591165
struct riscv_iommu_domain *domain;
13601166
struct riscv_iommu_device *iommu;
1361-
unsigned int pgd_mode;
1362-
dma_addr_t va_mask;
1363-
int va_bits;
1167+
int ret;
13641168

13651169
iommu = dev_to_iommu(dev);
13661170
if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV57) {
1367-
pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57;
1368-
va_bits = 57;
1171+
cfg.common.hw_max_vasz_lg2 = 57;
13691172
} else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV48) {
1370-
pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48;
1371-
va_bits = 48;
1173+
cfg.common.hw_max_vasz_lg2 = 48;
13721174
} else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV39) {
1373-
pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39;
1374-
va_bits = 39;
1175+
cfg.common.hw_max_vasz_lg2 = 39;
13751176
} else {
13761177
dev_err(dev, "cannot find supported page table mode\n");
13771178
return ERR_PTR(-ENODEV);
13781179
}
1180+
cfg.common.hw_max_oasz_lg2 = 56;
13791181

13801182
domain = kzalloc_obj(*domain);
13811183
if (!domain)
13821184
return ERR_PTR(-ENOMEM);
13831185

13841186
INIT_LIST_HEAD_RCU(&domain->bonds);
13851187
spin_lock_init(&domain->lock);
1386-
domain->numa_node = dev_to_node(iommu->dev);
1387-
domain->pgd_mode = pgd_mode;
1388-
domain->pgd_root = iommu_alloc_pages_node_sz(domain->numa_node,
1389-
GFP_KERNEL_ACCOUNT, SZ_4K);
1390-
if (!domain->pgd_root) {
1391-
kfree(domain);
1392-
return ERR_PTR(-ENOMEM);
1393-
}
1188+
cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) |
1189+
BIT(PT_FEAT_FLUSH_RANGE);
1190+
domain->riscvpt.iommu.nid = dev_to_node(iommu->dev);
1191+
domain->domain.ops = &riscv_iommu_paging_domain_ops;
13941192

13951193
domain->pscid = ida_alloc_range(&riscv_iommu_pscids, 1,
13961194
RISCV_IOMMU_MAX_PSCID, GFP_KERNEL);
13971195
if (domain->pscid < 0) {
1398-
iommu_free_pages(domain->pgd_root);
1399-
kfree(domain);
1196+
riscv_iommu_free_paging_domain(&domain->domain);
14001197
return ERR_PTR(-ENOMEM);
14011198
}
14021199

1403-
/*
1404-
* Note: RISC-V Privilege spec mandates that virtual addresses
1405-
* need to be sign-extended, so if (VA_BITS - 1) is set, all
1406-
* bits >= VA_BITS need to also be set or else we'll get a
1407-
* page fault. However the code that creates the mappings
1408-
* above us (e.g. iommu_dma_alloc_iova()) won't do that for us
1409-
* for now, so we'll end up with invalid virtual addresses
1410-
* to map. As a workaround until we get this sorted out
1411-
* limit the available virtual addresses to VA_BITS - 1.
1412-
*/
1413-
va_mask = DMA_BIT_MASK(va_bits - 1);
1414-
1415-
domain->domain.geometry.aperture_start = 0;
1416-
domain->domain.geometry.aperture_end = va_mask;
1417-
domain->domain.geometry.force_aperture = true;
1418-
domain->domain.pgsize_bitmap = va_mask & (SZ_4K | SZ_2M | SZ_1G | SZ_512G);
1419-
1420-
domain->domain.ops = &riscv_iommu_paging_domain_ops;
1421-
1200+
ret = pt_iommu_riscv_64_init(&domain->riscvpt, &cfg, GFP_KERNEL);
1201+
if (ret) {
1202+
riscv_iommu_free_paging_domain(&domain->domain);
1203+
return ERR_PTR(ret);
1204+
}
14221205
return &domain->domain;
14231206
}
14241207

@@ -1674,3 +1457,5 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
16741457
riscv_iommu_queue_disable(&iommu->cmdq);
16751458
return rc;
16761459
}
1460+
1461+
MODULE_IMPORT_NS("GENERIC_PT_IOMMU");

0 commit comments

Comments
 (0)