 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/pci.h>
+#include <linux/generic_pt/iommu.h>
 
 #include "../iommu-pages.h"
 #include "iommu-bits.h"
@@ -806,14 +807,15 @@ static int riscv_iommu_iodir_set_mode(struct riscv_iommu_device *iommu,
 
 /* This struct contains protection domain specific IOMMU driver data. */
 struct riscv_iommu_domain {
-	struct iommu_domain domain;
+	union {
+		struct iommu_domain domain;
+		struct pt_iommu_riscv_64 riscvpt;
+	};
 	struct list_head bonds;
 	spinlock_t lock; /* protect bonds list updates. */
 	int pscid;
-	int numa_node;
-	unsigned int pgd_mode;
-	unsigned long *pgd_root;
 };
+PT_IOMMU_CHECK_DOMAIN(struct riscv_iommu_domain, riscvpt.iommu, domain);
 
 #define iommu_domain_to_riscv(iommu_domain) \
 	container_of(iommu_domain, struct riscv_iommu_domain, domain)
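Note on the union above: `struct pt_iommu_riscv_64` (from `<linux/generic_pt/iommu.h>`) embeds the core `struct iommu_domain`, so the two union members are alternate views of the same object. A minimal sketch of what the `PT_IOMMU_CHECK_DOMAIN()` line is there to guarantee, assuming (as this sketch does) that the macro reduces to an `offsetof()` static assert:

```c
#include <linux/build_bug.h>
#include <linux/stddef.h>

/*
 * Hypothetical expansion, for illustration only: both union views must
 * place the embedded struct iommu_domain at the same offset, otherwise
 * the container_of() in iommu_domain_to_riscv() would compute a wrong
 * pointer for domains handled through the generic page table layer.
 */
static_assert(offsetof(struct riscv_iommu_domain, riscvpt.iommu.domain) ==
	      offsetof(struct riscv_iommu_domain, domain));
```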
@@ -1076,156 +1078,9 @@ static void riscv_iommu_iotlb_sync(struct iommu_domain *iommu_domain,
 {
 	struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
 
-	riscv_iommu_iotlb_inval(domain, gather->start, gather->end);
-}
-
-#define PT_SHIFT (PAGE_SHIFT - ilog2(sizeof(pte_t)))
-
-#define _io_pte_present(pte)	((pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE))
-#define _io_pte_leaf(pte)	((pte) & _PAGE_LEAF)
-#define _io_pte_none(pte)	((pte) == 0)
-#define _io_pte_entry(pn, prot)	((_PAGE_PFN_MASK & ((pn) << _PAGE_PFN_SHIFT)) | (prot))
-
-static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain,
-				 unsigned long pte,
-				 struct iommu_pages_list *freelist)
-{
-	unsigned long *ptr;
-	int i;
-
-	if (!_io_pte_present(pte) || _io_pte_leaf(pte))
-		return;
-
-	ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
-
-	/* Recursively free all sub page table pages */
-	for (i = 0; i < PTRS_PER_PTE; i++) {
-		pte = READ_ONCE(ptr[i]);
-		if (!_io_pte_none(pte) && cmpxchg_relaxed(ptr + i, pte, 0) == pte)
-			riscv_iommu_pte_free(domain, pte, freelist);
-	}
-
-	if (freelist)
-		iommu_pages_list_add(freelist, ptr);
-	else
-		iommu_free_pages(ptr);
-}
-
-static unsigned long *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain,
-					    unsigned long iova, size_t pgsize,
-					    gfp_t gfp)
-{
-	unsigned long *ptr = domain->pgd_root;
-	unsigned long pte, old;
-	int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;
-	void *addr;
-
-	do {
-		const int shift = PAGE_SHIFT + PT_SHIFT * level;
-
-		ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));
-		/*
-		 * Note: returned entry might be a non-leaf if there was
-		 * existing mapping with smaller granularity. Up to the caller
-		 * to replace and invalidate.
-		 */
-		if (((size_t)1 << shift) == pgsize)
-			return ptr;
-pte_retry:
-		pte = READ_ONCE(*ptr);
-		/*
-		 * This is very likely incorrect as we should not be adding
-		 * new mapping with smaller granularity on top
-		 * of existing 2M/1G mapping. Fail.
-		 */
-		if (_io_pte_present(pte) && _io_pte_leaf(pte))
-			return NULL;
-		/*
-		 * Non-leaf entry is missing, allocate and try to add to the
-		 * page table. This might race with other mappings, retry.
-		 */
-		if (_io_pte_none(pte)) {
-			addr = iommu_alloc_pages_node_sz(domain->numa_node, gfp,
-							 SZ_4K);
-			if (!addr)
-				return NULL;
-			old = pte;
-			pte = _io_pte_entry(virt_to_pfn(addr), _PAGE_TABLE);
-			if (cmpxchg_relaxed(ptr, old, pte) != old) {
-				iommu_free_pages(addr);
-				goto pte_retry;
-			}
-		}
-		ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
-	} while (level-- > 0);
-
-	return NULL;
-}
-
-static unsigned long *riscv_iommu_pte_fetch(struct riscv_iommu_domain *domain,
-					    unsigned long iova, size_t *pte_pgsize)
-{
-	unsigned long *ptr = domain->pgd_root;
-	unsigned long pte;
-	int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;
-
-	do {
-		const int shift = PAGE_SHIFT + PT_SHIFT * level;
-
-		ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));
-		pte = READ_ONCE(*ptr);
-		if (_io_pte_present(pte) && _io_pte_leaf(pte)) {
-			*pte_pgsize = (size_t)1 << shift;
-			return ptr;
-		}
-		if (_io_pte_none(pte))
-			return NULL;
-		ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
-	} while (level-- > 0);
-
-	return NULL;
-}
-
-static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
-				 unsigned long iova, phys_addr_t phys,
-				 size_t pgsize, size_t pgcount, int prot,
-				 gfp_t gfp, size_t *mapped)
-{
-	struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
-	size_t size = 0;
-	unsigned long *ptr;
-	unsigned long pte, old, pte_prot;
-	int rc = 0;
-	struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
-
-	if (!(prot & IOMMU_WRITE))
-		pte_prot = _PAGE_BASE | _PAGE_READ;
-	else
-		pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY;
-
-	while (pgcount) {
-		ptr = riscv_iommu_pte_alloc(domain, iova, pgsize, gfp);
-		if (!ptr) {
-			rc = -ENOMEM;
-			break;
-		}
-
-		old = READ_ONCE(*ptr);
-		pte = _io_pte_entry(phys_to_pfn(phys), pte_prot);
-		if (cmpxchg_relaxed(ptr, old, pte) != old)
-			continue;
-
-		riscv_iommu_pte_free(domain, old, &freelist);
-
-		size += pgsize;
-		iova += pgsize;
-		phys += pgsize;
-		--pgcount;
-	}
-
-	*mapped = size;
-
-	if (!iommu_pages_list_empty(&freelist)) {
+	if (iommu_pages_list_empty(&gather->freelist)) {
+		riscv_iommu_iotlb_inval(domain, gather->start, gather->end);
+	} else {
 		/*
 		 * In 1.0 spec version, the smallest scope we can use to
 		 * invalidate all levels of page table (i.e. leaf and non-leaf)
@@ -1234,71 +1089,20 @@ static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
 		 * is an invalidate-all-PSCID IOTINVAL.VMA with AV=0.
 		 * This will be updated with hardware support for
 		 * capability.NL (non-leaf) IOTINVAL command.
 		 */
 		riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX);
-		iommu_put_pages_list(&freelist);
-	}
-
-	return rc;
-}
-
-static size_t riscv_iommu_unmap_pages(struct iommu_domain *iommu_domain,
-				      unsigned long iova, size_t pgsize,
-				      size_t pgcount,
-				      struct iommu_iotlb_gather *gather)
-{
-	struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
-	size_t size = pgcount << __ffs(pgsize);
-	unsigned long *ptr, old;
-	size_t unmapped = 0;
-	size_t pte_size;
-
-	while (unmapped < size) {
-		ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);
-		if (!ptr)
-			return unmapped;
-
-		/* partial unmap is not allowed, fail. */
-		if (iova & (pte_size - 1))
-			return unmapped;
-
-		old = READ_ONCE(*ptr);
-		if (cmpxchg_relaxed(ptr, old, 0) != old)
-			continue;
-
-		iommu_iotlb_gather_add_page(&domain->domain, gather, iova,
-					    pte_size);
-
-		iova += pte_size;
-		unmapped += pte_size;
+		iommu_put_pages_list(&gather->freelist);
 	}
-
-	return unmapped;
-}
-
-static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain,
-					    dma_addr_t iova)
-{
-	struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
-	size_t pte_size;
-	unsigned long *ptr;
-
-	ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);
-	if (!ptr)
-		return 0;
-
-	return pfn_to_phys(__page_val_to_pfn(*ptr)) | (iova & (pte_size - 1));
 }
 
 static void riscv_iommu_free_paging_domain(struct iommu_domain *iommu_domain)
 {
 	struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
-	const unsigned long pfn = virt_to_pfn(domain->pgd_root);
 
 	WARN_ON(!list_empty(&domain->bonds));
 
 	if ((int)domain->pscid > 0)
 		ida_free(&riscv_iommu_pscids, domain->pscid);
 
-	riscv_iommu_pte_free(domain, _io_pte_entry(pfn, _PAGE_TABLE), NULL);
+	pt_iommu_deinit(&domain->riscvpt.iommu);
 	kfree(domain);
 }
 
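Pieced together from the context and added lines above, the post-conversion sync path reads as follows. The open-coded map/unmap/iova_to_phys walkers are gone from this file; `gather->freelist` now carries any table pages the generic layer unhooked, and those must not be freed until the IOTLB can no longer reference them:

```c
static void riscv_iommu_iotlb_sync(struct iommu_domain *iommu_domain,
				   struct iommu_iotlb_gather *gather)
{
	struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);

	if (iommu_pages_list_empty(&gather->freelist)) {
		/* Only leaf PTEs changed: a ranged invalidate suffices. */
		riscv_iommu_iotlb_inval(domain, gather->start, gather->end);
	} else {
		/*
		 * Non-leaf table pages were freed. Per the 1.0 spec, the
		 * smallest scope that also flushes non-leaf entries is an
		 * invalidate-all-PSCID IOTINVAL.VMA with AV=0.
		 */
		riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX);
		iommu_put_pages_list(&gather->freelist);
	}
}
```

The same invalidate-before-free ordering is what lets `riscv_iommu_free_paging_domain()` above replace the recursive `riscv_iommu_pte_free()` walk with a single `pt_iommu_deinit()` once no device can reference the table.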
@@ -1324,13 +1128,16 @@ static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain,
 	struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
 	struct riscv_iommu_device *iommu = dev_to_iommu(dev);
 	struct riscv_iommu_info *info = dev_iommu_priv_get(dev);
+	struct pt_iommu_riscv_64_hw_info pt_info;
 	u64 fsc, ta;
 
-	if (!riscv_iommu_pt_supported(iommu, domain->pgd_mode))
+	pt_iommu_riscv_64_hw_info(&domain->riscvpt, &pt_info);
+
+	if (!riscv_iommu_pt_supported(iommu, pt_info.fsc_iosatp_mode))
 		return -ENODEV;
 
-	fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, domain->pgd_mode) |
-	      FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, virt_to_pfn(domain->pgd_root));
+	fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, pt_info.fsc_iosatp_mode) |
+	      FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, pt_info.ppn);
 	ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, domain->pscid) |
 	     RISCV_IOMMU_PC_TA_V;
 
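The attach path no longer reads the paging mode and root page number from driver-private fields; it asks the page table layer for them. A sketch of the shape implied by the two members the hunk dereferences — the real definition lives in the generic_pt headers, and the field types here are assumptions for illustration:

```c
/*
 * Illustrative only: pt_iommu_riscv_64_hw_info() is expected to report
 * what is needed to program the device context. The hunk above uses
 * exactly these two members.
 */
struct pt_iommu_riscv_64_hw_info {
	u64 ppn;		/* root table PPN, for RISCV_IOMMU_PC_FSC_PPN */
	u32 fsc_iosatp_mode;	/* RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39/48/57 */
};
```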
@@ -1345,80 +1152,56 @@ static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain,
 }
 
 static const struct iommu_domain_ops riscv_iommu_paging_domain_ops = {
+	IOMMU_PT_DOMAIN_OPS(riscv_64),
 	.attach_dev = riscv_iommu_attach_paging_domain,
 	.free = riscv_iommu_free_paging_domain,
-	.map_pages = riscv_iommu_map_pages,
-	.unmap_pages = riscv_iommu_unmap_pages,
-	.iova_to_phys = riscv_iommu_iova_to_phys,
 	.iotlb_sync = riscv_iommu_iotlb_sync,
 	.flush_iotlb_all = riscv_iommu_iotlb_flush_all,
 };
 
 static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
 {
+	struct pt_iommu_riscv_64_cfg cfg = {};
 	struct riscv_iommu_domain *domain;
 	struct riscv_iommu_device *iommu;
-	unsigned int pgd_mode;
-	dma_addr_t va_mask;
-	int va_bits;
+	int ret;
 
 	iommu = dev_to_iommu(dev);
 	if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV57) {
-		pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57;
-		va_bits = 57;
+		cfg.common.hw_max_vasz_lg2 = 57;
 	} else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV48) {
-		pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48;
-		va_bits = 48;
+		cfg.common.hw_max_vasz_lg2 = 48;
 	} else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV39) {
-		pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39;
-		va_bits = 39;
+		cfg.common.hw_max_vasz_lg2 = 39;
 	} else {
 		dev_err(dev, "cannot find supported page table mode\n");
 		return ERR_PTR(-ENODEV);
 	}
+	cfg.common.hw_max_oasz_lg2 = 56;
 
 	domain = kzalloc_obj(*domain);
 	if (!domain)
 		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD_RCU(&domain->bonds);
 	spin_lock_init(&domain->lock);
-	domain->numa_node = dev_to_node(iommu->dev);
-	domain->pgd_mode = pgd_mode;
-	domain->pgd_root = iommu_alloc_pages_node_sz(domain->numa_node,
-						     GFP_KERNEL_ACCOUNT, SZ_4K);
-	if (!domain->pgd_root) {
-		kfree(domain);
-		return ERR_PTR(-ENOMEM);
-	}
+	cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) |
+			      BIT(PT_FEAT_FLUSH_RANGE);
+	domain->riscvpt.iommu.nid = dev_to_node(iommu->dev);
+	domain->domain.ops = &riscv_iommu_paging_domain_ops;
 
 	domain->pscid = ida_alloc_range(&riscv_iommu_pscids, 1,
 					RISCV_IOMMU_MAX_PSCID, GFP_KERNEL);
 	if (domain->pscid < 0) {
-		iommu_free_pages(domain->pgd_root);
-		kfree(domain);
+		riscv_iommu_free_paging_domain(&domain->domain);
 		return ERR_PTR(-ENOMEM);
 	}
 
-	/*
-	 * Note: RISC-V Privilege spec mandates that virtual addresses
-	 * need to be sign-extended, so if (VA_BITS - 1) is set, all
-	 * bits >= VA_BITS need to also be set or else we'll get a
-	 * page fault. However the code that creates the mappings
-	 * above us (e.g. iommu_dma_alloc_iova()) won't do that for us
-	 * for now, so we'll end up with invalid virtual addresses
-	 * to map. As a workaround until we get this sorted out
-	 * limit the available virtual addresses to VA_BITS - 1.
-	 */
-	va_mask = DMA_BIT_MASK(va_bits - 1);
-
-	domain->domain.geometry.aperture_start = 0;
-	domain->domain.geometry.aperture_end = va_mask;
-	domain->domain.geometry.force_aperture = true;
-	domain->domain.pgsize_bitmap = va_mask & (SZ_4K | SZ_2M | SZ_1G | SZ_512G);
-
-	domain->domain.ops = &riscv_iommu_paging_domain_ops;
-
+	ret = pt_iommu_riscv_64_init(&domain->riscvpt, &cfg, GFP_KERNEL);
+	if (ret) {
+		riscv_iommu_free_paging_domain(&domain->domain);
+		return ERR_PTR(ret);
+	}
 	return &domain->domain;
 }
 
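Two of the removed responsibilities become feature bits rather than open-coded logic: the deleted comment's sign-extension workaround (clamping the aperture to VA_BITS - 1) is expressed as BIT(PT_FEAT_SIGN_EXTEND), and ranged flushing as BIT(PT_FEAT_FLUSH_RANGE). A condensed sketch of the resulting init flow, assuming pt_iommu_riscv_64_init() selects the paging mode from hw_max_vasz_lg2 and fills in the domain geometry and pgsize_bitmap that were previously set by hand:

```c
/* Condensed from the hunk above; error handling omitted. */
struct pt_iommu_riscv_64_cfg cfg = {
	.common.hw_max_vasz_lg2 = 57,	/* e.g. an Sv57-capable IOMMU */
	.common.hw_max_oasz_lg2 = 56,	/* RISC-V physical addresses are 56 bits */
	.common.features = BIT(PT_FEAT_SIGN_EXTEND) |
			   BIT(PT_FEAT_FLUSH_RANGE),
};

domain->riscvpt.iommu.nid = dev_to_node(iommu->dev);
domain->domain.ops = &riscv_iommu_paging_domain_ops;
ret = pt_iommu_riscv_64_init(&domain->riscvpt, &cfg, GFP_KERNEL);
```

This is also why the pscid error path now funnels through riscv_iommu_free_paging_domain() instead of freeing the root page and the domain piecemeal.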
@@ -1674,3 +1457,5 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
 	riscv_iommu_queue_disable(&iommu->cmdq);
 	return rc;
 }
+
+MODULE_IMPORT_NS("GENERIC_PT_IOMMU");
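The import names the symbol namespace the generic page table library exports into (string-literal namespaces, as used in recent kernels). A sketch of the corresponding exporting side, illustrative rather than quoted from the library:

```c
/*
 * In the generic_pt library: placing the entry points in a dedicated
 * namespace means only modules that declare the import, as this file
 * now does, can link against them.
 */
EXPORT_SYMBOL_NS_GPL(pt_iommu_riscv_64_init, "GENERIC_PT_IOMMU");
```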