@@ -1289,16 +1289,6 @@ struct arm_smmu_invs *arm_smmu_invs_purge(struct arm_smmu_invs *invs)
 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_purge);
 
 /* Context descriptor manipulation functions */
-void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
-{
-	struct arm_smmu_cmdq_ent cmd = {
-		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
-			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
-		.tlbi.asid = asid,
-	};
-
-	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
-}
 
 /*
  * Based on the value of ent report which bits of the STE the HW will access. It
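Note: arm_smmu_tlb_inv_asid() picked its opcode from ARM_SMMU_FEAT_E2H because, when the SMMU runs with VHE, ASID-tagged TLB entries are EL2 entries and need CMDQ_OP_TLBI_EL2_ASID rather than CMDQ_OP_TLBI_NH_ASID. Judging by the hunks below, callers now go through a consolidated invalidation path instead. A hedged before/after sketch, assuming the new helper derives the opcode and ASID from the domain itself:

	/* before: explicit per-ASID invalidation + sync */
	arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);

	/* after (sketch): domain-wide helper picks opcode and ASID/VMID */
	arm_smmu_domain_inv(smmu_domain);
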
@@ -2509,90 +2499,27 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
 }
 
-int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
-			    unsigned long iova, size_t size)
-{
-	struct arm_smmu_master_domain *master_domain;
-	int i;
-	unsigned long flags;
-	struct arm_smmu_cmdq_ent cmd = {
-		.opcode = CMDQ_OP_ATC_INV,
-	};
-	struct arm_smmu_cmdq_batch cmds;
-
-	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
-		return 0;
-
-	/*
-	 * Ensure that we've completed prior invalidation of the main TLBs
-	 * before we read 'nr_ats_masters' in case of a concurrent call to
-	 * arm_smmu_enable_ats():
-	 *
-	 *	// unmap()			// arm_smmu_enable_ats()
-	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
-	 *	smp_mb();			[...]
-	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
-	 *
-	 * Ensures that we always see the incremented 'nr_ats_masters' count if
-	 * ATS was enabled at the PCI device before completion of the TLBI.
-	 */
-	smp_mb();
-	if (!atomic_read(&smmu_domain->nr_ats_masters))
-		return 0;
-
-	arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd);
-
-	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
-	list_for_each_entry(master_domain, &smmu_domain->devices,
-			    devices_elm) {
-		struct arm_smmu_master *master = master_domain->master;
-
-		if (!master->ats_enabled)
-			continue;
-
-		if (master_domain->nested_ats_flush) {
-			/*
-			 * If a S2 used as a nesting parent is changed we have
-			 * no option but to completely flush the ATC.
-			 */
-			arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
-		} else {
-			arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size,
-						&cmd);
-		}
-
-		for (i = 0; i < master->num_streams; i++) {
-			cmd.atc.sid = master->streams[i].id;
-			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
-		}
-	}
-	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
-
-	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
-}
-
 /* IO_PGTABLE API */
 static void arm_smmu_tlb_inv_context(void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	struct arm_smmu_cmdq_ent cmd;
 
 	/*
-	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
-	 * PTEs previously cleared by unmaps on the current CPU not yet visible
-	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
-	 * insertion to guarantee those are observed before the TLBI. Do be
-	 * careful, 007.
+	 * If the DMA API is running in non-strict mode then another CPU could
+	 * have changed the page table and not invoked any flush op. Instead the
+	 * other CPU will do an atomic_read() and this CPU will have done an
+	 * atomic_write(). That handshake is enough to acquire the page table
+	 * writes from the other CPU.
+	 *
+	 * All command execution has a dma_wmb() to release all the in-memory
+	 * structures written by this CPU, that barrier must also release the
+	 * writes acquired from all the other CPUs too.
+	 *
+	 * There are other barriers and atomics on this path, but the above is
+	 * the essential mechanism for ensuring that HW sees the page table
+	 * writes from another CPU before it executes the IOTLB invalidation.
 	 */
-	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
-		arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
-	} else {
-		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
-		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
-		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
-	}
-	arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
+	arm_smmu_domain_inv(smmu_domain);
 }
 
 static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
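Note: the trickiest part of the removed arm_smmu_atc_inv_domain() is the smp_mb() pairing its comment diagrams. A minimal sketch of the two sides, simplified from that comment; issue_tlbi_and_sync() is a placeholder for the real TLBI + CMD_SYNC submission:

	/* enable side (arm_smmu_enable_ats(), simplified) */
	atomic_inc(&smmu_domain->nr_ats_masters);
	/* ... STE update ... */
	pci_enable_ats(pdev, stu);	/* MMIO write: ATS turns on */

	/* invalidation side (unmap path, simplified) */
	issue_tlbi_and_sync();		/* placeholder for TLBI + CMD_SYNC */
	smp_mb();			/* order TLBI completion before the read */
	if (!atomic_read(&smmu_domain->nr_ats_masters))
		return 0;		/* no ATS-enabled masters to flush */

The guarantee: if ATS was enabled at the device before the TLBI completed, the invalidation side cannot miss the incremented count, so an ATC invalidation is never skipped for a live ATS master. The replacement comment in arm_smmu_tlb_inv_context() recasts this as an acquire/release handshake that the new arm_smmu_domain_inv() path is expected to uphold.
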
@@ -2604,7 +2531,7 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
 	unsigned long end = iova + size, num_pages = 0, tg = pgsize;
 	size_t inv_range = granule;
 
-	if (!size)
+	if (WARN_ON_ONCE(!size))
 		return;
 
 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
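Note: WARN_ON_ONCE() evaluates to its condition, so this change keeps the old silent early return for a zero-length range while flagging the first caller that passes one:

	/* warn once, still bail out: a zero-size range is a caller bug */
	if (WARN_ON_ONCE(!size))
		return;
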
@@ -2659,76 +2586,6 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
 	}
 }
 
-static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
-				     unsigned long iova, size_t size,
-				     size_t granule,
-				     struct arm_smmu_domain *smmu_domain)
-{
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	struct arm_smmu_cmdq_batch cmds;
-	size_t pgsize;
-
-	/* Get the leaf page size */
-	pgsize = __ffs(smmu_domain->domain.pgsize_bitmap);
-
-	arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
-	arm_smmu_cmdq_batch_add_range(smmu, &cmds, cmd, iova, size, granule,
-				      pgsize);
-	arm_smmu_cmdq_batch_submit(smmu, &cmds);
-}
-
-static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
-					  size_t granule, bool leaf,
-					  struct arm_smmu_domain *smmu_domain)
-{
-	struct arm_smmu_cmdq_ent cmd = {
-		.tlbi = {
-			.leaf = leaf,
-		},
-	};
-
-	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
-		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
-				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
-		cmd.tlbi.asid	= smmu_domain->cd.asid;
-	} else {
-		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
-		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
-	}
-	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
-
-	if (smmu_domain->nest_parent) {
-		/*
-		 * When the S2 domain changes all the nested S1 ASIDs have to be
-		 * flushed too.
-		 */
-		cmd.opcode = CMDQ_OP_TLBI_NH_ALL;
-		arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd);
-	}
-
-	/*
-	 * Unfortunately, this can't be leaf-only since we may have
-	 * zapped an entire table.
-	 */
-	arm_smmu_atc_inv_domain(smmu_domain, iova, size);
-}
-
-void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
-				 size_t granule, bool leaf,
-				 struct arm_smmu_domain *smmu_domain)
-{
-	struct arm_smmu_cmdq_ent cmd = {
-		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
-			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
-		.tlbi = {
-			.asid = asid,
-			.leaf = leaf,
-		},
-	};
-
-	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
-}
-
 static bool arm_smmu_inv_size_too_big(struct arm_smmu_device *smmu, size_t size,
 				      size_t granule)
 {
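Note: the leaf-page-size trick removed with __arm_smmu_tlb_inv_range() survives through the pgsize argument of arm_smmu_cmdq_batch_add_range(), which feeds tg in the hunk above. __ffs() returns the index of the lowest set bit of pgsize_bitmap, i.e. log2 of the smallest supported page size. A standalone worked sketch, using __builtin_ctzl() as a userspace stand-in for the kernel's __ffs():

	#include <assert.h>

	int main(void)
	{
		/* e.g. a domain supporting 4K, 2M and 1G mappings */
		unsigned long pgsize_bitmap =
			(1UL << 12) | (1UL << 21) | (1UL << 30);

		/* lowest set bit == log2(leaf page size): 12, i.e. 4K */
		assert(__builtin_ctzl(pgsize_bitmap) == 12);
		return 0;
	}
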
@@ -2930,7 +2787,9 @@ static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
 				  size_t granule, void *cookie)
 {
-	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
+	struct arm_smmu_domain *smmu_domain = cookie;
+
+	arm_smmu_domain_inv_range(smmu_domain, iova, size, granule, false);
 }
 
 static const struct iommu_flush_ops arm_smmu_flush_ops = {
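Note: the cookie here is the opaque pointer registered with io-pgtable when the page-table ops were allocated; in this driver it is the arm_smmu_domain, which is why the direct cast is safe. A sketch of the wiring, with member names from the generic struct iommu_flush_ops API and arm_smmu_tlb_inv_page_nosync taken from the hunk header above (the exact initializer in this file may differ):

	static const struct iommu_flush_ops arm_smmu_flush_ops = {
		.tlb_flush_all	= arm_smmu_tlb_inv_context,
		.tlb_flush_walk	= arm_smmu_tlb_inv_walk,
		.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
	};

	/* elsewhere (sketch): the domain is passed as the io-pgtable cookie */
	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
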
@@ -4201,9 +4060,9 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
 	if (!gather->pgsize)
 		return;
 
-	arm_smmu_tlb_inv_range_domain(gather->start,
-				      gather->end - gather->start + 1,
-				      gather->pgsize, true, smmu_domain);
+	arm_smmu_domain_inv_range(smmu_domain, gather->start,
+				  gather->end - gather->start + 1,
+				  gather->pgsize, true);
 }
 
 static phys_addr_t
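Note: struct iommu_iotlb_gather stores an inclusive end address, hence the +1 when converting the gathered window into a byte size. A quick worked example with illustrative values:

	/* gather->start = 0x1000, gather->end = 0x2fff (inclusive) */
	size_t size = 0x2fff - 0x1000 + 1;	/* 0x2000 bytes: two 4K pages */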