Skip to content

Commit 4202fdd

Browse files
nicolinc authored and willdeacon committed
iommu/arm-smmu-v3: Perform per-domain invalidations using arm_smmu_invs
Replace the old invalidation functions with arm_smmu_domain_inv_range() in all the existing invalidation routines. And deprecate the old functions. The new arm_smmu_domain_inv_range() handles the CMDQ_MAX_TLBI_OPS as well, so drop it in the SVA function. Since arm_smmu_cmdq_batch_add_range() has only one caller now, and it must be given a valid size, add a WARN_ON_ONCE to catch any missed case. Also update the comments in arm_smmu_tlb_inv_context() to clarify things with the new invalidation functions. Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> Signed-off-by: Will Deacon <will@kernel.org>
1 parent 587bb3e commit 4202fdd

3 files changed

Lines changed: 24 additions & 195 deletions

File tree

drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c

Lines changed: 3 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -122,15 +122,6 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
122122
}
123123
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_sva_cd);
124124

125-
/*
126-
* Cloned from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, this
127-
* is used as a threshold to replace per-page TLBI commands to issue in the
128-
* command queue with an address-space TLBI command, when SMMU w/o a range
129-
* invalidation feature handles too many per-page TLBI commands, which will
130-
* otherwise result in a soft lockup.
131-
*/
132-
#define CMDQ_MAX_TLBI_OPS (1 << (PAGE_SHIFT - 3))
133-
134125
static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
135126
struct mm_struct *mm,
136127
unsigned long start,
@@ -146,21 +137,8 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
146137
* range. So do a simple translation here by calculating size correctly.
147138
*/
148139
size = end - start;
149-
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_RANGE_INV)) {
150-
if (size >= CMDQ_MAX_TLBI_OPS * PAGE_SIZE)
151-
size = 0;
152-
} else {
153-
if (size == ULONG_MAX)
154-
size = 0;
155-
}
156-
157-
if (!size)
158-
arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid);
159-
else
160-
arm_smmu_tlb_inv_range_asid(start, size, smmu_domain->cd.asid,
161-
PAGE_SIZE, false, smmu_domain);
162140

163-
arm_smmu_atc_inv_domain(smmu_domain, start, size);
141+
arm_smmu_domain_inv_range(smmu_domain, start, size, PAGE_SIZE, false);
164142
}
165143

166144
static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
@@ -191,8 +169,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
191169
}
192170
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
193171

194-
arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid);
195-
arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
172+
arm_smmu_domain_inv(smmu_domain);
196173
}
197174

198175
static void arm_smmu_mmu_notifier_free(struct mmu_notifier *mn)
@@ -302,7 +279,7 @@ static void arm_smmu_sva_domain_free(struct iommu_domain *domain)
302279
/*
303280
* Ensure the ASID is empty in the iommu cache before allowing reuse.
304281
*/
305-
arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid);
282+
arm_smmu_domain_inv(smmu_domain);
306283

307284
/*
308285
* Notice that the arm_smmu_mm_arch_invalidate_secondary_tlbs op can

drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c

Lines changed: 21 additions & 162 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,16 +1289,6 @@ struct arm_smmu_invs *arm_smmu_invs_purge(struct arm_smmu_invs *invs)
12891289
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_purge);
12901290

12911291
/* Context descriptor manipulation functions */
1292-
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
1293-
{
1294-
struct arm_smmu_cmdq_ent cmd = {
1295-
.opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
1296-
CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
1297-
.tlbi.asid = asid,
1298-
};
1299-
1300-
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1301-
}
13021292

13031293
/*
13041294
* Based on the value of ent report which bits of the STE the HW will access. It
@@ -2509,90 +2499,27 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
25092499
return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
25102500
}
25112501

2512-
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
2513-
unsigned long iova, size_t size)
2514-
{
2515-
struct arm_smmu_master_domain *master_domain;
2516-
int i;
2517-
unsigned long flags;
2518-
struct arm_smmu_cmdq_ent cmd = {
2519-
.opcode = CMDQ_OP_ATC_INV,
2520-
};
2521-
struct arm_smmu_cmdq_batch cmds;
2522-
2523-
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
2524-
return 0;
2525-
2526-
/*
2527-
* Ensure that we've completed prior invalidation of the main TLBs
2528-
* before we read 'nr_ats_masters' in case of a concurrent call to
2529-
* arm_smmu_enable_ats():
2530-
*
2531-
* // unmap() // arm_smmu_enable_ats()
2532-
* TLBI+SYNC atomic_inc(&nr_ats_masters);
2533-
* smp_mb(); [...]
2534-
* atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
2535-
*
2536-
* Ensures that we always see the incremented 'nr_ats_masters' count if
2537-
* ATS was enabled at the PCI device before completion of the TLBI.
2538-
*/
2539-
smp_mb();
2540-
if (!atomic_read(&smmu_domain->nr_ats_masters))
2541-
return 0;
2542-
2543-
arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd);
2544-
2545-
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2546-
list_for_each_entry(master_domain, &smmu_domain->devices,
2547-
devices_elm) {
2548-
struct arm_smmu_master *master = master_domain->master;
2549-
2550-
if (!master->ats_enabled)
2551-
continue;
2552-
2553-
if (master_domain->nested_ats_flush) {
2554-
/*
2555-
* If a S2 used as a nesting parent is changed we have
2556-
* no option but to completely flush the ATC.
2557-
*/
2558-
arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
2559-
} else {
2560-
arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size,
2561-
&cmd);
2562-
}
2563-
2564-
for (i = 0; i < master->num_streams; i++) {
2565-
cmd.atc.sid = master->streams[i].id;
2566-
arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
2567-
}
2568-
}
2569-
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2570-
2571-
return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
2572-
}
2573-
25742502
/* IO_PGTABLE API */
25752503
static void arm_smmu_tlb_inv_context(void *cookie)
25762504
{
25772505
struct arm_smmu_domain *smmu_domain = cookie;
2578-
struct arm_smmu_device *smmu = smmu_domain->smmu;
2579-
struct arm_smmu_cmdq_ent cmd;
25802506

25812507
/*
2582-
* NOTE: when io-pgtable is in non-strict mode, we may get here with
2583-
* PTEs previously cleared by unmaps on the current CPU not yet visible
2584-
* to the SMMU. We are relying on the dma_wmb() implicit during cmd
2585-
* insertion to guarantee those are observed before the TLBI. Do be
2586-
* careful, 007.
2508+
* If the DMA API is running in non-strict mode then another CPU could
2509+
* have changed the page table and not invoked any flush op. Instead the
2510+
* other CPU will do an atomic_read() and this CPU will have done an
2511+
* atomic_write(). That handshake is enough to acquire the page table
2512+
* writes from the other CPU.
2513+
*
2514+
* All command execution has a dma_wmb() to release all the in-memory
2515+
* structures written by this CPU, that barrier must also release the
2516+
* writes acquired from all the other CPUs too.
2517+
*
2518+
* There are other barriers and atomics on this path, but the above is
2519+
* the essential mechanism for ensuring that HW sees the page table
2520+
* writes from another CPU before it executes the IOTLB invalidation.
25872521
*/
2588-
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2589-
arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
2590-
} else {
2591-
cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
2592-
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
2593-
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
2594-
}
2595-
arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
2522+
arm_smmu_domain_inv(smmu_domain);
25962523
}
25972524

25982525
static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
@@ -2604,7 +2531,7 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
26042531
unsigned long end = iova + size, num_pages = 0, tg = pgsize;
26052532
size_t inv_range = granule;
26062533

2607-
if (!size)
2534+
if (WARN_ON_ONCE(!size))
26082535
return;
26092536

26102537
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
@@ -2659,76 +2586,6 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
26592586
}
26602587
}
26612588

2662-
static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
2663-
unsigned long iova, size_t size,
2664-
size_t granule,
2665-
struct arm_smmu_domain *smmu_domain)
2666-
{
2667-
struct arm_smmu_device *smmu = smmu_domain->smmu;
2668-
struct arm_smmu_cmdq_batch cmds;
2669-
size_t pgsize;
2670-
2671-
/* Get the leaf page size */
2672-
pgsize = __ffs(smmu_domain->domain.pgsize_bitmap);
2673-
2674-
arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
2675-
arm_smmu_cmdq_batch_add_range(smmu, &cmds, cmd, iova, size, granule,
2676-
pgsize);
2677-
arm_smmu_cmdq_batch_submit(smmu, &cmds);
2678-
}
2679-
2680-
static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
2681-
size_t granule, bool leaf,
2682-
struct arm_smmu_domain *smmu_domain)
2683-
{
2684-
struct arm_smmu_cmdq_ent cmd = {
2685-
.tlbi = {
2686-
.leaf = leaf,
2687-
},
2688-
};
2689-
2690-
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2691-
cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2692-
CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
2693-
cmd.tlbi.asid = smmu_domain->cd.asid;
2694-
} else {
2695-
cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
2696-
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
2697-
}
2698-
__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2699-
2700-
if (smmu_domain->nest_parent) {
2701-
/*
2702-
* When the S2 domain changes all the nested S1 ASIDs have to be
2703-
* flushed too.
2704-
*/
2705-
cmd.opcode = CMDQ_OP_TLBI_NH_ALL;
2706-
arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd);
2707-
}
2708-
2709-
/*
2710-
* Unfortunately, this can't be leaf-only since we may have
2711-
* zapped an entire table.
2712-
*/
2713-
arm_smmu_atc_inv_domain(smmu_domain, iova, size);
2714-
}
2715-
2716-
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
2717-
size_t granule, bool leaf,
2718-
struct arm_smmu_domain *smmu_domain)
2719-
{
2720-
struct arm_smmu_cmdq_ent cmd = {
2721-
.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2722-
CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
2723-
.tlbi = {
2724-
.asid = asid,
2725-
.leaf = leaf,
2726-
},
2727-
};
2728-
2729-
__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2730-
}
2731-
27322589
static bool arm_smmu_inv_size_too_big(struct arm_smmu_device *smmu, size_t size,
27332590
size_t granule)
27342591
{
@@ -2930,7 +2787,9 @@ static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
29302787
static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
29312788
size_t granule, void *cookie)
29322789
{
2933-
arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2790+
struct arm_smmu_domain *smmu_domain = cookie;
2791+
2792+
arm_smmu_domain_inv_range(smmu_domain, iova, size, granule, false);
29342793
}
29352794

29362795
static const struct iommu_flush_ops arm_smmu_flush_ops = {
@@ -4201,9 +4060,9 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
42014060
if (!gather->pgsize)
42024061
return;
42034062

4204-
arm_smmu_tlb_inv_range_domain(gather->start,
4205-
gather->end - gather->start + 1,
4206-
gather->pgsize, true, smmu_domain);
4063+
arm_smmu_domain_inv_range(smmu_domain, gather->start,
4064+
gather->end - gather->start + 1,
4065+
gather->pgsize, true);
42074066
}
42084067

42094068
static phys_addr_t

drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,13 +1080,6 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master,
10801080
struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
10811081
struct arm_smmu_cd *cd, struct iommu_domain *old);
10821082

1083-
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
1084-
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1085-
size_t granule, bool leaf,
1086-
struct arm_smmu_domain *smmu_domain);
1087-
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
1088-
unsigned long iova, size_t size);
1089-
10901083
void arm_smmu_domain_inv_range(struct arm_smmu_domain *smmu_domain,
10911084
unsigned long iova, size_t size,
10921085
unsigned int granule, bool leaf);

0 commit comments

Comments (0)