
Commit d6c65b0

jgunthorpe authored and joergroedel committed
iommupt: Avoid rewalking during map
Currently the core code provides a simplified interface to drivers where it fragments a requested multi-page map into single page size steps after doing all the calculations to figure out what page size is appropriate. Each step rewalks the page tables from the start.

Since iommupt has a single implementation of the mapping algorithm it can internally compute each step as it goes while retaining its current position in the walk. Add a new function pt_pgsz_count() which computes the length of a same-page-size fragment of a large mapping operation. Compute the next fragment once all the leaf entries of the current fragment have been written, then continue walking from the current point.

The function pointer is run through pt_iommu_ops instead of iommu_domain_ops to discourage using it outside iommupt. All drivers with their own page tables should continue to use the simplified map_pages() style interfaces.

Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
1 parent 99fb8af commit d6c65b0

5 files changed

Lines changed: 175 additions & 65 deletions
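To make the fragmentation concrete, here is a small standalone sketch (plain userspace C, not the kernel implementation; best_pgsize_lg2() and the fragment loop are invented for illustration). Mapping VA/OA 4K..1G with a 4K|2M page-size bitmap splits into two fragments: 511 4K pages up to the first 2M boundary, then 511 2M pages. The old core code re-walked the page tables for each step inside every fragment; iommupt now computes the fragment length once and keeps walking in place.

#include <stdint.h>
#include <stdio.h>

#define SZ_4K 0x1000ULL
#define SZ_2M 0x200000ULL
#define SZ_1G 0x40000000ULL

/* Largest size in the bitmap that both addresses are aligned to and that
 * still fits in the remaining length (a toy stand-in for
 * pt_compute_best_pgsize(), not the kernel code). */
static unsigned int best_pgsize_lg2(uint64_t bitmap, uint64_t va,
				    uint64_t last_va, uint64_t oa)
{
	uint64_t len = last_va - va + 1;

	for (int lg2 = 63; lg2 >= 0; lg2--) {
		uint64_t size = 1ULL << lg2;

		if (!(bitmap & size) || size > len || ((va | oa) & (size - 1)))
			continue;
		return (unsigned int)lg2;
	}
	return 12; /* assume 4K is always supported */
}

int main(void)
{
	uint64_t bitmap = SZ_4K | SZ_2M;
	uint64_t va = SZ_4K, last_va = SZ_1G - 1, oa = SZ_4K;

	while (va <= last_va) {
		unsigned int lg2 = best_pgsize_lg2(bitmap, va, last_va, oa);
		uint64_t n = 0;

		/* one fragment: a run of equal-size pages; the old flow
		 * re-walked the page table on every iteration in here */
		while (va <= last_va &&
		       best_pgsize_lg2(bitmap, va, last_va, oa) == lg2) {
			va += 1ULL << lg2;
			oa += 1ULL << lg2;
			n++;
		}
		printf("fragment: %llu pages of 2^%u bytes\n",
		       (unsigned long long)n, lg2);
	}
	return 0;
}

Running it prints "fragment: 511 pages of 2^12" followed by "fragment: 511 pages of 2^21", the same two fragments the kunit test below expects pt_pgsz_count() to delimit.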


drivers/iommu/generic_pt/iommu_pt.h

Lines changed: 82 additions & 51 deletions
@@ -477,6 +477,7 @@ struct pt_iommu_map_args {
 	pt_oaddr_t oa;
 	unsigned int leaf_pgsize_lg2;
 	unsigned int leaf_level;
+	pt_vaddr_t num_leaves;
 };
 
@@ -529,11 +530,15 @@ static int clear_contig(const struct pt_state *start_pts,
 static int __map_range_leaf(struct pt_range *range, void *arg,
 			    unsigned int level, struct pt_table_p *table)
 {
+	struct pt_iommu *iommu_table = iommu_from_common(range->common);
 	struct pt_state pts = pt_init(range, level, table);
 	struct pt_iommu_map_args *map = arg;
 	unsigned int leaf_pgsize_lg2 = map->leaf_pgsize_lg2;
 	unsigned int start_index;
 	pt_oaddr_t oa = map->oa;
+	unsigned int num_leaves;
+	unsigned int orig_end;
+	pt_vaddr_t last_va;
 	unsigned int step;
 	bool need_contig;
 	int ret = 0;
@@ -547,6 +552,15 @@ static int __map_range_leaf(struct pt_range *range, void *arg,
 
 	_pt_iter_first(&pts);
 	start_index = pts.index;
+	orig_end = pts.end_index;
+	if (pts.index + map->num_leaves < pts.end_index) {
+		/* Need to stop in the middle of the table to change sizes */
+		pts.end_index = pts.index + map->num_leaves;
+		num_leaves = 0;
+	} else {
+		num_leaves = map->num_leaves - (pts.end_index - pts.index);
+	}
+
 	do {
 		pts.type = pt_load_entry_raw(&pts);
 		if (pts.type != PT_ENTRY_EMPTY || need_contig) {
@@ -572,7 +586,40 @@ static int __map_range_leaf(struct pt_range *range, void *arg,
 	flush_writes_range(&pts, start_index, pts.index);
 
 	map->oa = oa;
-	return ret;
+	map->num_leaves = num_leaves;
+	if (ret || num_leaves)
+		return ret;
+
+	/* range->va is not valid if we reached the end of the table */
+	pts.index -= step;
+	pt_index_to_va(&pts);
+	pts.index += step;
+	last_va = range->va + log2_to_int(leaf_pgsize_lg2);
+
+	if (last_va - 1 == range->last_va) {
+		PT_WARN_ON(pts.index != orig_end);
+		return 0;
+	}
+
+	/*
+	 * Reached a point where the page size changed, compute the new
+	 * parameters.
+	 */
+	map->leaf_pgsize_lg2 = pt_compute_best_pgsize(
+		iommu_table->domain.pgsize_bitmap, last_va, range->last_va, oa);
+	map->leaf_level =
+		pt_pgsz_lg2_to_level(range->common, map->leaf_pgsize_lg2);
+	map->num_leaves = pt_pgsz_count(iommu_table->domain.pgsize_bitmap,
+					last_va, range->last_va, oa,
+					map->leaf_pgsize_lg2);
+
+	/* Didn't finish this table level, caller will repeat it */
+	if (pts.index != orig_end) {
+		if (pts.index != start_index)
+			pt_index_to_va(&pts);
+		return -EAGAIN;
+	}
+	return 0;
 }
 
 static int __map_range(struct pt_range *range, void *arg, unsigned int level,
@@ -595,14 +642,9 @@ static int __map_range(struct pt_range *range, void *arg, unsigned int level,
 		if (pts.type != PT_ENTRY_EMPTY)
 			return -EADDRINUSE;
 		ret = pt_iommu_new_table(&pts, &map->attrs);
-		if (ret) {
-			/*
-			 * Racing with another thread installing a table
-			 */
-			if (ret == -EAGAIN)
-				continue;
+		/* EAGAIN on a race will loop again */
+		if (ret)
 			return ret;
-		}
 	} else {
 		pts.table_lower = pt_table_ptr(&pts);
 		/*
@@ -626,17 +668,27 @@
 	 * The already present table can possibly be shared with another
 	 * concurrent map.
 	 */
-	if (map->leaf_level == level - 1)
-		ret = pt_descend(&pts, arg, __map_range_leaf);
-	else
-		ret = pt_descend(&pts, arg, __map_range);
+	do {
+		if (map->leaf_level == level - 1)
+			ret = pt_descend(&pts, arg, __map_range_leaf);
+		else
+			ret = pt_descend(&pts, arg, __map_range);
+	} while (ret == -EAGAIN);
 	if (ret)
 		return ret;
 
 	pts.index++;
 	pt_index_to_va(&pts);
 	if (pts.index >= pts.end_index)
 		break;
+
+	/*
+	 * This level is currently running __map_range() which is
+	 * not correct if the target level has been updated to this
+	 * level. Have the caller invoke __map_range_leaf().
+	 */
+	if (map->leaf_level == level)
+		return -EAGAIN;
 } while (true);
 return 0;
}
@@ -808,63 +860,39 @@ static int check_map_range(struct pt_iommu *iommu_table, struct pt_range *range,
 static int do_map(struct pt_range *range, struct pt_common *common,
 		  bool single_page, struct pt_iommu_map_args *map)
 {
+	int ret;
+
 	/*
 	 * The __map_single_page() fast path does not support DMA_INCOHERENT
 	 * flushing to keep its .text small.
 	 */
 	if (single_page && !pt_feature(common, PT_FEAT_DMA_INCOHERENT)) {
-		int ret;
 
 		ret = pt_walk_range(range, __map_single_page, map);
 		if (ret != -EAGAIN)
			return ret;
 		/* EAGAIN falls through to the full path */
 	}
 
-	if (map->leaf_level == range->top_level)
-		return pt_walk_range(range, __map_range_leaf, map);
-	return pt_walk_range(range, __map_range, map);
+	do {
+		if (map->leaf_level == range->top_level)
+			ret = pt_walk_range(range, __map_range_leaf, map);
+		else
+			ret = pt_walk_range(range, __map_range, map);
+	} while (ret == -EAGAIN);
+	return ret;
 }
 
-/**
- * map_pages() - Install translation for an IOVA range
- * @domain: Domain to manipulate
- * @iova: IO virtual address to start
- * @paddr: Physical/Output address to start
- * @pgsize: Length of each page
- * @pgcount: Length of the range in pgsize units starting from @iova
- * @prot: A bitmap of IOMMU_READ/WRITE/CACHE/NOEXEC/MMIO
- * @gfp: GFP flags for any memory allocations
- * @mapped: Total bytes successfully mapped
- *
- * The range starting at IOVA will have paddr installed into it. The caller
- * must specify a valid pgsize and pgcount to segment the range into compatible
- * blocks.
- *
- * On error the caller will probably want to invoke unmap on the range from iova
- * up to the amount indicated by @mapped to return the table back to an
- * unchanged state.
- *
- * Context: The caller must hold a write range lock that includes the whole
- * range.
- *
- * Returns: -ERRNO on failure, 0 on success. The number of bytes of VA that were
- * mapped are added to @mapped, @mapped is not zerod first.
- */
-int DOMAIN_NS(map_pages)(struct iommu_domain *domain, unsigned long iova,
-			 phys_addr_t paddr, size_t pgsize, size_t pgcount,
-			 int prot, gfp_t gfp, size_t *mapped)
+static int NS(map_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
+			 phys_addr_t paddr, dma_addr_t len, unsigned int prot,
+			 gfp_t gfp, size_t *mapped)
 {
-	struct pt_iommu *iommu_table =
-		container_of(domain, struct pt_iommu, domain);
 	pt_vaddr_t pgsize_bitmap = iommu_table->domain.pgsize_bitmap;
 	struct pt_common *common = common_from_iommu(iommu_table);
 	struct iommu_iotlb_gather iotlb_gather;
-	pt_vaddr_t len = pgsize * pgcount;
 	struct pt_iommu_map_args map = {
 		.iotlb_gather = &iotlb_gather,
 		.oa = paddr,
-		.leaf_pgsize_lg2 = vaffs(pgsize),
 	};
 	bool single_page = false;
 	struct pt_range range;
@@ -892,13 +920,13 @@ int DOMAIN_NS(map_pages)(struct iommu_domain *domain, unsigned long iova,
 		return ret;
 
 	/* Calculate target page size and level for the leaves */
-	if (pt_has_system_page_size(common) && pgsize == PAGE_SIZE &&
-	    pgcount == 1) {
+	if (pt_has_system_page_size(common) && len == PAGE_SIZE) {
 		PT_WARN_ON(!(pgsize_bitmap & PAGE_SIZE));
 		if (log2_mod(iova | paddr, PAGE_SHIFT))
 			return -ENXIO;
 		map.leaf_pgsize_lg2 = PAGE_SHIFT;
 		map.leaf_level = 0;
+		map.num_leaves = 1;
 		single_page = true;
 	} else {
 		map.leaf_pgsize_lg2 = pt_compute_best_pgsize(
@@ -907,6 +935,9 @@
 			return -ENXIO;
 		map.leaf_level =
 			pt_pgsz_lg2_to_level(common, map.leaf_pgsize_lg2);
+		map.num_leaves = pt_pgsz_count(pgsize_bitmap, range.va,
+					       range.last_va, paddr,
+					       map.leaf_pgsize_lg2);
 	}
 
 	ret = check_map_range(iommu_table, &range, &map);
@@ -929,7 +960,6 @@
 	*mapped += map.oa - paddr;
 	return ret;
 }
-EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(map_pages), "GENERIC_PT_IOMMU");
 
 struct pt_unmap_args {
 	struct iommu_pages_list free_list;
@@ -1098,6 +1128,7 @@ static void NS(deinit)(struct pt_iommu *iommu_table)
 }
 
 static const struct pt_iommu_ops NS(ops) = {
+	.map_range = NS(map_range),
 	.unmap_range = NS(unmap_range),
 #if IS_ENABLED(CONFIG_IOMMUFD_DRIVER) && defined(pt_entry_is_write_dirty) && \
     IS_ENABLED(CONFIG_IOMMUFD_TEST) && defined(pt_entry_make_write_dirty)
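The hunks above establish a resume contract: __map_range_leaf() stops once the current fragment's leaves are written, stores the next fragment's parameters back into pt_iommu_map_args, and returns -EAGAIN so its caller re-dispatches from the saved position instead of re-walking from the root. A minimal standalone model of that contract (toy userspace C, not the kernel code; struct job and do_fragment() are invented names):

#include <errno.h>
#include <stdio.h>

struct job {
	unsigned int pos;	/* resume point, like range->va */
	unsigned int end;	/* like range->last_va */
	unsigned int chunk;	/* leaves in this fragment, like map->num_leaves */
};

/* Consume one fragment, then publish the next fragment's parameters and
 * ask the caller to resume (the role __map_range_leaf() plays above). */
static int do_fragment(struct job *j)
{
	unsigned int left = j->end - j->pos;
	unsigned int n = left < j->chunk ? left : j->chunk;

	j->pos += n;		/* "write the leaf entries" */
	if (j->pos == j->end)
		return 0;	/* whole request finished */
	j->chunk *= 2;		/* "page size changed": new parameters */
	return -EAGAIN;		/* caller resumes from j->pos */
}

int main(void)
{
	struct job j = { .pos = 0, .end = 1000, .chunk = 100 };
	int ret;

	/* the caller's loop, like do_map()'s do { ... } while (-EAGAIN) */
	do {
		ret = do_fragment(&j);
		printf("at %u, next chunk %u\n", j.pos, j.chunk);
	} while (ret == -EAGAIN);
	return ret;
}

The design point this models: the -EAGAIN never reaches userspace; it only unwinds far enough that the correct walker (__map_range_leaf() vs __map_range()) is re-entered with the already-computed position, so no work is repeated.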

drivers/iommu/generic_pt/kunit_generic_pt.h

Lines changed: 12 additions & 0 deletions
@@ -312,6 +312,17 @@ static void test_best_pgsize(struct kunit *test)
 	}
 }
 
+static void test_pgsz_count(struct kunit *test)
+{
+	KUNIT_EXPECT_EQ(test,
+			pt_pgsz_count(SZ_4K, 0, SZ_1G - 1, 0, ilog2(SZ_4K)),
+			SZ_1G / SZ_4K);
+	KUNIT_EXPECT_EQ(test,
+			pt_pgsz_count(SZ_2M | SZ_4K, SZ_4K, SZ_1G - 1, SZ_4K,
+				      ilog2(SZ_4K)),
+			(SZ_2M - SZ_4K) / SZ_4K);
+}
+
 /*
  * Check that pt_install_table() and pt_table_pa() match
 */
@@ -770,6 +781,7 @@ static struct kunit_case generic_pt_test_cases[] = {
 	KUNIT_CASE_FMT(test_init),
 	KUNIT_CASE_FMT(test_bitops),
 	KUNIT_CASE_FMT(test_best_pgsize),
+	KUNIT_CASE_FMT(test_pgsz_count),
 	KUNIT_CASE_FMT(test_table_ptr),
 	KUNIT_CASE_FMT(test_max_va),
 	KUNIT_CASE_FMT(test_table_radix),

drivers/iommu/generic_pt/pt_iter.h

Lines changed: 22 additions & 0 deletions
@@ -569,6 +569,28 @@ static inline unsigned int pt_compute_best_pgsize(pt_vaddr_t pgsz_bitmap,
 	return pgsz_lg2;
 }
 
+/*
+ * Return the number of pgsize_lg2 leaf entries that can be mapped for
+ * va to oa. This accounts for any requirement to reduce or increase the page
+ * size across the VA range.
+ */
+static inline pt_vaddr_t pt_pgsz_count(pt_vaddr_t pgsz_bitmap, pt_vaddr_t va,
+				       pt_vaddr_t last_va, pt_oaddr_t oa,
+				       unsigned int pgsize_lg2)
+{
+	pt_vaddr_t len = last_va - va + 1;
+	pt_vaddr_t next_pgsizes = log2_set_mod(pgsz_bitmap, 0, pgsize_lg2 + 1);
+
+	if (next_pgsizes) {
+		unsigned int next_pgsize_lg2 = vaffs(next_pgsizes);
+
+		if (log2_mod(va ^ oa, next_pgsize_lg2) == 0)
+			len = min(len, log2_set_mod_max(va, next_pgsize_lg2) -
+					       va + 1);
+	}
+	return log2_div(len, pgsize_lg2);
+}
+
 #define _PT_MAKE_CALL_LEVEL(fn)                                         \
 	static __always_inline int fn(struct pt_range *range, void *arg, \
 				      unsigned int level,               \
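As a cross-check of the arithmetic, the following standalone program (userspace C; log2_set_mod(), vaffs() and log2_div() are replaced with the plain bit operations noted in the comments) mirrors pt_pgsz_count() and reproduces both expectations from test_pgsz_count() above: with only 4K supported the whole 1G range is one fragment, while with 4K|2M and VA=OA=4K the count stops after the 511 4K entries that reach the first 2M boundary.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_4K 0x1000ULL
#define SZ_2M 0x200000ULL
#define SZ_1G 0x40000000ULL

/* Model of pt_pgsz_count(): count how many pgsize_lg2 entries can be
 * written before a larger page size in the bitmap becomes usable. */
static uint64_t pgsz_count(uint64_t pgsz_bitmap, uint64_t va, uint64_t last_va,
			   uint64_t oa, unsigned int pgsize_lg2)
{
	uint64_t len = last_va - va + 1;
	/* log2_set_mod(bitmap, 0, pgsize_lg2 + 1): clear sizes <= pgsize */
	uint64_t next_pgsizes = pgsz_bitmap & ~((2ULL << pgsize_lg2) - 1);

	if (next_pgsizes) {
		/* vaffs(): lowest set bit = next bigger supported size */
		unsigned int next_lg2 = __builtin_ctzll(next_pgsizes);

		/* VA and OA must reach the bigger alignment together */
		if (((va ^ oa) & ((1ULL << next_lg2) - 1)) == 0) {
			/* log2_set_mod_max(va, next_lg2) - va + 1 */
			uint64_t to_boundary =
				(va | ((1ULL << next_lg2) - 1)) - va + 1;
			if (to_boundary < len)
				len = to_boundary;
		}
	}
	return len >> pgsize_lg2;	/* log2_div() */
}

int main(void)
{
	/* Only 4K supported: the whole 1G range is one 4K-sized fragment */
	assert(pgsz_count(SZ_4K, 0, SZ_1G - 1, 0, 12) == SZ_1G / SZ_4K);
	/* 4K+2M, starting at 4K: stop at the first 2M boundary (511 PTEs) */
	assert(pgsz_count(SZ_2M | SZ_4K, SZ_4K, SZ_1G - 1, SZ_4K, 12) ==
	       (SZ_2M - SZ_4K) / SZ_4K);
	printf("pgsz_count model matches the kunit expectations\n");
	return 0;
}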

drivers/iommu/iommu.c

Lines changed: 31 additions & 8 deletions
@@ -2569,14 +2569,14 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
 	return pgsize;
 }
 
-int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
-		     phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int __iommu_map_domain_pgtbl(struct iommu_domain *domain,
+				    unsigned long iova, phys_addr_t paddr,
+				    size_t size, int prot, gfp_t gfp)
 {
 	const struct iommu_domain_ops *ops = domain->ops;
 	unsigned long orig_iova = iova;
 	unsigned int min_pagesz;
 	size_t orig_size = size;
-	phys_addr_t orig_paddr = paddr;
 	int ret = 0;
 
 	might_sleep_if(gfpflags_allow_blocking(gfp));
@@ -2633,12 +2633,9 @@ int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
 	/* unroll mapping in case something went wrong */
 	if (ret) {
 		iommu_unmap(domain, orig_iova, orig_size - size);
-	} else {
-		trace_map(orig_iova, orig_paddr, orig_size);
-		iommu_debug_map(domain, orig_paddr, orig_size);
+		return ret;
 	}
-
-	return ret;
+	return 0;
 }
 
 int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size)
@@ -2650,6 +2647,32 @@ int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size)
 
 	return ops->iotlb_sync_map(domain, iova, size);
 }
 
+int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
+		     phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+	struct pt_iommu *pt = iommupt_from_domain(domain);
+	int ret;
+
+	if (pt) {
+		size_t mapped = 0;
+
+		ret = pt->ops->map_range(pt, iova, paddr, size, prot, gfp,
+					 &mapped);
+		if (ret) {
+			iommu_unmap(domain, iova, mapped);
+			return ret;
+		}
+		return 0;
+	}
+	ret = __iommu_map_domain_pgtbl(domain, iova, paddr, size, prot, gfp);
+	if (ret)
+		return ret;
+
+	trace_map(iova, paddr, size);
+	iommu_debug_map(domain, paddr, size);
+	return 0;
+}
+
 int iommu_map(struct iommu_domain *domain, unsigned long iova,
 	      phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
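The iommupt branch above relies on map_range() accumulating partial progress into *mapped even on failure, so iommu_map_nosync() can unmap exactly the prefix that was installed. A toy model of that unwind contract (standalone userspace C, invented names, -ENOMEM simulated):

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Map len bytes in 4K steps; fail partway but still report progress */
static int toy_map_range(uint64_t iova, uint64_t len, size_t *mapped)
{
	for (uint64_t off = 0; off < len; off += 0x1000) {
		if (off == 3 * 0x1000)	/* pretend allocation fails here */
			return -ENOMEM;
		*mapped += 0x1000;	/* this page is now live */
	}
	return 0;
}

static void toy_unmap(uint64_t iova, size_t len)
{
	printf("unmap [%#llx, %#llx)\n", (unsigned long long)iova,
	       (unsigned long long)(iova + len));
}

int main(void)
{
	size_t mapped = 0;
	int ret = toy_map_range(0x100000, 0x8000, &mapped);

	if (ret)	/* unwind only what map_range reported as done */
		toy_unmap(0x100000, mapped);
	return 0;
}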
