Skip to content

Commit e71e001

Browse files
jgunthorpejoergroedel
authored andcommitted
iommupt: Add the RISC-V page table format
The RISC-V format is a fairly simple 5 level page table not unlike the x86 one. It has optional support for a single contiguous page size of 64k (16 x 4k). The specification describes a 32-bit format, the general code can support it via a #define but the iommu side implementation has been left off until a user comes. Tested-by: Vincent Chen <vincent.chen@sifive.com> Acked-by: Paul Walmsley <pjw@kernel.org> # arch/riscv Reviewed-by: Tomasz Jeznach <tjeznach@rivosinc.com> Tested-by: Tomasz Jeznach <tjeznach@rivosinc.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
1 parent f338e77 commit e71e001

8 files changed

Lines changed: 394 additions & 0 deletions

File tree

drivers/iommu/generic_pt/.kunitconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ CONFIG_DEBUG_GENERIC_PT=y
55
CONFIG_IOMMU_PT=y
66
CONFIG_IOMMU_PT_AMDV1=y
77
CONFIG_IOMMU_PT_VTDSS=y
8+
CONFIG_IOMMU_PT_RISCV64=y
89
CONFIG_IOMMU_PT_X86_64=y
910
CONFIG_IOMMU_PT_KUNIT_TEST=y
1011

drivers/iommu/generic_pt/Kconfig

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,16 @@ config IOMMU_PT_VTDSS
5252

5353
Selected automatically by an IOMMU driver that uses this format.
5454

55+
config IOMMU_PT_RISCV64
56+
tristate "IOMMU page table for RISC-V 64 bit Sv57/Sv48/Sv39"
57+
depends on !GENERIC_ATOMIC64 # for cmpxchg64
58+
help
59+
iommu_domain implementation for RISC-V 64 bit 3/4/5 level page table.
60+
It supports 4K/2M/1G/512G/256T page sizes and can decode a sign
61+
extended portion of the 64 bit IOVA space.
62+
63+
Selected automatically by an IOMMU driver that uses this format.
64+
5565
config IOMMU_PT_X86_64
5666
tristate "IOMMU page table for x86 64-bit, 4/5 levels"
5767
depends on !GENERIC_ATOMIC64 # for cmpxchg64
@@ -66,6 +76,7 @@ config IOMMU_PT_KUNIT_TEST
6676
tristate "IOMMU Page Table KUnit Test" if !KUNIT_ALL_TESTS
6777
depends on KUNIT
6878
depends on IOMMU_PT_AMDV1 || !IOMMU_PT_AMDV1
79+
depends on IOMMU_PT_RISCV64 || !IOMMU_PT_RISCV64
6980
depends on IOMMU_PT_X86_64 || !IOMMU_PT_X86_64
7081
depends on IOMMU_PT_VTDSS || !IOMMU_PT_VTDSS
7182
default KUNIT_ALL_TESTS

drivers/iommu/generic_pt/fmt/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ iommu_pt_fmt-$(CONFIG_IOMMUFD_TEST) += mock
55

66
iommu_pt_fmt-$(CONFIG_IOMMU_PT_VTDSS) += vtdss
77

8+
iommu_pt_fmt-$(CONFIG_IOMMU_PT_RISCV64) += riscv64
9+
810
iommu_pt_fmt-$(CONFIG_IOMMU_PT_X86_64) += x86_64
911

1012
IOMMU_PT_KUNIT_TEST :=
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only */
2+
/*
3+
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES
4+
*
5+
*/
6+
#ifndef __GENERIC_PT_FMT_DEFS_RISCV_H
7+
#define __GENERIC_PT_FMT_DEFS_RISCV_H
8+
9+
#include <linux/generic_pt/common.h>
10+
#include <linux/types.h>
11+
12+
#ifdef PT_RISCV_32BIT
13+
typedef u32 pt_riscv_entry_t;
14+
#define riscvpt_write_attrs riscv32pt_write_attrs
15+
#else
16+
typedef u64 pt_riscv_entry_t;
17+
#define riscvpt_write_attrs riscv64pt_write_attrs
18+
#endif
19+
20+
typedef pt_riscv_entry_t pt_vaddr_t;
21+
typedef u64 pt_oaddr_t;
22+
23+
/*
 * Attributes applied when writing entries for this format. descriptor_bits is
 * OR'd directly into each leaf entry by the format's install function.
 */
struct riscvpt_write_attrs {
24+
/* PTE attribute bits, already positioned as they appear in the entry */
pt_riscv_entry_t descriptor_bits;
25+
/* NOTE(review): presumably the allocation flags for new table memory - confirm */
gfp_t gfp;
26+
};
27+
#define pt_write_attrs riscvpt_write_attrs
28+
29+
#endif
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/*
3+
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES
4+
*/
5+
#define PT_FMT riscv
6+
#define PT_FMT_VARIANT 64
7+
#define PT_SUPPORTED_FEATURES \
8+
(BIT(PT_FEAT_SIGN_EXTEND) | BIT(PT_FEAT_FLUSH_RANGE) | \
9+
BIT(PT_FEAT_RISCV_SVNAPOT_64K))
10+
11+
#include "iommu_template.h"
Lines changed: 313 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,313 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only */
2+
/*
3+
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES
4+
*
5+
* RISC-V page table
6+
*
7+
* This is described in Sections:
8+
* 12.3. Sv32: Page-Based 32-bit Virtual-Memory Systems
9+
* 12.4. Sv39: Page-Based 39-bit Virtual-Memory System
10+
* 12.5. Sv48: Page-Based 48-bit Virtual-Memory System
11+
* 12.6. Sv57: Page-Based 57-bit Virtual-Memory System
12+
* of the "The RISC-V Instruction Set Manual: Volume II"
13+
*
14+
* This includes the contiguous page extension from:
15+
* Chapter 13. "Svnapot" Extension for NAPOT Translation Contiguity,
16+
* Version 1.0
17+
*
18+
* The table format is sign extended and supports leaves in every level. The spec
19+
* doesn't talk a lot about levels, but level here is the same as the index i
20+
* used by the spec's table walk, which starts at i=LEVELS-1 for the top table.
21+
*/
22+
#ifndef __GENERIC_PT_FMT_RISCV_H
23+
#define __GENERIC_PT_FMT_RISCV_H
24+
25+
#include "defs_riscv.h"
26+
#include "../pt_defs.h"
27+
28+
#include <linux/bitfield.h>
29+
#include <linux/container_of.h>
30+
#include <linux/log2.h>
31+
#include <linux/sizes.h>
32+
33+
/*
 * Sizing constants for the format. The address widths and the maximum top
 * level depend on whether the 32-bit or the 64-bit entry layout is selected;
 * the granule and the table memory size are 4K (2^12) in both.
 */
enum {
34+
/* Size in bytes of one table entry */
PT_ITEM_WORD_SIZE = sizeof(pt_riscv_entry_t),
35+
#ifdef PT_RISCV_32BIT
36+
PT_MAX_VA_ADDRESS_LG2 = 32,
37+
PT_MAX_OUTPUT_ADDRESS_LG2 = 34,
38+
PT_MAX_TOP_LEVEL = 1,
39+
#else
40+
PT_MAX_VA_ADDRESS_LG2 = 57,
41+
PT_MAX_OUTPUT_ADDRESS_LG2 = 56,
42+
PT_MAX_TOP_LEVEL = 4,
43+
#endif
44+
PT_GRANULE_LG2SZ = 12,
45+
PT_TABLEMEM_LG2SZ = 12,
46+
47+
/* fsc.PPN is 44 bits wide, all PPNs are 4k aligned */
48+
PT_TOP_PHYS_MASK = GENMASK_ULL(55, 12),
49+
};
50+
51+
/* PTE bits */
52+
/*
 * Masks for the fields of a table entry. The single letter names follow the
 * PTE field names of the RISC-V privileged specification (NOTE(review):
 * confirm the U/G/A/D/RSW/PBMT meanings against the spec; this file only
 * shows how V, R, W, X and N are interpreted).
 */
enum {
53+
/* Entry is present; entries without V load as PT_ENTRY_EMPTY */
RISCVPT_V = BIT(0),
54+
/* Any of R/W/X set marks an entry above level 0 as a leaf */
RISCVPT_R = BIT(1),
55+
RISCVPT_W = BIT(2),
56+
RISCVPT_X = BIT(3),
57+
RISCVPT_U = BIT(4),
58+
RISCVPT_G = BIT(5),
59+
RISCVPT_A = BIT(6),
60+
RISCVPT_D = BIT(7),
61+
RISCVPT_RSW = GENMASK(9, 8),
62+
/* PPN field of the 32-bit entry layout */
RISCVPT_PPN32 = GENMASK(31, 10),
63+
64+
/* PPN field of the 64-bit entry layout */
RISCVPT_PPN64 = GENMASK_ULL(53, 10),
65+
/* PPN bits that carry the address of a 64K NAPOT entry */
RISCVPT_PPN64_64K = GENMASK_ULL(53, 14),
66+
RISCVPT_PBMT = GENMASK_ULL(62, 61),
67+
/* Set on entries that are part of a 64K contiguous (Svnapot) run */
RISCVPT_N = BIT_ULL(63),
68+
69+
/* Svnapot encodings for ppn[0] */
70+
RISCVPT_PPN64_64K_SZ = BIT(13),
71+
};
72+
73+
#ifdef PT_RISCV_32BIT
74+
#define RISCVPT_PPN RISCVPT_PPN32
75+
#define pt_riscv pt_riscv_32
76+
#else
77+
#define RISCVPT_PPN RISCVPT_PPN64
78+
#define pt_riscv pt_riscv_64
79+
#endif
80+
81+
#define common_to_riscvpt(common_ptr) \
82+
container_of_const(common_ptr, struct pt_riscv, common)
83+
#define to_riscvpt(pts) common_to_riscvpt((pts)->range->common)
84+
85+
static inline pt_oaddr_t riscvpt_table_pa(const struct pt_state *pts)
86+
{
87+
return oalog2_mul(FIELD_GET(RISCVPT_PPN, pts->entry), PT_GRANULE_LG2SZ);
88+
}
89+
#define pt_table_pa riscvpt_table_pa
90+
91+
static inline pt_oaddr_t riscvpt_entry_oa(const struct pt_state *pts)
92+
{
93+
if (pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K) &&
94+
pts->entry & RISCVPT_N) {
95+
PT_WARN_ON(pts->level != 0);
96+
return oalog2_mul(FIELD_GET(RISCVPT_PPN64_64K, pts->entry),
97+
ilog2(SZ_64K));
98+
}
99+
return oalog2_mul(FIELD_GET(RISCVPT_PPN, pts->entry), PT_GRANULE_LG2SZ);
100+
}
101+
#define pt_entry_oa riscvpt_entry_oa
102+
103+
static inline bool riscvpt_can_have_leaf(const struct pt_state *pts)
104+
{
105+
return true;
106+
}
107+
#define pt_can_have_leaf riscvpt_can_have_leaf
108+
109+
/* Body in pt_fmt_defaults.h */
110+
static inline unsigned int pt_table_item_lg2sz(const struct pt_state *pts);
111+
112+
static inline unsigned int
113+
riscvpt_entry_num_contig_lg2(const struct pt_state *pts)
114+
{
115+
if (PT_SUPPORTED_FEATURE(PT_FEAT_RISCV_SVNAPOT_64K) &&
116+
pts->entry & RISCVPT_N) {
117+
PT_WARN_ON(!pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K));
118+
PT_WARN_ON(pts->level);
119+
return ilog2(16);
120+
}
121+
return ilog2(1);
122+
}
123+
#define pt_entry_num_contig_lg2 riscvpt_entry_num_contig_lg2
124+
125+
static inline unsigned int riscvpt_num_items_lg2(const struct pt_state *pts)
126+
{
127+
return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64));
128+
}
129+
#define pt_num_items_lg2 riscvpt_num_items_lg2
130+
131+
static inline unsigned short
132+
riscvpt_contig_count_lg2(const struct pt_state *pts)
133+
{
134+
if (pts->level == 0 && pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K))
135+
return ilog2(16);
136+
return ilog2(1);
137+
}
138+
#define pt_contig_count_lg2 riscvpt_contig_count_lg2
139+
140+
static inline enum pt_entry_type riscvpt_load_entry_raw(struct pt_state *pts)
141+
{
142+
const pt_riscv_entry_t *tablep = pt_cur_table(pts, pt_riscv_entry_t);
143+
pt_riscv_entry_t entry;
144+
145+
pts->entry = entry = READ_ONCE(tablep[pts->index]);
146+
if (!(entry & RISCVPT_V))
147+
return PT_ENTRY_EMPTY;
148+
if (pts->level == 0 ||
149+
((entry & (RISCVPT_X | RISCVPT_W | RISCVPT_R)) != 0))
150+
return PT_ENTRY_OA;
151+
return PT_ENTRY_TABLE;
152+
}
153+
#define pt_load_entry_raw riscvpt_load_entry_raw
154+
155+
static inline void
156+
riscvpt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
157+
unsigned int oasz_lg2,
158+
const struct pt_write_attrs *attrs)
159+
{
160+
pt_riscv_entry_t *tablep = pt_cur_table(pts, pt_riscv_entry_t);
161+
pt_riscv_entry_t entry;
162+
163+
if (!pt_check_install_leaf_args(pts, oa, oasz_lg2))
164+
return;
165+
166+
entry = RISCVPT_V |
167+
FIELD_PREP(RISCVPT_PPN, log2_div(oa, PT_GRANULE_LG2SZ)) |
168+
attrs->descriptor_bits;
169+
170+
if (pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K) && pts->level == 0 &&
171+
oasz_lg2 != PT_GRANULE_LG2SZ) {
172+
u64 *end;
173+
174+
entry |= RISCVPT_N | RISCVPT_PPN64_64K_SZ;
175+
tablep += pts->index;
176+
end = tablep + log2_div(SZ_64K, PT_GRANULE_LG2SZ);
177+
for (; tablep != end; tablep++)
178+
WRITE_ONCE(*tablep, entry);
179+
} else {
180+
/* FIXME does riscv need this to be cmpxchg? */
181+
WRITE_ONCE(tablep[pts->index], entry);
182+
}
183+
pts->entry = entry;
184+
}
185+
#define pt_install_leaf_entry riscvpt_install_leaf_entry
186+
187+
static inline bool riscvpt_install_table(struct pt_state *pts,
188+
pt_oaddr_t table_pa,
189+
const struct pt_write_attrs *attrs)
190+
{
191+
pt_riscv_entry_t entry;
192+
193+
entry = RISCVPT_V |
194+
FIELD_PREP(RISCVPT_PPN, log2_div(table_pa, PT_GRANULE_LG2SZ));
195+
return pt_table_install64(pts, entry);
196+
}
197+
#define pt_install_table riscvpt_install_table
198+
199+
static inline void riscvpt_attr_from_entry(const struct pt_state *pts,
200+
struct pt_write_attrs *attrs)
201+
{
202+
attrs->descriptor_bits =
203+
pts->entry & (RISCVPT_R | RISCVPT_W | RISCVPT_X | RISCVPT_U |
204+
RISCVPT_G | RISCVPT_A | RISCVPT_D);
205+
}
206+
#define pt_attr_from_entry riscvpt_attr_from_entry
207+
208+
/* --- iommu */
209+
#include <linux/generic_pt/iommu.h>
210+
#include <linux/iommu.h>
211+
212+
#define pt_iommu_table pt_iommu_riscv_64
213+
214+
/* The common struct is in the per-format common struct */
215+
static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table)
216+
{
217+
return &container_of(iommu_table, struct pt_iommu_table, iommu)
218+
->riscv_64pt.common;
219+
}
220+
221+
static inline struct pt_iommu *iommu_from_common(struct pt_common *common)
222+
{
223+
return &container_of(common, struct pt_iommu_table, riscv_64pt.common)
224+
->iommu;
225+
}
226+
227+
static inline int riscvpt_iommu_set_prot(struct pt_common *common,
228+
struct pt_write_attrs *attrs,
229+
unsigned int iommu_prot)
230+
{
231+
u64 pte;
232+
233+
pte = RISCVPT_A | RISCVPT_U;
234+
if (iommu_prot & IOMMU_WRITE)
235+
pte |= RISCVPT_W | RISCVPT_R | RISCVPT_D;
236+
if (iommu_prot & IOMMU_READ)
237+
pte |= RISCVPT_R;
238+
if (!(iommu_prot & IOMMU_NOEXEC))
239+
pte |= RISCVPT_X;
240+
241+
/* Caller must specify a supported combination of flags */
242+
if (unlikely((pte & (RISCVPT_X | RISCVPT_W | RISCVPT_R)) == 0))
243+
return -EOPNOTSUPP;
244+
245+
attrs->descriptor_bits = pte;
246+
return 0;
247+
}
248+
#define pt_iommu_set_prot riscvpt_iommu_set_prot
249+
250+
static inline int
251+
riscvpt_iommu_fmt_init(struct pt_iommu_riscv_64 *iommu_table,
252+
const struct pt_iommu_riscv_64_cfg *cfg)
253+
{
254+
struct pt_riscv *table = &iommu_table->riscv_64pt;
255+
256+
switch (cfg->common.hw_max_vasz_lg2) {
257+
case 39:
258+
pt_top_set_level(&table->common, 2);
259+
break;
260+
case 48:
261+
pt_top_set_level(&table->common, 3);
262+
break;
263+
case 57:
264+
pt_top_set_level(&table->common, 4);
265+
break;
266+
default:
267+
return -EINVAL;
268+
}
269+
table->common.max_oasz_lg2 =
270+
min(PT_MAX_OUTPUT_ADDRESS_LG2, cfg->common.hw_max_oasz_lg2);
271+
return 0;
272+
}
273+
#define pt_iommu_fmt_init riscvpt_iommu_fmt_init
274+
275+
static inline void
276+
riscvpt_iommu_fmt_hw_info(struct pt_iommu_riscv_64 *table,
277+
const struct pt_range *top_range,
278+
struct pt_iommu_riscv_64_hw_info *info)
279+
{
280+
phys_addr_t top_phys = virt_to_phys(top_range->top_table);
281+
282+
info->ppn = oalog2_div(top_phys, PT_GRANULE_LG2SZ);
283+
PT_WARN_ON(top_phys & ~PT_TOP_PHYS_MASK);
284+
285+
/*
286+
* See Table 3. Encodings of iosatp.MODE field" for DC.tx.SXL = 0:
287+
* 8 = Sv39 = top level 2
288+
* 9 = Sv38 = top level 3
289+
* 10 = Sv57 = top level 4
290+
*/
291+
info->fsc_iosatp_mode = top_range->top_level + 6;
292+
}
293+
#define pt_iommu_fmt_hw_info riscvpt_iommu_fmt_hw_info
294+
295+
#if defined(GENERIC_PT_KUNIT)
296+
/*
 * Configurations exercised by the kunit test: one per VA size accepted by
 * riscvpt_iommu_fmt_init() (39, 48 and 57 bits), all with a 56 bit output
 * address, with the Svnapot 64K feature enabled on the first and last.
 */
static const struct pt_iommu_riscv_64_cfg riscv_64_kunit_fmt_cfgs[] = {
297+
[0] = { .common.features = BIT(PT_FEAT_RISCV_SVNAPOT_64K),
298+
.common.hw_max_oasz_lg2 = 56,
299+
.common.hw_max_vasz_lg2 = 39 },
300+
[1] = { .common.features = 0,
301+
.common.hw_max_oasz_lg2 = 56,
302+
.common.hw_max_vasz_lg2 = 48 },
303+
[2] = { .common.features = BIT(PT_FEAT_RISCV_SVNAPOT_64K),
304+
.common.hw_max_oasz_lg2 = 56,
305+
.common.hw_max_vasz_lg2 = 57 },
306+
};
307+
#define kunit_fmt_cfgs riscv_64_kunit_fmt_cfgs
308+
enum {
309+
KUNIT_FMT_FEATURES = BIT(PT_FEAT_RISCV_SVNAPOT_64K),
310+
};
311+
#endif
312+
313+
#endif

0 commit comments

Comments
 (0)