
Commit 7b4dde5

Tobias Huschle authored and Vasily Gorbik committed
s390/pgtable: Add s390 support for page table check
Add page table check hooks into routines that modify user page tables.

Unlike other architectures, s390 has no means to distinguish between
kernel and user page table entries. Rely on the fact that the page
table check infrastructure itself operates on non-init_mm memory
spaces only, and use the provided mm_struct to verify that the memory
space is indeed not init_mm (i.e. not the kernel memory space). That
check is expected to have succeeded already (on some code paths even
twice). If the passed memory space were init_mm after all, that would
be an unexpected semantic change in generic code, so VM_BUG_ON() in
that case.

Unset the _SEGMENT_ENTRY_READ bit to indicate that pmdp_invalidate()
was applied against a huge PMD which is going to be updated by
set_pmd_at() shortly. The hook pmd_user_accessible_page() should skip
such entries until then; otherwise the page table accounting falls
apart and BUG_ON() gets hit as a result.

The invalidated huge PMD entry should not be confused with a PROT_NONE
entry as reported by pmd_protnone(), even though the entry
characteristics match exactly: _SEGMENT_ENTRY_LARGE is set while
_SEGMENT_ENTRY_READ is unset. Since the pmd_protnone() implementation
depends on the NUMA_BALANCING configuration option, it should not be
used in the pmd_user_accessible_page() check, which is expected to be
CONFIG_NUMA_BALANCING-agnostic.

Nevertheless, an invalidated huge PMD is technically still a
pmd_protnone() entry, and it should not break other code paths once
_SEGMENT_ENTRY_READ is unset. As of now, all pmd_protnone() checks are
done under page table locks or exercise GUP-fast and HMM code paths,
which are expected to be safe against concurrent page table updates.

An alternative approach would be to use the last remaining unused PMD
entry bit 0x800 to indicate that pmdp_invalidate() was called on a
PMD. That would avoid collisions with pmd_protnone() handling code
paths, but saving the bit is the preferable way to go.

Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Signed-off-by: Tobias Huschle <huschle@linux.ibm.com>
Co-developed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Link: https://lore.kernel.org/r/4db8a681205bd555298d62441cdcfca43317a35a.1772812343.git.agordeev@linux.ibm.com
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
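To make the invalidation trick concrete, here is a small stand-alone model of the pmd_user_accessible_page() predicate introduced by this patch. The bit values are illustrative placeholders, not the real s390 segment-table bit assignments; only the predicate logic mirrors the change.

/*
 * Minimal model: an invalidated huge PMD (LARGE set, READ cleared by
 * pmdp_invalidate()) is skipped by the checker until set_pmd_at()
 * installs the updated entry. Bit values are placeholders.
 */
#include <stdbool.h>
#include <stdio.h>

#define _SEGMENT_ENTRY_LARGE	0x0400UL /* placeholder value */
#define _SEGMENT_ENTRY_READ	0x0002UL /* placeholder value */
#define _SEGMENT_ENTRY_INVALID	0x0020UL /* placeholder value */

typedef unsigned long pmd_val_t;

static bool pmd_leaf(pmd_val_t pmd)
{
	return pmd & _SEGMENT_ENTRY_LARGE;
}

static bool pmd_user_accessible_page(pmd_val_t pmd)
{
	/* Invalidated huge PMDs have READ cleared and are not counted. */
	return pmd_leaf(pmd) && (pmd & _SEGMENT_ENTRY_READ);
}

int main(void)
{
	pmd_val_t huge = _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_READ;
	pmd_val_t invalidated = (huge | _SEGMENT_ENTRY_INVALID) & ~_SEGMENT_ENTRY_READ;

	printf("mapped huge PMD tracked:      %d\n", pmd_user_accessible_page(huge));         /* 1 */
	printf("invalidated huge PMD skipped: %d\n", !pmd_user_accessible_page(invalidated)); /* 1 */
	return 0;
}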
1 parent 2f34c2e commit 7b4dde5

2 files changed

Lines changed: 51 additions & 4 deletions


arch/s390/Kconfig

Lines changed: 1 addition & 0 deletions
@@ -154,6 +154,7 @@ config S390
 	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG
 	select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS
 	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_SUPPORTS_PAGE_TABLE_CHECK
 	select ARCH_SUPPORTS_PER_VMA_LOCK
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
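With ARCH_SUPPORTS_PAGE_TABLE_CHECK selected, the checker can be enabled on s390 the same way as on other supported architectures (see Documentation/mm/page_table_check.rst): build with the options below and boot with page_table_check=on, unless PAGE_TABLE_CHECK_ENFORCED is set.

CONFIG_PAGE_TABLE_CHECK=y
CONFIG_PAGE_TABLE_CHECK_ENFORCED=y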

arch/s390/include/asm/pgtable.h

Lines changed: 50 additions & 4 deletions
@@ -16,8 +16,10 @@
 #include <linux/mm_types.h>
 #include <linux/cpufeature.h>
 #include <linux/page-flags.h>
+#include <linux/page_table_check.h>
 #include <linux/radix-tree.h>
 #include <linux/atomic.h>
+#include <linux/mmap_lock.h>
 #include <asm/ctlreg.h>
 #include <asm/bug.h>
 #include <asm/page.h>
@@ -1190,6 +1192,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 	/* At this point the reference through the mapping is still present */
 	if (mm_is_protected(mm) && pte_present(res))
 		WARN_ON_ONCE(uv_convert_from_secure_pte(res));
+	page_table_check_pte_clear(mm, addr, res);
 	return res;
 }

@@ -1208,6 +1211,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
 	/* At this point the reference through the mapping is still present */
 	if (mm_is_protected(vma->vm_mm) && pte_present(res))
 		WARN_ON_ONCE(uv_convert_from_secure_pte(res));
+	page_table_check_pte_clear(vma->vm_mm, addr, res);
 	return res;
 }

@@ -1231,6 +1235,9 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 	} else {
 		res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
 	}
+
+	page_table_check_pte_clear(mm, addr, res);
+
 	/* Nothing to do */
 	if (!mm_is_protected(mm) || !pte_present(res))
 		return res;
@@ -1327,6 +1334,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
 {
 	if (pte_present(entry))
 		entry = clear_pte_bit(entry, __pgprot(_PAGE_UNUSED));
+	page_table_check_ptes_set(mm, addr, ptep, entry, nr);
 	for (;;) {
 		set_pte(ptep, entry);
 		if (--nr == 0)
@@ -1703,6 +1711,7 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			      pmd_t *pmdp, pmd_t entry)
 {
+	page_table_check_pmd_set(mm, addr, pmdp, entry);
 	set_pmd(pmdp, entry);
 }

@@ -1717,20 +1726,29 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 					    unsigned long addr, pmd_t *pmdp)
 {
-	return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+	pmd_t pmd;
+
+	pmd = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+	page_table_check_pmd_clear(mm, addr, pmd);
+	return pmd;
 }
 
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
 static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
 						 unsigned long addr,
 						 pmd_t *pmdp, int full)
 {
+	pmd_t pmd;
+
 	if (full) {
-		pmd_t pmd = *pmdp;
+		pmd = *pmdp;
 		set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+		page_table_check_pmd_clear(vma->vm_mm, addr, pmd);
 		return pmd;
 	}
-	return pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+	pmd = pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+	page_table_check_pmd_clear(vma->vm_mm, addr, pmd);
+	return pmd;
 }
 
 #define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
@@ -1748,7 +1766,12 @@ static inline pmd_t pmdp_invalidate(struct vm_area_struct *vma,
 
 	VM_WARN_ON_ONCE(!pmd_present(pmd));
 	pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID));
-	return pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
+#ifdef CONFIG_PAGE_TABLE_CHECK
+	pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_READ));
+#endif
+	page_table_check_pmd_set(vma->vm_mm, addr, pmdp, pmd);
+	pmd = pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
+	return pmd;
 }
 
 #define __HAVE_ARCH_PMDP_SET_WRPROTECT
@@ -1783,6 +1806,29 @@ static inline int has_transparent_hugepage(void)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+#ifdef CONFIG_PAGE_TABLE_CHECK
+static inline bool pte_user_accessible_page(struct mm_struct *mm, unsigned long addr, pte_t pte)
+{
+	VM_BUG_ON(mm == &init_mm);
+
+	return pte_present(pte);
+}
+
+static inline bool pmd_user_accessible_page(struct mm_struct *mm, unsigned long addr, pmd_t pmd)
+{
+	VM_BUG_ON(mm == &init_mm);
+
+	return pmd_leaf(pmd) && (pmd_val(pmd) & _SEGMENT_ENTRY_READ);
+}
+
+static inline bool pud_user_accessible_page(struct mm_struct *mm, unsigned long addr, pud_t pud)
+{
+	VM_BUG_ON(mm == &init_mm);
+
+	return pud_leaf(pud);
+}
+#endif
+
 /*
  * 64 bit swap entry format:
  * A page-table entry has some bits we have to treat in a special way.
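The interplay of the hooks during a huge-PMD update can be illustrated with a toy walk-through of the pmdp_invalidate() plus set_pmd_at() sequence used by generic THP code. The single counter and the set-side hook below are simplified stand-ins for the real bookkeeping in mm/page_table_check.c, and the bit values are again placeholders.

/*
 * Toy model: the set-side hook retires whatever the checker tracked
 * for the old entry and accounts the new one. Clearing READ in
 * pmdp_invalidate() keeps the transient entry out of the accounting,
 * so the page stays tracked exactly once across the update.
 */
#include <assert.h>
#include <stdio.h>

#define LARGE	0x1UL /* placeholder: _SEGMENT_ENTRY_LARGE */
#define READ	0x2UL /* placeholder: _SEGMENT_ENTRY_READ */
#define INVALID	0x4UL /* placeholder: _SEGMENT_ENTRY_INVALID */

static int tracked; /* stand-in for the checker's per-page counters */

static int accessible(unsigned long e)
{
	return (e & LARGE) && (e & READ); /* pmd_user_accessible_page() */
}

static void hook_pmd_set(unsigned long *slot, unsigned long new)
{
	tracked -= accessible(*slot);
	tracked += accessible(new);
	*slot = new;
	assert(tracked >= 0 && tracked <= 1); /* stand-in for BUG_ON() */
}

int main(void)
{
	unsigned long pmd = 0;

	hook_pmd_set(&pmd, LARGE | READ);    /* set_pmd_at(): tracked */
	hook_pmd_set(&pmd, LARGE | INVALID); /* pmdp_invalidate(): READ off, untracked */
	hook_pmd_set(&pmd, LARGE | READ);    /* set_pmd_at(): tracked again */
	printf("tracked: %d\n", tracked);    /* 1 */
	return 0;
}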
