Skip to content

Commit 4c640eb

Browse files
David Hildenbrand (Red Hat) and akpm00
authored and committed
mm: move pte table reclaim code to memory.c
Some cleanups for PT table reclaim code, triggered by a false-positive warning we might start to see soon after we unlocked pt-reclaim on architectures besides x86-64. This patch (of 2): The pte-table reclaim code is only called from memory.c, while zapping pages, and it better also stays that way in the long run. If we ever have to call it from other files, we should expose proper high-level helpers for zapping if the existing helpers are not good enough. So, let's move the code over (it's not a lot) and slightly clean it up a bit by: - Renaming the functions. - Dropping the "Check if it is empty PTE page" comment, which is now self-explaining given the function name. - Making zap_pte_table_if_empty() return whether zapping worked so the caller can free it. - Adding a comment in pte_table_reclaim_possible(). - Inlining free_pte() in the last remaining user. - In zap_empty_pte_table(), switch from pmdp_get_lockless() to pmdp_get(), we are holding the PMD PT lock. By moving the code over, compilers can also easily figure out when zap_empty_pte_table() does not initialize the pmdval variable, avoiding false-positive warnings about the variable possibly not being initialized. Link: https://lkml.kernel.org/r/20260119220708.3438514-1-david@kernel.org Link: https://lkml.kernel.org/r/20260119220708.3438514-2-david@kernel.org Signed-off-by: David Hildenbrand (Red Hat) <david@kernel.org> Reviewed-by: Qi Zheng <zhengqi.arch@bytedance.com> Cc: Liam Howlett <liam.howlett@oracle.com> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Mike Rapoport <rppt@kernel.org> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 9c8c02d commit 4c640eb

5 files changed

Lines changed: 62 additions & 98 deletions

File tree

MAINTAINERS

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16696,7 +16696,6 @@ R: Shakeel Butt <shakeel.butt@linux.dev>
1669616696
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
1669716697
L: linux-mm@kvack.org
1669816698
S: Maintained
16699-
F: mm/pt_reclaim.c
1670016699
F: mm/vmscan.c
1670116700
F: mm/workingset.c
1670216701

mm/Makefile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,5 +146,4 @@ obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o
146146
obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o
147147
obj-$(CONFIG_EXECMEM) += execmem.o
148148
obj-$(CONFIG_TMPFS_QUOTA) += shmem_quota.o
149-
obj-$(CONFIG_PT_RECLAIM) += pt_reclaim.o
150149
obj-$(CONFIG_LAZY_MMU_MODE_KUNIT_TEST) += tests/lazy_mmu_mode_kunit.o

mm/internal.h

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1743,24 +1743,6 @@ int walk_page_range_debug(struct mm_struct *mm, unsigned long start,
17431743
unsigned long end, const struct mm_walk_ops *ops,
17441744
pgd_t *pgd, void *private);
17451745

1746-
/* pt_reclaim.c */
1747-
bool try_get_and_clear_pmd(struct mm_struct *mm, pmd_t *pmd, pmd_t *pmdval);
1748-
void free_pte(struct mm_struct *mm, unsigned long addr, struct mmu_gather *tlb,
1749-
pmd_t pmdval);
1750-
void try_to_free_pte(struct mm_struct *mm, pmd_t *pmd, unsigned long addr,
1751-
struct mmu_gather *tlb);
1752-
1753-
#ifdef CONFIG_PT_RECLAIM
1754-
bool reclaim_pt_is_enabled(unsigned long start, unsigned long end,
1755-
struct zap_details *details);
1756-
#else
1757-
static inline bool reclaim_pt_is_enabled(unsigned long start, unsigned long end,
1758-
struct zap_details *details)
1759-
{
1760-
return false;
1761-
}
1762-
#endif /* CONFIG_PT_RECLAIM */
1763-
17641746
void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm);
17651747
int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm);
17661748

mm/memory.c

Lines changed: 62 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1821,11 +1821,68 @@ static inline int do_zap_pte_range(struct mmu_gather *tlb,
18211821
return nr;
18221822
}
18231823

1824+
static bool pte_table_reclaim_possible(unsigned long start, unsigned long end,
1825+
struct zap_details *details)
1826+
{
1827+
if (!IS_ENABLED(CONFIG_PT_RECLAIM))
1828+
return false;
1829+
/* Only zap if we are allowed to and cover the full page table. */
1830+
return details && details->reclaim_pt && (end - start >= PMD_SIZE);
1831+
}
1832+
1833+
static bool zap_empty_pte_table(struct mm_struct *mm, pmd_t *pmd, pmd_t *pmdval)
1834+
{
1835+
spinlock_t *pml = pmd_lockptr(mm, pmd);
1836+
1837+
if (!spin_trylock(pml))
1838+
return false;
1839+
1840+
*pmdval = pmdp_get(pmd);
1841+
pmd_clear(pmd);
1842+
spin_unlock(pml);
1843+
return true;
1844+
}
1845+
1846+
static bool zap_pte_table_if_empty(struct mm_struct *mm, pmd_t *pmd,
1847+
unsigned long addr, pmd_t *pmdval)
1848+
{
1849+
spinlock_t *pml, *ptl = NULL;
1850+
pte_t *start_pte, *pte;
1851+
int i;
1852+
1853+
pml = pmd_lock(mm, pmd);
1854+
start_pte = pte_offset_map_rw_nolock(mm, pmd, addr, pmdval, &ptl);
1855+
if (!start_pte)
1856+
goto out_ptl;
1857+
if (ptl != pml)
1858+
spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
1859+
1860+
for (i = 0, pte = start_pte; i < PTRS_PER_PTE; i++, pte++) {
1861+
if (!pte_none(ptep_get(pte)))
1862+
goto out_ptl;
1863+
}
1864+
pte_unmap(start_pte);
1865+
1866+
pmd_clear(pmd);
1867+
1868+
if (ptl != pml)
1869+
spin_unlock(ptl);
1870+
spin_unlock(pml);
1871+
return true;
1872+
out_ptl:
1873+
if (start_pte)
1874+
pte_unmap_unlock(start_pte, ptl);
1875+
if (ptl != pml)
1876+
spin_unlock(pml);
1877+
return false;
1878+
}
1879+
18241880
static unsigned long zap_pte_range(struct mmu_gather *tlb,
18251881
struct vm_area_struct *vma, pmd_t *pmd,
18261882
unsigned long addr, unsigned long end,
18271883
struct zap_details *details)
18281884
{
1885+
bool can_reclaim_pt = pte_table_reclaim_possible(addr, end, details);
18291886
bool force_flush = false, force_break = false;
18301887
struct mm_struct *mm = tlb->mm;
18311888
int rss[NR_MM_COUNTERS];
@@ -1834,7 +1891,6 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
18341891
pte_t *pte;
18351892
pmd_t pmdval;
18361893
unsigned long start = addr;
1837-
bool can_reclaim_pt = reclaim_pt_is_enabled(start, end, details);
18381894
bool direct_reclaim = true;
18391895
int nr;
18401896

@@ -1875,7 +1931,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
18751931
* from being repopulated by another thread.
18761932
*/
18771933
if (can_reclaim_pt && direct_reclaim && addr == end)
1878-
direct_reclaim = try_get_and_clear_pmd(mm, pmd, &pmdval);
1934+
direct_reclaim = zap_empty_pte_table(mm, pmd, &pmdval);
18791935

18801936
add_mm_rss_vec(mm, rss);
18811937
lazy_mmu_mode_disable();
@@ -1904,10 +1960,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
19041960
}
19051961

19061962
if (can_reclaim_pt) {
1907-
if (direct_reclaim)
1908-
free_pte(mm, start, tlb, pmdval);
1909-
else
1910-
try_to_free_pte(mm, pmd, start, tlb);
1963+
if (direct_reclaim || zap_pte_table_if_empty(mm, pmd, start, &pmdval)) {
1964+
pte_free_tlb(tlb, pmd_pgtable(pmdval), addr);
1965+
mm_dec_nr_ptes(mm);
1966+
}
19111967
}
19121968

19131969
return addr;

mm/pt_reclaim.c

Lines changed: 0 additions & 72 deletions
This file was deleted.

0 commit comments

Comments
 (0)