Commit db0062d

rppt authored and akpm00 committed
userfaultfd: introduce struct mfill_state
mfill_atomic() passes a lot of parameters down to its callees. Aggregate
them all into an mfill_state structure and pass this structure to the
functions that implement the various UFFDIO_ commands.

Tracking the state in a structure will allow moving the code that retries
copying of data for UFFDIO_COPY into mfill_atomic_pte_copy() and will make
the loop in mfill_atomic() identical for all UFFDIO operations on
PTE-mapped memory.

The mfill_state definition is deliberately local to mm/userfaultfd.c,
hence shmem_mfill_atomic_pte() is not updated.

[harry.yoo@oracle.com: properly initialize mfill_state.len to fix folio_add_new_anon_rmap() WARN]
Link: https://lore.kernel.org/abehBY7QakYF9bK4@hyeyoo
Link: https://lore.kernel.org/20260402041156.1377214-3-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrei Vagin <avagin@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nikita Kalyazin <kalyazin@amazon.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Carlier <devnexen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
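
Editor's note: the shape of this change is the classic "parameter object"
refactoring. Below is a minimal, standalone C sketch of the pattern with
simplified placeholder types and hypothetical names (struct
mfill_state_sketch, fill_one_page); it is only an illustration, not kernel
code. The real struct and loop are in the diff that follows.

	/*
	 * Illustration only: per-call parameters and loop cursors are
	 * gathered into one struct, so callees take a single pointer
	 * instead of a long scalar argument list.
	 */
	#include <stdio.h>

	#define SKETCH_PAGE_SIZE 4096UL

	struct mfill_state_sketch {
		/* fixed per-call parameters, set once by the entry point */
		unsigned long src_start;
		unsigned long dst_start;
		unsigned long len;
		/* cursor state, advanced as the fill loop makes progress */
		unsigned long src_addr;
		unsigned long dst_addr;
	};

	/* callees read parameters and update cursors through one pointer */
	static int fill_one_page(struct mfill_state_sketch *state)
	{
		printf("fill %#lx -> %#lx\n", state->src_addr, state->dst_addr);
		return 0;
	}

	int main(void)
	{
		struct mfill_state_sketch state = {
			.src_start = 0x1000, .dst_start = 0x9000,
			.len = 2 * SKETCH_PAGE_SIZE,
			.src_addr = 0x1000, .dst_addr = 0x9000,
		};

		while (state.src_addr < state.src_start + state.len) {
			if (fill_one_page(&state))
				break;
			state.src_addr += SKETCH_PAGE_SIZE;
			state.dst_addr += SKETCH_PAGE_SIZE;
		}
		return 0;
	}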
1 parent c062048

1 file changed: mm/userfaultfd.c (81 additions, 66 deletions)
mm/userfaultfd.c

@@ -20,6 +20,20 @@
 #include "internal.h"
 #include "swap.h"
 
+struct mfill_state {
+	struct userfaultfd_ctx *ctx;
+	unsigned long src_start;
+	unsigned long dst_start;
+	unsigned long len;
+	uffd_flags_t flags;
+
+	struct vm_area_struct *vma;
+	unsigned long src_addr;
+	unsigned long dst_addr;
+	struct folio *folio;
+	pmd_t *pmd;
+};
+
 static __always_inline
 bool validate_dst_vma(struct vm_area_struct *dst_vma, unsigned long dst_end)
 {
@@ -272,17 +286,17 @@ static int mfill_copy_folio_locked(struct folio *folio, unsigned long src_addr)
 	return ret;
 }
 
-static int mfill_atomic_pte_copy(pmd_t *dst_pmd,
-				 struct vm_area_struct *dst_vma,
-				 unsigned long dst_addr,
-				 unsigned long src_addr,
-				 uffd_flags_t flags,
-				 struct folio **foliop)
+static int mfill_atomic_pte_copy(struct mfill_state *state)
 {
-	int ret;
+	struct vm_area_struct *dst_vma = state->vma;
+	unsigned long dst_addr = state->dst_addr;
+	unsigned long src_addr = state->src_addr;
+	uffd_flags_t flags = state->flags;
+	pmd_t *dst_pmd = state->pmd;
 	struct folio *folio;
+	int ret;
 
-	if (!*foliop) {
+	if (!state->folio) {
 		ret = -ENOMEM;
 		folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, dst_vma,
 					dst_addr);
@@ -294,13 +308,13 @@ static int mfill_atomic_pte_copy(pmd_t *dst_pmd,
 		/* fallback to copy_from_user outside mmap_lock */
 		if (unlikely(ret)) {
 			ret = -ENOENT;
-			*foliop = folio;
+			state->folio = folio;
 			/* don't free the page */
 			goto out;
 		}
 	} else {
-		folio = *foliop;
-		*foliop = NULL;
+		folio = state->folio;
+		state->folio = NULL;
 	}
 
 	/*
@@ -357,10 +371,11 @@ static int mfill_atomic_pte_zeroed_folio(pmd_t *dst_pmd,
 	return ret;
 }
 
-static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd,
-				     struct vm_area_struct *dst_vma,
-				     unsigned long dst_addr)
+static int mfill_atomic_pte_zeropage(struct mfill_state *state)
 {
+	struct vm_area_struct *dst_vma = state->vma;
+	unsigned long dst_addr = state->dst_addr;
+	pmd_t *dst_pmd = state->pmd;
 	pte_t _dst_pte, *dst_pte;
 	spinlock_t *ptl;
 	int ret;
@@ -392,13 +407,14 @@ static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd,
 }
 
 /* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */
-static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
-				     struct vm_area_struct *dst_vma,
-				     unsigned long dst_addr,
-				     uffd_flags_t flags)
+static int mfill_atomic_pte_continue(struct mfill_state *state)
 {
-	struct inode *inode = file_inode(dst_vma->vm_file);
+	struct vm_area_struct *dst_vma = state->vma;
+	unsigned long dst_addr = state->dst_addr;
 	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
+	struct inode *inode = file_inode(dst_vma->vm_file);
+	uffd_flags_t flags = state->flags;
+	pmd_t *dst_pmd = state->pmd;
 	struct folio *folio;
 	struct page *page;
 	int ret;
@@ -436,15 +452,15 @@ static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
 }
 
 /* Handles UFFDIO_POISON for all non-hugetlb VMAs. */
-static int mfill_atomic_pte_poison(pmd_t *dst_pmd,
-				   struct vm_area_struct *dst_vma,
-				   unsigned long dst_addr,
-				   uffd_flags_t flags)
+static int mfill_atomic_pte_poison(struct mfill_state *state)
 {
-	int ret;
+	struct vm_area_struct *dst_vma = state->vma;
 	struct mm_struct *dst_mm = dst_vma->vm_mm;
+	unsigned long dst_addr = state->dst_addr;
+	pmd_t *dst_pmd = state->pmd;
 	pte_t _dst_pte, *dst_pte;
 	spinlock_t *ptl;
+	int ret;
 
 	_dst_pte = make_pte_marker(PTE_MARKER_POISONED);
 	ret = -EAGAIN;
@@ -668,22 +684,20 @@ extern ssize_t mfill_atomic_hugetlb(struct userfaultfd_ctx *ctx,
 				    uffd_flags_t flags);
 #endif /* CONFIG_HUGETLB_PAGE */
 
-static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd,
-						struct vm_area_struct *dst_vma,
-						unsigned long dst_addr,
-						unsigned long src_addr,
-						uffd_flags_t flags,
-						struct folio **foliop)
+static __always_inline ssize_t mfill_atomic_pte(struct mfill_state *state)
 {
+	struct vm_area_struct *dst_vma = state->vma;
+	unsigned long src_addr = state->src_addr;
+	unsigned long dst_addr = state->dst_addr;
+	struct folio **foliop = &state->folio;
+	uffd_flags_t flags = state->flags;
+	pmd_t *dst_pmd = state->pmd;
 	ssize_t err;
 
-	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) {
-		return mfill_atomic_pte_continue(dst_pmd, dst_vma,
-						 dst_addr, flags);
-	} else if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) {
-		return mfill_atomic_pte_poison(dst_pmd, dst_vma,
-					       dst_addr, flags);
-	}
+	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
+		return mfill_atomic_pte_continue(state);
+	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON))
+		return mfill_atomic_pte_poison(state);
 
 	/*
 	 * The normal page fault path for a shmem will invoke the
@@ -697,12 +711,9 @@ static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd,
 	 */
 	if (!(dst_vma->vm_flags & VM_SHARED)) {
 		if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY))
-			err = mfill_atomic_pte_copy(dst_pmd, dst_vma,
-						    dst_addr, src_addr,
-						    flags, foliop);
+			err = mfill_atomic_pte_copy(state);
 		else
-			err = mfill_atomic_pte_zeropage(dst_pmd,
-						 dst_vma, dst_addr);
+			err = mfill_atomic_pte_zeropage(state);
 	} else {
 		err = shmem_mfill_atomic_pte(dst_pmd, dst_vma,
 					     dst_addr, src_addr,
@@ -718,13 +729,20 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
 				      unsigned long len,
 				      uffd_flags_t flags)
 {
+	struct mfill_state state = (struct mfill_state){
+		.ctx = ctx,
+		.dst_start = dst_start,
+		.src_start = src_start,
+		.flags = flags,
+		.len = len,
+		.src_addr = src_start,
+		.dst_addr = dst_start,
+	};
 	struct mm_struct *dst_mm = ctx->mm;
 	struct vm_area_struct *dst_vma;
+	long copied = 0;
 	ssize_t err;
 	pmd_t *dst_pmd;
-	unsigned long src_addr, dst_addr;
-	long copied;
-	struct folio *folio;
 
 	/*
 	 * Sanitize the command parameters:
@@ -736,10 +754,6 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
 	VM_WARN_ON_ONCE(src_start + len <= src_start);
 	VM_WARN_ON_ONCE(dst_start + len <= dst_start);
 
-	src_addr = src_start;
-	dst_addr = dst_start;
-	copied = 0;
-	folio = NULL;
 retry:
 	/*
 	 * Make sure the vma is not shared, that the dst range is
@@ -750,6 +764,7 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
 		err = PTR_ERR(dst_vma);
 		goto out;
 	}
+	state.vma = dst_vma;
 
 	/*
 	 * If memory mappings are changing because of non-cooperative
@@ -790,12 +805,12 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
 	    uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
 		goto out_unlock;
 
-	while (src_addr < src_start + len) {
-		pmd_t dst_pmdval;
+	while (state.src_addr < src_start + len) {
+		VM_WARN_ON_ONCE(state.dst_addr >= dst_start + len);
 
-		VM_WARN_ON_ONCE(dst_addr >= dst_start + len);
+		pmd_t dst_pmdval;
 
-		dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
+		dst_pmd = mm_alloc_pmd(dst_mm, state.dst_addr);
 		if (unlikely(!dst_pmd)) {
 			err = -ENOMEM;
 			break;
@@ -827,34 +842,34 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
 		 * tables under us; pte_offset_map_lock() will deal with that.
 		 */
 
-		err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr,
-				       src_addr, flags, &folio);
+		state.pmd = dst_pmd;
+		err = mfill_atomic_pte(&state);
 		cond_resched();
 
 		if (unlikely(err == -ENOENT)) {
 			void *kaddr;
 
 			up_read(&ctx->map_changing_lock);
-			uffd_mfill_unlock(dst_vma);
-			VM_WARN_ON_ONCE(!folio);
+			uffd_mfill_unlock(state.vma);
+			VM_WARN_ON_ONCE(!state.folio);
 
-			kaddr = kmap_local_folio(folio, 0);
+			kaddr = kmap_local_folio(state.folio, 0);
 			err = copy_from_user(kaddr,
-					     (const void __user *) src_addr,
+					     (const void __user *)state.src_addr,
 					     PAGE_SIZE);
 			kunmap_local(kaddr);
 			if (unlikely(err)) {
 				err = -EFAULT;
 				goto out;
 			}
-			flush_dcache_folio(folio);
+			flush_dcache_folio(state.folio);
 			goto retry;
 		} else
-			VM_WARN_ON_ONCE(folio);
+			VM_WARN_ON_ONCE(state.folio);
 
 		if (!err) {
-			dst_addr += PAGE_SIZE;
-			src_addr += PAGE_SIZE;
+			state.dst_addr += PAGE_SIZE;
+			state.src_addr += PAGE_SIZE;
 			copied += PAGE_SIZE;
 
 			if (fatal_signal_pending(current))
@@ -866,10 +881,10 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
 
 out_unlock:
 	up_read(&ctx->map_changing_lock);
-	uffd_mfill_unlock(dst_vma);
+	uffd_mfill_unlock(state.vma);
 out:
-	if (folio)
-		folio_put(folio);
+	if (state.folio)
+		folio_put(state.folio);
 	VM_WARN_ON_ONCE(copied < 0);
 	VM_WARN_ON_ONCE(err > 0);
 	VM_WARN_ON_ONCE(!copied && !err);
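
Editor's note: the commit message says this refactoring is a step toward
handling the -ENOENT retry inside mfill_atomic_pte_copy(), so that the
main loop becomes identical for all UFFDIO operations on PTE-mapped
memory. A rough sketch of that target loop shape, purely illustrative
and not code from this commit or any follow-up:

	/*
	 * Hypothetical end state: with the copy_from_user() retry moved
	 * into mfill_atomic_pte_copy(), the loop no longer special-cases
	 * -ENOENT and simply advances the cursors carried in the state.
	 */
	while (state.src_addr < src_start + len) {
		err = mfill_atomic_pte(&state);	/* retries internally */
		if (err)
			break;
		state.dst_addr += PAGE_SIZE;
		state.src_addr += PAGE_SIZE;
		copied += PAGE_SIZE;
	}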
