Skip to content

Commit f0395d5

Browse files
committed
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
 "7 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  - mm, swap: fix race between swap count continuation operations
  - mm/huge_memory.c: deposit page table when copying a PMD migration entry
  - initramfs: fix initramfs rebuilds w/ compression after disabling
  - fs/hugetlbfs/inode.c: fix hwpoison reserve accounting
  - ocfs2: fstrim: Fix start offset of first cluster group during fstrim
  - mm, /proc/pid/pagemap: fix soft dirty marking for PMD migration entry
  - userfaultfd: hugetlbfs: prevent UFFDIO_COPY to fill beyond the end of i_size
2 parents fb615d6 + 2628bd6 commit f0395d5

8 files changed

Lines changed: 86 additions & 20 deletions

File tree

fs/hugetlbfs/inode.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -842,9 +842,12 @@ static int hugetlbfs_error_remove_page(struct address_space *mapping,
842842
struct page *page)
843843
{
844844
struct inode *inode = mapping->host;
845+
pgoff_t index = page->index;
845846

846847
remove_huge_page(page);
847-
hugetlb_fix_reserve_counts(inode);
848+
if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1)))
849+
hugetlb_fix_reserve_counts(inode);
850+
848851
return 0;
849852
}
850853

fs/ocfs2/alloc.c

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7304,21 +7304,32 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
73047304

73057305
static int ocfs2_trim_extent(struct super_block *sb,
73067306
struct ocfs2_group_desc *gd,
7307-
u32 start, u32 count)
7307+
u64 group, u32 start, u32 count)
73087308
{
73097309
u64 discard, bcount;
7310+
struct ocfs2_super *osb = OCFS2_SB(sb);
73107311

73117312
bcount = ocfs2_clusters_to_blocks(sb, count);
7312-
discard = le64_to_cpu(gd->bg_blkno) +
7313-
ocfs2_clusters_to_blocks(sb, start);
7313+
discard = ocfs2_clusters_to_blocks(sb, start);
7314+
7315+
/*
7316+
* For the first cluster group, the gd->bg_blkno is not at the start
7317+
* of the group, but at an offset from the start. If we add it while
7318+
* calculating discard for first group, we will wrongly start fstrim a
7319+
* few blocks after the desired start block and the range can cross
7320+
* over into the next cluster group. So, add it only if this is not
7321+
* the first cluster group.
7322+
*/
7323+
if (group != osb->first_cluster_group_blkno)
7324+
discard += le64_to_cpu(gd->bg_blkno);
73147325

73157326
trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount);
73167327

73177328
return sb_issue_discard(sb, discard, bcount, GFP_NOFS, 0);
73187329
}
73197330

73207331
static int ocfs2_trim_group(struct super_block *sb,
7321-
struct ocfs2_group_desc *gd,
7332+
struct ocfs2_group_desc *gd, u64 group,
73227333
u32 start, u32 max, u32 minbits)
73237334
{
73247335
int ret = 0, count = 0, next;
@@ -7337,7 +7348,7 @@ static int ocfs2_trim_group(struct super_block *sb,
73377348
next = ocfs2_find_next_bit(bitmap, max, start);
73387349

73397350
if ((next - start) >= minbits) {
7340-
ret = ocfs2_trim_extent(sb, gd,
7351+
ret = ocfs2_trim_extent(sb, gd, group,
73417352
start, next - start);
73427353
if (ret < 0) {
73437354
mlog_errno(ret);
@@ -7435,7 +7446,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
74357446
}
74367447

74377448
gd = (struct ocfs2_group_desc *)gd_bh->b_data;
7438-
cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen);
7449+
cnt = ocfs2_trim_group(sb, gd, group,
7450+
first_bit, last_bit, minlen);
74397451
brelse(gd_bh);
74407452
gd_bh = NULL;
74417453
if (cnt < 0) {

fs/proc/task_mmu.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1311,13 +1311,15 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
13111311
pmd_t pmd = *pmdp;
13121312
struct page *page = NULL;
13131313

1314-
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
1314+
if (vma->vm_flags & VM_SOFTDIRTY)
13151315
flags |= PM_SOFT_DIRTY;
13161316

13171317
if (pmd_present(pmd)) {
13181318
page = pmd_page(pmd);
13191319

13201320
flags |= PM_PRESENT;
1321+
if (pmd_soft_dirty(pmd))
1322+
flags |= PM_SOFT_DIRTY;
13211323
if (pm->show_pfn)
13221324
frame = pmd_pfn(pmd) +
13231325
((addr & ~PMD_MASK) >> PAGE_SHIFT);
@@ -1329,6 +1331,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
13291331
frame = swp_type(entry) |
13301332
(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
13311333
flags |= PM_SWAP;
1334+
if (pmd_swp_soft_dirty(pmd))
1335+
flags |= PM_SOFT_DIRTY;
13321336
VM_BUG_ON(!is_pmd_migration_entry(pmd));
13331337
page = migration_entry_to_page(entry);
13341338
}

include/linux/swap.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,10 @@ struct swap_info_struct {
266266
* both locks need to be held, hold swap_lock
267267
* first.
268268
*/
269+
spinlock_t cont_lock; /*
270+
* protect swap count continuation page
271+
* list.
272+
*/
269273
struct work_struct discard_work; /* discard worker */
270274
struct swap_cluster_list discard_clusters; /* discard clusters list */
271275
};

mm/huge_memory.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -941,6 +941,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
941941
pmd = pmd_swp_mksoft_dirty(pmd);
942942
set_pmd_at(src_mm, addr, src_pmd, pmd);
943943
}
944+
add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
945+
atomic_long_inc(&dst_mm->nr_ptes);
946+
pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
944947
set_pmd_at(dst_mm, addr, dst_pmd, pmd);
945948
ret = 0;
946949
goto out_unlock;

mm/hugetlb.c

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3984,6 +3984,9 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
39843984
unsigned long src_addr,
39853985
struct page **pagep)
39863986
{
3987+
struct address_space *mapping;
3988+
pgoff_t idx;
3989+
unsigned long size;
39873990
int vm_shared = dst_vma->vm_flags & VM_SHARED;
39883991
struct hstate *h = hstate_vma(dst_vma);
39893992
pte_t _dst_pte;
@@ -4021,13 +4024,24 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
40214024
__SetPageUptodate(page);
40224025
set_page_huge_active(page);
40234026

4027+
mapping = dst_vma->vm_file->f_mapping;
4028+
idx = vma_hugecache_offset(h, dst_vma, dst_addr);
4029+
40244030
/*
40254031
* If shared, add to page cache
40264032
*/
40274033
if (vm_shared) {
4028-
struct address_space *mapping = dst_vma->vm_file->f_mapping;
4029-
pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr);
4034+
size = i_size_read(mapping->host) >> huge_page_shift(h);
4035+
ret = -EFAULT;
4036+
if (idx >= size)
4037+
goto out_release_nounlock;
40304038

4039+
/*
4040+
* Serialization between remove_inode_hugepages() and
4041+
* huge_add_to_page_cache() below happens through the
4042+
* hugetlb_fault_mutex_table that here must be held by
4043+
* the caller.
4044+
*/
40314045
ret = huge_add_to_page_cache(page, mapping, idx);
40324046
if (ret)
40334047
goto out_release_nounlock;
@@ -4036,6 +4050,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
40364050
ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
40374051
spin_lock(ptl);
40384052

4053+
/*
4054+
* Recheck the i_size after holding PT lock to make sure not
4055+
* to leave any page mapped (as page_mapped()) beyond the end
4056+
* of the i_size (remove_inode_hugepages() is strict about
4057+
* enforcing that). If we bail out here, we'll also leave a
4058+
* page in the radix tree in the vm_shared case beyond the end
4059+
* of the i_size, but remove_inode_hugepages() will take care
4060+
* of it as soon as we drop the hugetlb_fault_mutex_table.
4061+
*/
4062+
size = i_size_read(mapping->host) >> huge_page_shift(h);
4063+
ret = -EFAULT;
4064+
if (idx >= size)
4065+
goto out_release_unlock;
4066+
40394067
ret = -EEXIST;
40404068
if (!huge_pte_none(huge_ptep_get(dst_pte)))
40414069
goto out_release_unlock;

mm/swapfile.c

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2869,6 +2869,7 @@ static struct swap_info_struct *alloc_swap_info(void)
28692869
p->flags = SWP_USED;
28702870
spin_unlock(&swap_lock);
28712871
spin_lock_init(&p->lock);
2872+
spin_lock_init(&p->cont_lock);
28722873

28732874
return p;
28742875
}
@@ -3545,6 +3546,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
35453546
head = vmalloc_to_page(si->swap_map + offset);
35463547
offset &= ~PAGE_MASK;
35473548

3549+
spin_lock(&si->cont_lock);
35483550
/*
35493551
* Page allocation does not initialize the page's lru field,
35503552
* but it does always reset its private field.
@@ -3564,7 +3566,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
35643566
* a continuation page, free our allocation and use this one.
35653567
*/
35663568
if (!(count & COUNT_CONTINUED))
3567-
goto out;
3569+
goto out_unlock_cont;
35683570

35693571
map = kmap_atomic(list_page) + offset;
35703572
count = *map;
@@ -3575,11 +3577,13 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
35753577
* free our allocation and use this one.
35763578
*/
35773579
if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX)
3578-
goto out;
3580+
goto out_unlock_cont;
35793581
}
35803582

35813583
list_add_tail(&page->lru, &head->lru);
35823584
page = NULL; /* now it's attached, don't free it */
3585+
out_unlock_cont:
3586+
spin_unlock(&si->cont_lock);
35833587
out:
35843588
unlock_cluster(ci);
35853589
spin_unlock(&si->lock);
@@ -3604,13 +3608,15 @@ static bool swap_count_continued(struct swap_info_struct *si,
36043608
struct page *head;
36053609
struct page *page;
36063610
unsigned char *map;
3611+
bool ret;
36073612

36083613
head = vmalloc_to_page(si->swap_map + offset);
36093614
if (page_private(head) != SWP_CONTINUED) {
36103615
BUG_ON(count & COUNT_CONTINUED);
36113616
return false; /* need to add count continuation */
36123617
}
36133618

3619+
spin_lock(&si->cont_lock);
36143620
offset &= ~PAGE_MASK;
36153621
page = list_entry(head->lru.next, struct page, lru);
36163622
map = kmap_atomic(page) + offset;
@@ -3631,8 +3637,10 @@ static bool swap_count_continued(struct swap_info_struct *si,
36313637
if (*map == SWAP_CONT_MAX) {
36323638
kunmap_atomic(map);
36333639
page = list_entry(page->lru.next, struct page, lru);
3634-
if (page == head)
3635-
return false; /* add count continuation */
3640+
if (page == head) {
3641+
ret = false; /* add count continuation */
3642+
goto out;
3643+
}
36363644
map = kmap_atomic(page) + offset;
36373645
init_map: *map = 0; /* we didn't zero the page */
36383646
}
@@ -3645,7 +3653,7 @@ init_map: *map = 0; /* we didn't zero the page */
36453653
kunmap_atomic(map);
36463654
page = list_entry(page->lru.prev, struct page, lru);
36473655
}
3648-
return true; /* incremented */
3656+
ret = true; /* incremented */
36493657

36503658
} else { /* decrementing */
36513659
/*
@@ -3671,8 +3679,11 @@ init_map: *map = 0; /* we didn't zero the page */
36713679
kunmap_atomic(map);
36723680
page = list_entry(page->lru.prev, struct page, lru);
36733681
}
3674-
return count == COUNT_CONTINUED;
3682+
ret = count == COUNT_CONTINUED;
36753683
}
3684+
out:
3685+
spin_unlock(&si->cont_lock);
3686+
return ret;
36763687
}
36773688

36783689
/*

usr/Makefile

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ PHONY += klibcdirs
88

99
suffix_y = $(subst $\",,$(CONFIG_INITRAMFS_COMPRESSION))
1010
datafile_y = initramfs_data.cpio$(suffix_y)
11+
datafile_d_y = .$(datafile_y).d
1112
AFLAGS_initramfs_data.o += -DINITRAMFS_IMAGE="usr/$(datafile_y)"
1213

1314

@@ -30,12 +31,12 @@ ramfs-args := \
3031
$(if $(CONFIG_INITRAMFS_ROOT_UID), -u $(CONFIG_INITRAMFS_ROOT_UID)) \
3132
$(if $(CONFIG_INITRAMFS_ROOT_GID), -g $(CONFIG_INITRAMFS_ROOT_GID))
3233

33-
# .initramfs_data.cpio.d is used to identify all files included
34+
# $(datafile_d_y) is used to identify all files included
3435
# in initramfs and to detect if any files are added/removed.
3536
# Removed files are identified by directory timestamp being updated
3637
# The dependency list is generated by gen_initramfs.sh -l
37-
ifneq ($(wildcard $(obj)/.initramfs_data.cpio.d),)
38-
include $(obj)/.initramfs_data.cpio.d
38+
ifneq ($(wildcard $(obj)/$(datafile_d_y)),)
39+
include $(obj)/$(datafile_d_y)
3940
endif
4041

4142
quiet_cmd_initfs = GEN $@
@@ -53,5 +54,5 @@ $(deps_initramfs): klibcdirs
5354
# 3) If gen_init_cpio is newer than initramfs_data.cpio
5455
# 4) arguments to gen_initramfs.sh change
5556
$(obj)/$(datafile_y): $(obj)/gen_init_cpio $(deps_initramfs) klibcdirs
56-
$(Q)$(initramfs) -l $(ramfs-input) > $(obj)/.initramfs_data.cpio.d
57+
$(Q)$(initramfs) -l $(ramfs-input) > $(obj)/$(datafile_d_y)
5758
$(call if_changed,initfs)

0 commit comments

Comments
 (0)