Skip to content

Commit a25864c

Browse files
vijaybalakrishnagregkh
authored andcommitted
arm64: Do not defer reserve_crashkernel() for platforms with no DMA memory zones
commit 0314956 upstream. The following patches resulted in deferring crash kernel reservation to mem_init(), mainly aimed at platforms with DMA memory zones (no IOMMU), in particular Raspberry Pi 4. commit 1a8e1ce ("arm64: use both ZONE_DMA and ZONE_DMA32") commit 8424ecd ("arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges") commit 0a30c53 ("arm64: mm: Move reserve_crashkernel() into mem_init()") commit 2687275 ("arm64: Force NO_BLOCK_MAPPINGS if crashkernel reservation is required") Above changes introduced boot slowdown due to linear map creation for all the memory banks with NO_BLOCK_MAPPINGS, see discussion[1]. The proposed changes restore crash kernel reservation to earlier behavior thus avoids slow boot, particularly for platforms with IOMMU (no DMA memory zones). Tested changes to confirm no ~150ms boot slowdown on our SoC with IOMMU and 8GB memory. Also tested with ZONE_DMA and/or ZONE_DMA32 configs to confirm no regression to deferring scheme of crash kernel memory reservation. In both cases successfully collected kernel crash dump. [1] https://lore.kernel.org/all/9436d033-579b-55fa-9b00-6f4b661c2dd7@linux.microsoft.com/ Signed-off-by: Vijay Balakrishna <vijayb@linux.microsoft.com> Cc: stable@vger.kernel.org Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com> Link: https://lore.kernel.org/r/1646242689-20744-1-git-send-email-vijayb@linux.microsoft.com [will: Add #ifdef CONFIG_KEXEC_CORE guards to fix 'crashk_res' references in allnoconfig build] Signed-off-by: Will Deacon <will@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent 558564d commit a25864c

2 files changed

Lines changed: 63 additions & 5 deletions

File tree

arch/arm64/mm/init.c

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,34 @@ EXPORT_SYMBOL(memstart_addr);
5858
* unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
5959
* In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
6060
* otherwise it is empty.
61+
*
62+
* Memory reservation for crash kernel is either done early or deferred
63+
* depending on DMA memory zones configs (ZONE_DMA) --
64+
*
65+
* In absence of ZONE_DMA configs arm64_dma_phys_limit is initialized
66+
* here instead of max_zone_phys(). This lets early reservation of
67+
* crash kernel memory which has a dependency on arm64_dma_phys_limit.
68+
* Reserving memory early for crash kernel allows linear creation of block
69+
* mappings (greater than page-granularity) for all the memory bank ranges.
70+
* In this scheme a comparatively quicker boot is observed.
71+
*
72+
* If ZONE_DMA configs are defined, crash kernel memory reservation
73+
* is delayed until DMA zone memory range size initialization is performed in
74+
* zone_sizes_init(). The defer is necessary to steer clear of DMA zone
75+
* memory range to avoid overlap allocation. So crash kernel memory boundaries
76+
* are not known when mapping all bank memory ranges, which otherwise means
77+
* not possible to exclude crash kernel range from creating block mappings
78+
* so page-granularity mappings are created for the entire memory range.
79+
* Hence a slightly slower boot is observed.
80+
*
81+
* Note: Page-granularity mappings are necessary for crash kernel memory
82+
* range for shrinking its size via /sys/kernel/kexec_crash_size interface.
6183
*/
62-
phys_addr_t arm64_dma_phys_limit __ro_after_init;
84+
#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
85+
phys_addr_t __ro_after_init arm64_dma_phys_limit;
86+
#else
87+
phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1;
88+
#endif
6389

6490
#ifdef CONFIG_KEXEC_CORE
6591
/*
@@ -210,8 +236,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
210236
if (!arm64_dma_phys_limit)
211237
arm64_dma_phys_limit = dma32_phys_limit;
212238
#endif
213-
if (!arm64_dma_phys_limit)
214-
arm64_dma_phys_limit = PHYS_MASK + 1;
215239
max_zone_pfns[ZONE_NORMAL] = max;
216240

217241
free_area_init(max_zone_pfns);
@@ -407,6 +431,9 @@ void __init arm64_memblock_init(void)
407431

408432
reserve_elfcorehdr();
409433

434+
if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32))
435+
reserve_crashkernel();
436+
410437
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
411438
}
412439

@@ -451,7 +478,8 @@ void __init bootmem_init(void)
451478
* request_standard_resources() depends on crashkernel's memory being
452479
* reserved, so do it here.
453480
*/
454-
reserve_crashkernel();
481+
if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32))
482+
reserve_crashkernel();
455483

456484
memblock_dump_all();
457485
}

arch/arm64/mm/mmu.c

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,7 @@ static void __init map_mem(pgd_t *pgdp)
501501
int flags = 0;
502502
u64 i;
503503

504-
if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
504+
if (rodata_full || debug_pagealloc_enabled())
505505
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
506506

507507
/*
@@ -512,6 +512,17 @@ static void __init map_mem(pgd_t *pgdp)
512512
*/
513513
memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
514514

515+
#ifdef CONFIG_KEXEC_CORE
516+
if (crash_mem_map) {
517+
if (IS_ENABLED(CONFIG_ZONE_DMA) ||
518+
IS_ENABLED(CONFIG_ZONE_DMA32))
519+
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
520+
else if (crashk_res.end)
521+
memblock_mark_nomap(crashk_res.start,
522+
resource_size(&crashk_res));
523+
}
524+
#endif
525+
515526
/* map all the memory banks */
516527
for_each_mem_range(i, &start, &end) {
517528
if (start >= end)
@@ -538,6 +549,25 @@ static void __init map_mem(pgd_t *pgdp)
538549
__map_memblock(pgdp, kernel_start, kernel_end,
539550
PAGE_KERNEL, NO_CONT_MAPPINGS);
540551
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
552+
553+
/*
554+
* Use page-level mappings here so that we can shrink the region
555+
* in page granularity and put back unused memory to buddy system
556+
* through /sys/kernel/kexec_crash_size interface.
557+
*/
558+
#ifdef CONFIG_KEXEC_CORE
559+
if (crash_mem_map &&
560+
!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) {
561+
if (crashk_res.end) {
562+
__map_memblock(pgdp, crashk_res.start,
563+
crashk_res.end + 1,
564+
PAGE_KERNEL,
565+
NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
566+
memblock_clear_nomap(crashk_res.start,
567+
resource_size(&crashk_res));
568+
}
569+
}
570+
#endif
541571
}
542572

543573
void mark_rodata_ro(void)

0 commit comments

Comments
 (0)