Merge tag 'mm-hotfixes-stable-2026-01-15-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:

 - kerneldoc fixes from Bagas Sanjaya

 - DAMON fixes from SeongJae

 - mremap VMA-related fixes from Lorenzo

 - various singletons - please see the changelogs for details

* tag 'mm-hotfixes-stable-2026-01-15-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (30 commits)
  drivers/dax: add some missing kerneldoc comment fields for struct dev_dax
  mm: numa,memblock: include <asm/numa.h> for 'numa_nodes_parsed'
  mailmap: add entry for Daniel Thompson
  tools/testing/selftests: fix gup_longterm for unknown fs
  mm/page_alloc: prevent pcp corruption with SMP=n
  iommu/sva: include mmu_notifier.h header
  mm: kmsan: fix poisoning of high-order non-compound pages
  tools/testing/selftests: add forked (un)/faulted VMA merge tests
  mm/vma: enforce VMA fork limit on unfaulted,faulted mremap merge too
  tools/testing/selftests: add tests for !tgt, src mremap() merges
  mm/vma: fix anon_vma UAF on mremap() faulted, unfaulted merge
  mm/zswap: fix error pointer free in zswap_cpu_comp_prepare()
  mm/damon/sysfs-scheme: cleanup access_pattern subdirs on scheme dir setup failure
  mm/damon/sysfs-scheme: cleanup quotas subdirs on scheme dir setup failure
  mm/damon/sysfs: cleanup attrs subdirs on context dir setup failure
  mm/damon/sysfs: cleanup intervals subdirs on attrs dir setup failure
  mm/damon/core: remove call_control in inactive contexts
  powerpc/watchdog: add support for hardlockup_sys_info sysctl
  mips: fix HIGHMEM initialization
  mm/hugetlb: ignore hugepage kernel args if hugepages are unsupported
  ...

.mailmap | 2
@@ -207,6 +207,7 @@ Daniel Borkmann <daniel@iogearbox.net> <daniel.borkmann@tik.ee.ethz.ch>
Daniel Borkmann <daniel@iogearbox.net> <dborkmann@redhat.com>
Daniel Borkmann <daniel@iogearbox.net> <dborkman@redhat.com>
Daniel Borkmann <daniel@iogearbox.net> <dxchgb@gmail.com>
Daniel Thompson <danielt@kernel.org> <daniel.thompson@linaro.org>
Danilo Krummrich <dakr@kernel.org> <dakr@redhat.com>
David Brownell <david-b@pacbell.net>
David Collins <quic_collinsd@quicinc.com> <collinsd@codeaurora.org>
@@ -794,6 +795,7 @@ Sven Eckelmann <sven@narfation.org> <sven.eckelmann@open-mesh.com>
Sven Eckelmann <sven@narfation.org> <sven.eckelmann@openmesh.com>
Sven Eckelmann <sven@narfation.org> <sven@open-mesh.com>
Sven Peter <sven@kernel.org> <sven@svenpeter.dev>
Szymon Wilczek <swilczek.lx@gmail.com> <szymonwilczek@gmx.com>
Takashi YOSHII <takashi.yoshii.zj@renesas.com>
Tamizh Chelvam Raja <quic_tamizhr@quicinc.com> <tamizhr@codeaurora.org>
Taniya Das <quic_tdas@quicinc.com> <tdas@codeaurora.org>

@@ -2917,6 +2917,41 @@ Kernel parameters
                        for Movable pages. "nn[KMGTPE]", "nn%", and "mirror"
                        are exclusive, so you cannot specify multiple forms.

        kfence.burst=   [MM,KFENCE] The number of additional successive
                        allocations to be attempted through KFENCE for each
                        sample interval.
                        Format: <unsigned integer>
                        Default: 0

        kfence.check_on_panic=
                        [MM,KFENCE] Whether to check all KFENCE-managed objects'
                        canaries on panic.
                        Format: <bool>
                        Default: false

        kfence.deferrable=
                        [MM,KFENCE] Whether to use a deferrable timer to trigger
                        allocations. This avoids forcing CPU wake-ups if the
                        system is idle, at the risk of a less predictable
                        sample interval.
                        Format: <bool>
                        Default: CONFIG_KFENCE_DEFERRABLE

        kfence.sample_interval=
                        [MM,KFENCE] KFENCE's sample interval in milliseconds.
                        Format: <unsigned integer>
                        0 - Disable KFENCE.
                        >0 - Enabled KFENCE with given sample interval.
                        Default: CONFIG_KFENCE_SAMPLE_INTERVAL

        kfence.skip_covered_thresh=
                        [MM,KFENCE] If pool utilization reaches this threshold
                        (pool usage%), KFENCE limits currently covered
                        allocations of the same source from further filling
                        up the pool.
                        Format: <unsigned integer>
                        Default: 75

        kgdbdbgp=       [KGDB,HW,EARLY] kgdb over EHCI usb debug port.
                        Format: <Controller#>[,poll interval]
                        The controller # is the number of the ehci usb debug

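As a quick illustration of how the KFENCE parameters documented above combine on the kernel command line (the values here are arbitrary, chosen only for this example): a 500 ms sample interval with two extra burst allocations per sample and canary checks on panic could be requested with:

        kfence.sample_interval=500 kfence.burst=2 kfence.check_on_panic=1
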
@@ -425,6 +425,28 @@ void __init paging_init(void)
static struct kcore_list kcore_kseg0;
#endif

static inline void __init highmem_init(void)
{
#ifdef CONFIG_HIGHMEM
        unsigned long tmp;

        /*
         * If CPU cannot support HIGHMEM discard the memory above highstart_pfn
         */
        if (cpu_has_dc_aliases) {
                memblock_remove(PFN_PHYS(highstart_pfn), -1);
                return;
        }

        for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
                struct page *page = pfn_to_page(tmp);

                if (!memblock_is_memory(PFN_PHYS(tmp)))
                        SetPageReserved(page);
        }
#endif
}

void __init arch_mm_preinit(void)
{
        /*
@@ -435,6 +457,7 @@ void __init arch_mm_preinit(void)

        maar_init();
        setup_zero_pages();     /* Setup zeroed pages. */
        highmem_init();

#ifdef CONFIG_64BIT
        if ((unsigned long) &_text > (unsigned long) CKSEG0)

@@ -26,6 +26,7 @@
#include <linux/delay.h>
#include <linux/processor.h>
#include <linux/smp.h>
#include <linux/sys_info.h>

#include <asm/interrupt.h>
#include <asm/paca.h>
@@ -235,7 +236,11 @@ static void watchdog_smp_panic(int cpu)
        pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n",
                 cpu, tb, last_reset, tb_to_ns(tb - last_reset) / 1000000);

        if (!sysctl_hardlockup_all_cpu_backtrace) {
        if (sysctl_hardlockup_all_cpu_backtrace ||
            (hardlockup_si_mask & SYS_INFO_ALL_BT)) {
                trigger_allbutcpu_cpu_backtrace(cpu);
                cpumask_clear(&wd_smp_cpus_ipi);
        } else {
                /*
                 * Try to trigger the stuck CPUs, unless we are going to
                 * get a backtrace on all of them anyway.
@@ -244,11 +249,9 @@ static void watchdog_smp_panic(int cpu)
                        smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
                        __cpumask_clear_cpu(c, &wd_smp_cpus_ipi);
                }
        } else {
                trigger_allbutcpu_cpu_backtrace(cpu);
                cpumask_clear(&wd_smp_cpus_ipi);
        }

        sys_info(hardlockup_si_mask & ~SYS_INFO_ALL_BT);
        if (hardlockup_panic)
                nmi_panic(NULL, "Hard LOCKUP");

@@ -415,9 +418,11 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)

        xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi

        if (sysctl_hardlockup_all_cpu_backtrace)
        if (sysctl_hardlockup_all_cpu_backtrace ||
            (hardlockup_si_mask & SYS_INFO_ALL_BT))
                trigger_allbutcpu_cpu_backtrace(cpu);

        sys_info(hardlockup_si_mask & ~SYS_INFO_ALL_BT);
        if (hardlockup_panic)
                nmi_panic(regs, "Hard LOCKUP");

@@ -67,14 +67,16 @@ struct dev_dax_range {
/**
 * struct dev_dax - instance data for a subdivision of a dax region, and
 * data while the device is activated in the driver.
 * @region - parent region
 * @dax_dev - core dax functionality
 * @region: parent region
 * @dax_dev: core dax functionality
 * @align: alignment of this instance
 * @target_node: effective numa node if dev_dax memory range is onlined
 * @dyn_id: is this a dynamic or statically created instance
 * @id: ida allocated id when the dax_region is not static
 * @ida: mapping id allocator
 * @dev - device core
 * @pgmap - pgmap for memmap setup / lifetime (driver owned)
 * @dev: device core
 * @pgmap: pgmap for memmap setup / lifetime (driver owned)
 * @memmap_on_memory: allow kmem to put the memmap in the memory
 * @nr_range: size of @ranges
 * @ranges: range tuples of memory used
 */

@@ -3,6 +3,7 @@
 * Helpers for IOMMU drivers implementing SVA
 */
#include <linux/mmu_context.h>
#include <linux/mmu_notifier.h>
#include <linux/mutex.h>
#include <linux/sched/mm.h>
#include <linux/iommu.h>

@@ -211,6 +211,7 @@ struct kmem_obj_info;
 * __kfence_obj_info() - fill kmem_obj_info struct
 * @kpp: kmem_obj_info to be filled
 * @object: the object
 * @slab: the slab
 *
 * Return:
 * * false - not a KFENCE object

@@ -83,6 +83,7 @@ static inline void reset_hung_task_detector(void) { }
#if defined(CONFIG_HARDLOCKUP_DETECTOR)
extern void hardlockup_detector_disable(void);
extern unsigned int hardlockup_panic;
extern unsigned long hardlockup_si_mask;
#else
static inline void hardlockup_detector_disable(void) {}
#endif

@@ -325,6 +325,7 @@ static inline void might_alloc(gfp_t gfp_mask)

/**
 * memalloc_flags_save - Add a PF_* flag to current->flags, save old value
 * @flags: Flags to add.
 *
 * This allows PF_* flags to be conveniently added, irrespective of current
 * value, and then the old version restored with memalloc_flags_restore().

@@ -35,6 +35,7 @@ struct ts_state
 * @get_pattern: return head of pattern
 * @get_pattern_len: return length of pattern
 * @owner: module reference to algorithm
 * @list: list to search
 */
struct ts_ops
{

@@ -460,27 +460,23 @@ static void __init deserialize_bitmap(unsigned int order,
|
||||
}
|
||||
}
|
||||
|
||||
/* Return true if memory was deserizlied */
|
||||
static bool __init kho_mem_deserialize(const void *fdt)
|
||||
/* Returns physical address of the preserved memory map from FDT */
|
||||
static phys_addr_t __init kho_get_mem_map_phys(const void *fdt)
|
||||
{
|
||||
struct khoser_mem_chunk *chunk;
|
||||
const void *mem_ptr;
|
||||
u64 mem;
|
||||
int len;
|
||||
|
||||
mem_ptr = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len);
|
||||
if (!mem_ptr || len != sizeof(u64)) {
|
||||
pr_err("failed to get preserved memory bitmaps\n");
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
mem = get_unaligned((const u64 *)mem_ptr);
|
||||
chunk = mem ? phys_to_virt(mem) : NULL;
|
||||
|
||||
/* No preserved physical pages were passed, no deserialization */
|
||||
if (!chunk)
|
||||
return false;
|
||||
return get_unaligned((const u64 *)mem_ptr);
|
||||
}
|
||||
|
||||
static void __init kho_mem_deserialize(struct khoser_mem_chunk *chunk)
|
||||
{
|
||||
while (chunk) {
|
||||
unsigned int i;
|
||||
|
||||
@@ -489,8 +485,6 @@ static bool __init kho_mem_deserialize(const void *fdt)
|
||||
&chunk->bitmaps[i]);
|
||||
chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1253,6 +1247,7 @@ bool kho_finalized(void)
|
||||
struct kho_in {
|
||||
phys_addr_t fdt_phys;
|
||||
phys_addr_t scratch_phys;
|
||||
phys_addr_t mem_map_phys;
|
||||
struct kho_debugfs dbg;
|
||||
};
|
||||
|
||||
@@ -1434,12 +1429,10 @@ static void __init kho_release_scratch(void)
|
||||
|
||||
void __init kho_memory_init(void)
|
||||
{
|
||||
if (kho_in.scratch_phys) {
|
||||
if (kho_in.mem_map_phys) {
|
||||
kho_scratch = phys_to_virt(kho_in.scratch_phys);
|
||||
kho_release_scratch();
|
||||
|
||||
if (!kho_mem_deserialize(kho_get_fdt()))
|
||||
kho_in.fdt_phys = 0;
|
||||
kho_mem_deserialize(phys_to_virt(kho_in.mem_map_phys));
|
||||
} else {
|
||||
kho_reserve_scratch();
|
||||
}
|
||||
@@ -1448,8 +1441,9 @@ void __init kho_memory_init(void)
|
||||
void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
|
||||
phys_addr_t scratch_phys, u64 scratch_len)
|
||||
{
|
||||
void *fdt = NULL;
|
||||
struct kho_scratch *scratch = NULL;
|
||||
phys_addr_t mem_map_phys;
|
||||
void *fdt = NULL;
|
||||
int err = 0;
|
||||
unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch);
|
||||
|
||||
@@ -1475,6 +1469,12 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
|
||||
goto out;
|
||||
}
|
||||
|
||||
mem_map_phys = kho_get_mem_map_phys(fdt);
|
||||
if (!mem_map_phys) {
|
||||
err = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
scratch = early_memremap(scratch_phys, scratch_len);
|
||||
if (!scratch) {
|
||||
pr_warn("setup: failed to memremap scratch (phys=0x%llx, len=%lld)\n",
|
||||
@@ -1515,6 +1515,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
|
||||
|
||||
kho_in.fdt_phys = fdt_phys;
|
||||
kho_in.scratch_phys = scratch_phys;
|
||||
kho_in.mem_map_phys = mem_map_phys;
|
||||
kho_scratch_cnt = scratch_cnt;
|
||||
pr_info("found kexec handover data.\n");
|
||||
|
||||
|
||||
@@ -71,7 +71,7 @@ unsigned int __read_mostly hardlockup_panic =
 * hard lockup is detected, it could be task, memory, lock etc.
 * Refer include/linux/sys_info.h for detailed bit definition.
 */
static unsigned long hardlockup_si_mask;
unsigned long hardlockup_si_mask;

#ifdef CONFIG_SYSFS

@@ -5,6 +5,7 @@
|
||||
#include <linux/elf.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/secretmem.h>
|
||||
|
||||
#define BUILD_ID 3
|
||||
@@ -46,20 +47,9 @@ static int freader_get_folio(struct freader *r, loff_t file_off)
|
||||
|
||||
freader_put_folio(r);
|
||||
|
||||
/* reject secretmem folios created with memfd_secret() */
|
||||
if (secretmem_mapping(r->file->f_mapping))
|
||||
return -EFAULT;
|
||||
|
||||
/* only use page cache lookup - fail if not already cached */
|
||||
r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT);
|
||||
|
||||
/* if sleeping is allowed, wait for the page, if necessary */
|
||||
if (r->may_fault && (IS_ERR(r->folio) || !folio_test_uptodate(r->folio))) {
|
||||
filemap_invalidate_lock_shared(r->file->f_mapping);
|
||||
r->folio = read_cache_folio(r->file->f_mapping, file_off >> PAGE_SHIFT,
|
||||
NULL, r->file);
|
||||
filemap_invalidate_unlock_shared(r->file->f_mapping);
|
||||
}
|
||||
|
||||
if (IS_ERR(r->folio) || !folio_test_uptodate(r->folio)) {
|
||||
if (!IS_ERR(r->folio))
|
||||
folio_put(r->folio);
|
||||
@@ -97,6 +87,24 @@ const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz)
|
||||
return r->data + file_off;
|
||||
}
|
||||
|
||||
/* reject secretmem folios created with memfd_secret() */
|
||||
if (secretmem_mapping(r->file->f_mapping)) {
|
||||
r->err = -EFAULT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* use __kernel_read() for sleepable context */
|
||||
if (r->may_fault) {
|
||||
ssize_t ret;
|
||||
|
||||
ret = __kernel_read(r->file, r->buf, sz, &file_off);
|
||||
if (ret != sz) {
|
||||
r->err = (ret < 0) ? ret : -EIO;
|
||||
return NULL;
|
||||
}
|
||||
return r->buf;
|
||||
}
|
||||
|
||||
/* fetch or reuse folio for given file offset */
|
||||
r->err = freader_get_folio(r, file_off);
|
||||
if (r->err)
|
||||
|
||||
@@ -1431,6 +1431,35 @@ bool damon_is_running(struct damon_ctx *ctx)
|
||||
return running;
|
||||
}
|
||||
|
||||
/*
|
||||
* damon_call_handle_inactive_ctx() - handle DAMON call request that added to
|
||||
* an inactive context.
|
||||
* @ctx: The inactive DAMON context.
|
||||
* @control: Control variable of the call request.
|
||||
*
|
||||
* This function is called in a case that @control is added to @ctx but @ctx is
|
||||
* not running (inactive). See if @ctx handled @control or not, and cleanup
|
||||
* @control if it was not handled.
|
||||
*
|
||||
* Returns 0 if @control was handled by @ctx, negative error code otherwise.
|
||||
*/
|
||||
static int damon_call_handle_inactive_ctx(
|
||||
struct damon_ctx *ctx, struct damon_call_control *control)
|
||||
{
|
||||
struct damon_call_control *c;
|
||||
|
||||
mutex_lock(&ctx->call_controls_lock);
|
||||
list_for_each_entry(c, &ctx->call_controls, list) {
|
||||
if (c == control) {
|
||||
list_del(&control->list);
|
||||
mutex_unlock(&ctx->call_controls_lock);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&ctx->call_controls_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* damon_call() - Invoke a given function on DAMON worker thread (kdamond).
|
||||
* @ctx: DAMON context to call the function for.
|
||||
@@ -1461,7 +1490,7 @@ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control)
|
||||
list_add_tail(&control->list, &ctx->call_controls);
|
||||
mutex_unlock(&ctx->call_controls_lock);
|
||||
if (!damon_is_running(ctx))
|
||||
return -EINVAL;
|
||||
return damon_call_handle_inactive_ctx(ctx, control);
|
||||
if (control->repeat)
|
||||
return 0;
|
||||
wait_for_completion(&control->completion);
|
||||
@@ -2051,13 +2080,15 @@ static unsigned long damos_get_node_memcg_used_bp(
|
||||
|
||||
rcu_read_lock();
|
||||
memcg = mem_cgroup_from_id(goal->memcg_id);
|
||||
rcu_read_unlock();
|
||||
if (!memcg) {
|
||||
if (!memcg || !mem_cgroup_tryget(memcg)) {
|
||||
rcu_read_unlock();
|
||||
if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP)
|
||||
return 0;
|
||||
else /* DAMOS_QUOTA_NODE_MEMCG_FREE_BP */
|
||||
return 10000;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
mem_cgroup_flush_stats(memcg);
|
||||
lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(goal->nid));
|
||||
used_pages = lruvec_page_state(lruvec, NR_ACTIVE_ANON);
|
||||
@@ -2065,6 +2096,8 @@ static unsigned long damos_get_node_memcg_used_bp(
|
||||
used_pages += lruvec_page_state(lruvec, NR_ACTIVE_FILE);
|
||||
used_pages += lruvec_page_state(lruvec, NR_INACTIVE_FILE);
|
||||
|
||||
mem_cgroup_put(memcg);
|
||||
|
||||
si_meminfo_node(&i, goal->nid);
|
||||
if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP)
|
||||
numerator = used_pages;
|
||||
@@ -2751,13 +2784,13 @@ done:
|
||||
if (ctx->ops.cleanup)
|
||||
ctx->ops.cleanup(ctx);
|
||||
kfree(ctx->regions_score_histogram);
|
||||
kdamond_call(ctx, true);
|
||||
|
||||
pr_debug("kdamond (%d) finishes\n", current->pid);
|
||||
mutex_lock(&ctx->kdamond_lock);
|
||||
ctx->kdamond = NULL;
|
||||
mutex_unlock(&ctx->kdamond_lock);
|
||||
|
||||
kdamond_call(ctx, true);
|
||||
damos_walk_cancel(ctx);
|
||||
|
||||
mutex_lock(&damon_lock);
|
||||
|
||||
@@ -2152,13 +2152,13 @@ static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme)
|
||||
return err;
|
||||
err = damos_sysfs_set_dests(scheme);
|
||||
if (err)
|
||||
goto put_access_pattern_out;
|
||||
goto rmdir_put_access_pattern_out;
|
||||
err = damon_sysfs_scheme_set_quotas(scheme);
|
||||
if (err)
|
||||
goto put_dests_out;
|
||||
err = damon_sysfs_scheme_set_watermarks(scheme);
|
||||
if (err)
|
||||
goto put_quotas_access_pattern_out;
|
||||
goto rmdir_put_quotas_access_pattern_out;
|
||||
err = damos_sysfs_set_filter_dirs(scheme);
|
||||
if (err)
|
||||
goto put_watermarks_quotas_access_pattern_out;
|
||||
@@ -2183,13 +2183,15 @@ put_filters_watermarks_quotas_access_pattern_out:
|
||||
put_watermarks_quotas_access_pattern_out:
|
||||
kobject_put(&scheme->watermarks->kobj);
|
||||
scheme->watermarks = NULL;
|
||||
put_quotas_access_pattern_out:
|
||||
rmdir_put_quotas_access_pattern_out:
|
||||
damon_sysfs_quotas_rm_dirs(scheme->quotas);
|
||||
kobject_put(&scheme->quotas->kobj);
|
||||
scheme->quotas = NULL;
|
||||
put_dests_out:
|
||||
kobject_put(&scheme->dests->kobj);
|
||||
scheme->dests = NULL;
|
||||
put_access_pattern_out:
|
||||
rmdir_put_access_pattern_out:
|
||||
damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern);
|
||||
kobject_put(&scheme->access_pattern->kobj);
|
||||
scheme->access_pattern = NULL;
|
||||
return err;
|
||||
|
||||
@@ -792,7 +792,7 @@ static int damon_sysfs_attrs_add_dirs(struct damon_sysfs_attrs *attrs)
|
||||
nr_regions_range = damon_sysfs_ul_range_alloc(10, 1000);
|
||||
if (!nr_regions_range) {
|
||||
err = -ENOMEM;
|
||||
goto put_intervals_out;
|
||||
goto rmdir_put_intervals_out;
|
||||
}
|
||||
|
||||
err = kobject_init_and_add(&nr_regions_range->kobj,
|
||||
@@ -806,6 +806,8 @@ static int damon_sysfs_attrs_add_dirs(struct damon_sysfs_attrs *attrs)
|
||||
put_nr_regions_intervals_out:
|
||||
kobject_put(&nr_regions_range->kobj);
|
||||
attrs->nr_regions_range = NULL;
|
||||
rmdir_put_intervals_out:
|
||||
damon_sysfs_intervals_rm_dirs(intervals);
|
||||
put_intervals_out:
|
||||
kobject_put(&intervals->kobj);
|
||||
attrs->intervals = NULL;
|
||||
@@ -948,7 +950,7 @@ static int damon_sysfs_context_add_dirs(struct damon_sysfs_context *context)
|
||||
|
||||
err = damon_sysfs_context_set_targets(context);
|
||||
if (err)
|
||||
goto put_attrs_out;
|
||||
goto rmdir_put_attrs_out;
|
||||
|
||||
err = damon_sysfs_context_set_schemes(context);
|
||||
if (err)
|
||||
@@ -958,7 +960,8 @@ static int damon_sysfs_context_add_dirs(struct damon_sysfs_context *context)
|
||||
put_targets_attrs_out:
|
||||
kobject_put(&context->targets->kobj);
|
||||
context->targets = NULL;
|
||||
put_attrs_out:
|
||||
rmdir_put_attrs_out:
|
||||
damon_sysfs_attrs_rm_dirs(context->attrs);
|
||||
kobject_put(&context->attrs->kobj);
|
||||
context->attrs = NULL;
|
||||
return err;
|
||||
|
||||
mm/hugetlb.c | 16
@@ -4286,6 +4286,11 @@ static int __init hugepages_setup(char *s)
        unsigned long tmp;
        char *p = s;

        if (!hugepages_supported()) {
                pr_warn("HugeTLB: hugepages unsupported, ignoring hugepages=%s cmdline\n", s);
                return 0;
        }

        if (!parsed_valid_hugepagesz) {
                pr_warn("HugeTLB: hugepages=%s does not follow a valid hugepagesz, ignoring\n", s);
                parsed_valid_hugepagesz = true;
@@ -4366,6 +4371,11 @@ static int __init hugepagesz_setup(char *s)
        unsigned long size;
        struct hstate *h;

        if (!hugepages_supported()) {
                pr_warn("HugeTLB: hugepages unsupported, ignoring hugepagesz=%s cmdline\n", s);
                return 0;
        }

        parsed_valid_hugepagesz = false;
        size = (unsigned long)memparse(s, NULL);

@@ -4414,6 +4424,12 @@ static int __init default_hugepagesz_setup(char *s)
        unsigned long size;
        int i;

        if (!hugepages_supported()) {
                pr_warn("HugeTLB: hugepages unsupported, ignoring default_hugepagesz=%s cmdline\n",
                        s);
                return 0;
        }

        parsed_valid_hugepagesz = false;
        if (parsed_default_hugepagesz) {
                pr_err("HugeTLB: default_hugepagesz previously specified, ignoring %s\n", s);

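The warning messages above encode the expected ordering of the hugetlb boot parameters: a hugepages= count is ignored when it does not follow a valid hugepagesz=, exactly as the message states. An illustrative command line (not taken from this patch) that satisfies that ordering:

        default_hugepagesz=1G hugepagesz=1G hugepages=4
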
@@ -207,7 +207,7 @@ void kmsan_free_page(struct page *page, unsigned int order)
        if (!kmsan_enabled || kmsan_in_runtime())
                return;
        kmsan_enter_runtime();
        kmsan_internal_poison_memory(page_address(page), page_size(page),
        kmsan_internal_poison_memory(page_address(page), PAGE_SIZE << order,
                                     GFP_KERNEL & ~(__GFP_RECLAIM),
                                     KMSAN_POISON_CHECK | KMSAN_POISON_FREE);
        kmsan_leave_runtime();

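For a high-order page allocated without __GFP_COMP, page_size() reports only PAGE_SIZE (the page has no compound order), which is why the replaced call above under-poisoned such allocations; PAGE_SIZE << order covers the whole block. A minimal sketch of the distinction, assuming a hypothetical order-2 non-compound allocation:

        struct page *page = alloc_pages(GFP_KERNEL, 2); /* no __GFP_COMP, so not compound */
        /* page_size(page)  == PAGE_SIZE       -- only the first page            */
        /* PAGE_SIZE << 2   == 4 * PAGE_SIZE   -- the full allocation to poison  */
        __free_pages(page, 2);
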
@@ -7,6 +7,8 @@
#include <linux/numa.h>
#include <linux/numa_memblks.h>

#include <asm/numa.h>

int numa_distance_cnt;
static u8 *numa_distance;

@@ -167,6 +167,33 @@ static inline void __pcp_trylock_noop(unsigned long *flags) { }
|
||||
pcp_trylock_finish(UP_flags); \
|
||||
})
|
||||
|
||||
/*
|
||||
* With the UP spinlock implementation, when we spin_lock(&pcp->lock) (for i.e.
|
||||
* a potentially remote cpu drain) and get interrupted by an operation that
|
||||
* attempts pcp_spin_trylock(), we can't rely on the trylock failure due to UP
|
||||
* spinlock assumptions making the trylock a no-op. So we have to turn that
|
||||
* spin_lock() to a spin_lock_irqsave(). This works because on UP there are no
|
||||
* remote cpu's so we can only be locking the only existing local one.
|
||||
*/
|
||||
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
|
||||
static inline void __flags_noop(unsigned long *flags) { }
|
||||
#define pcp_spin_lock_maybe_irqsave(ptr, flags) \
|
||||
({ \
|
||||
__flags_noop(&(flags)); \
|
||||
spin_lock(&(ptr)->lock); \
|
||||
})
|
||||
#define pcp_spin_unlock_maybe_irqrestore(ptr, flags) \
|
||||
({ \
|
||||
spin_unlock(&(ptr)->lock); \
|
||||
__flags_noop(&(flags)); \
|
||||
})
|
||||
#else
|
||||
#define pcp_spin_lock_maybe_irqsave(ptr, flags) \
|
||||
spin_lock_irqsave(&(ptr)->lock, flags)
|
||||
#define pcp_spin_unlock_maybe_irqrestore(ptr, flags) \
|
||||
spin_unlock_irqrestore(&(ptr)->lock, flags)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
|
||||
DEFINE_PER_CPU(int, numa_node);
|
||||
EXPORT_PER_CPU_SYMBOL(numa_node);
|
||||
@@ -2556,6 +2583,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
|
||||
bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
|
||||
{
|
||||
int high_min, to_drain, to_drain_batched, batch;
|
||||
unsigned long UP_flags;
|
||||
bool todo = false;
|
||||
|
||||
high_min = READ_ONCE(pcp->high_min);
|
||||
@@ -2575,9 +2603,9 @@ bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
|
||||
to_drain = pcp->count - pcp->high;
|
||||
while (to_drain > 0) {
|
||||
to_drain_batched = min(to_drain, batch);
|
||||
spin_lock(&pcp->lock);
|
||||
pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
|
||||
free_pcppages_bulk(zone, to_drain_batched, pcp, 0);
|
||||
spin_unlock(&pcp->lock);
|
||||
pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
|
||||
todo = true;
|
||||
|
||||
to_drain -= to_drain_batched;
|
||||
@@ -2594,14 +2622,15 @@ bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
|
||||
*/
|
||||
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
|
||||
{
|
||||
unsigned long UP_flags;
|
||||
int to_drain, batch;
|
||||
|
||||
batch = READ_ONCE(pcp->batch);
|
||||
to_drain = min(pcp->count, batch);
|
||||
if (to_drain > 0) {
|
||||
spin_lock(&pcp->lock);
|
||||
pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
|
||||
free_pcppages_bulk(zone, to_drain, pcp, 0);
|
||||
spin_unlock(&pcp->lock);
|
||||
pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -2612,10 +2641,11 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
|
||||
static void drain_pages_zone(unsigned int cpu, struct zone *zone)
|
||||
{
|
||||
struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
|
||||
unsigned long UP_flags;
|
||||
int count;
|
||||
|
||||
do {
|
||||
spin_lock(&pcp->lock);
|
||||
pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
|
||||
count = pcp->count;
|
||||
if (count) {
|
||||
int to_drain = min(count,
|
||||
@@ -2624,7 +2654,7 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
|
||||
free_pcppages_bulk(zone, to_drain, pcp, 0);
|
||||
count -= to_drain;
|
||||
}
|
||||
spin_unlock(&pcp->lock);
|
||||
pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
|
||||
} while (count);
|
||||
}
|
||||
|
||||
@@ -6109,6 +6139,7 @@ static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu)
|
||||
{
|
||||
struct per_cpu_pages *pcp;
|
||||
struct cpu_cacheinfo *cci;
|
||||
unsigned long UP_flags;
|
||||
|
||||
pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
|
||||
cci = get_cpu_cacheinfo(cpu);
|
||||
@@ -6119,12 +6150,12 @@ static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu)
|
||||
* This can reduce zone lock contention without hurting
|
||||
* cache-hot pages sharing.
|
||||
*/
|
||||
spin_lock(&pcp->lock);
|
||||
pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
|
||||
if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch)
|
||||
pcp->flags |= PCPF_FREE_HIGH_BATCH;
|
||||
else
|
||||
pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
|
||||
spin_unlock(&pcp->lock);
|
||||
pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
|
||||
}
|
||||
|
||||
void setup_pcp_cacheinfo(unsigned int cpu)
|
||||
@@ -6667,11 +6698,19 @@ static int percpu_pagelist_high_fraction_sysctl_handler(const struct ctl_table *
|
||||
int old_percpu_pagelist_high_fraction;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Avoid using pcp_batch_high_lock for reads as the value is read
|
||||
* atomically and a race with offlining is harmless.
|
||||
*/
|
||||
|
||||
if (!write)
|
||||
return proc_dointvec_minmax(table, write, buffer, length, ppos);
|
||||
|
||||
mutex_lock(&pcp_batch_high_lock);
|
||||
old_percpu_pagelist_high_fraction = percpu_pagelist_high_fraction;
|
||||
|
||||
ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
|
||||
if (!write || ret < 0)
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* Sanity checking to avoid pcp imbalance */
|
||||
|
||||
mm/vma.c | 111
@@ -67,18 +67,13 @@ struct mmap_state {
|
||||
.state = VMA_MERGE_START, \
|
||||
}
|
||||
|
||||
/*
|
||||
* If, at any point, the VMA had unCoW'd mappings from parents, it will maintain
|
||||
* more than one anon_vma_chain connecting it to more than one anon_vma. A merge
|
||||
* would mean a wider range of folios sharing the root anon_vma lock, and thus
|
||||
* potential lock contention, we do not wish to encourage merging such that this
|
||||
* scales to a problem.
|
||||
*/
|
||||
static bool vma_had_uncowed_parents(struct vm_area_struct *vma)
|
||||
/* Was this VMA ever forked from a parent, i.e. maybe contains CoW mappings? */
|
||||
static bool vma_is_fork_child(struct vm_area_struct *vma)
|
||||
{
|
||||
/*
|
||||
* The list_is_singular() test is to avoid merging VMA cloned from
|
||||
* parents. This can improve scalability caused by anon_vma lock.
|
||||
* parents. This can improve scalability caused by the anon_vma root
|
||||
* lock.
|
||||
*/
|
||||
return vma && vma->anon_vma && !list_is_singular(&vma->anon_vma_chain);
|
||||
}
|
||||
@@ -115,11 +110,19 @@ static bool is_mergeable_anon_vma(struct vma_merge_struct *vmg, bool merge_next)
|
||||
VM_WARN_ON(src && src_anon != src->anon_vma);
|
||||
|
||||
/* Case 1 - we will dup_anon_vma() from src into tgt. */
|
||||
if (!tgt_anon && src_anon)
|
||||
return !vma_had_uncowed_parents(src);
|
||||
if (!tgt_anon && src_anon) {
|
||||
struct vm_area_struct *copied_from = vmg->copied_from;
|
||||
|
||||
if (vma_is_fork_child(src))
|
||||
return false;
|
||||
if (vma_is_fork_child(copied_from))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
/* Case 2 - we will simply use tgt's anon_vma. */
|
||||
if (tgt_anon && !src_anon)
|
||||
return !vma_had_uncowed_parents(tgt);
|
||||
return !vma_is_fork_child(tgt);
|
||||
/* Case 3 - the anon_vma's are already shared. */
|
||||
return src_anon == tgt_anon;
|
||||
}
|
||||
@@ -829,6 +832,8 @@ static __must_check struct vm_area_struct *vma_merge_existing_range(
|
||||
VM_WARN_ON_VMG(middle &&
|
||||
!(vma_iter_addr(vmg->vmi) >= middle->vm_start &&
|
||||
vma_iter_addr(vmg->vmi) < middle->vm_end), vmg);
|
||||
/* An existing merge can never be used by the mremap() logic. */
|
||||
VM_WARN_ON_VMG(vmg->copied_from, vmg);
|
||||
|
||||
vmg->state = VMA_MERGE_NOMERGE;
|
||||
|
||||
@@ -1098,6 +1103,33 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* vma_merge_copied_range - Attempt to merge a VMA that is being copied by
|
||||
* mremap()
|
||||
*
|
||||
* @vmg: Describes the VMA we are adding, in the copied-to range @vmg->start to
|
||||
* @vmg->end (exclusive), which we try to merge with any adjacent VMAs if
|
||||
* possible.
|
||||
*
|
||||
* vmg->prev, next, start, end, pgoff should all be relative to the COPIED TO
|
||||
* range, i.e. the target range for the VMA.
|
||||
*
|
||||
* Returns: In instances where no merge was possible, NULL. Otherwise, a pointer
|
||||
* to the VMA we expanded.
|
||||
*
|
||||
* ASSUMPTIONS: Same as vma_merge_new_range(), except vmg->middle must contain
|
||||
* the copied-from VMA.
|
||||
*/
|
||||
static struct vm_area_struct *vma_merge_copied_range(struct vma_merge_struct *vmg)
|
||||
{
|
||||
/* We must have a copied-from VMA. */
|
||||
VM_WARN_ON_VMG(!vmg->middle, vmg);
|
||||
|
||||
vmg->copied_from = vmg->middle;
|
||||
vmg->middle = NULL;
|
||||
return vma_merge_new_range(vmg);
|
||||
}
|
||||
|
||||
/*
|
||||
* vma_expand - Expand an existing VMA
|
||||
*
|
||||
@@ -1117,46 +1149,52 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg)
|
||||
int vma_expand(struct vma_merge_struct *vmg)
|
||||
{
|
||||
struct vm_area_struct *anon_dup = NULL;
|
||||
bool remove_next = false;
|
||||
struct vm_area_struct *target = vmg->target;
|
||||
struct vm_area_struct *next = vmg->next;
|
||||
bool remove_next = false;
|
||||
vm_flags_t sticky_flags;
|
||||
|
||||
sticky_flags = vmg->vm_flags & VM_STICKY;
|
||||
sticky_flags |= target->vm_flags & VM_STICKY;
|
||||
|
||||
VM_WARN_ON_VMG(!target, vmg);
|
||||
int ret = 0;
|
||||
|
||||
mmap_assert_write_locked(vmg->mm);
|
||||
|
||||
vma_start_write(target);
|
||||
if (next && (target != next) && (vmg->end == next->vm_end)) {
|
||||
int ret;
|
||||
|
||||
sticky_flags |= next->vm_flags & VM_STICKY;
|
||||
if (next && target != next && vmg->end == next->vm_end)
|
||||
remove_next = true;
|
||||
/* This should already have been checked by this point. */
|
||||
VM_WARN_ON_VMG(!can_merge_remove_vma(next), vmg);
|
||||
vma_start_write(next);
|
||||
/*
|
||||
* In this case we don't report OOM, so vmg->give_up_on_mm is
|
||||
* safe.
|
||||
*/
|
||||
ret = dup_anon_vma(target, next, &anon_dup);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* We must have a target. */
|
||||
VM_WARN_ON_VMG(!target, vmg);
|
||||
/* This should have already been checked by this point. */
|
||||
VM_WARN_ON_VMG(remove_next && !can_merge_remove_vma(next), vmg);
|
||||
/* Not merging but overwriting any part of next is not handled. */
|
||||
VM_WARN_ON_VMG(next && !remove_next &&
|
||||
next != target && vmg->end > next->vm_start, vmg);
|
||||
/* Only handles expanding */
|
||||
/* Only handles expanding. */
|
||||
VM_WARN_ON_VMG(target->vm_start < vmg->start ||
|
||||
target->vm_end > vmg->end, vmg);
|
||||
|
||||
sticky_flags = vmg->vm_flags & VM_STICKY;
|
||||
sticky_flags |= target->vm_flags & VM_STICKY;
|
||||
if (remove_next)
|
||||
vmg->__remove_next = true;
|
||||
sticky_flags |= next->vm_flags & VM_STICKY;
|
||||
|
||||
/*
|
||||
* If we are removing the next VMA or copying from a VMA
|
||||
* (e.g. mremap()'ing), we must propagate anon_vma state.
|
||||
*
|
||||
* Note that, by convention, callers ignore OOM for this case, so
|
||||
* we don't need to account for vmg->give_up_on_mm here.
|
||||
*/
|
||||
if (remove_next)
|
||||
ret = dup_anon_vma(target, next, &anon_dup);
|
||||
if (!ret && vmg->copied_from)
|
||||
ret = dup_anon_vma(target, vmg->copied_from, &anon_dup);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (remove_next) {
|
||||
vma_start_write(next);
|
||||
vmg->__remove_next = true;
|
||||
}
|
||||
if (commit_merge(vmg))
|
||||
goto nomem;
|
||||
|
||||
@@ -1828,10 +1866,9 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
|
||||
if (new_vma && new_vma->vm_start < addr + len)
|
||||
return NULL; /* should never get here */
|
||||
|
||||
vmg.middle = NULL; /* New VMA range. */
|
||||
vmg.pgoff = pgoff;
|
||||
vmg.next = vma_iter_next_rewind(&vmi, NULL);
|
||||
new_vma = vma_merge_new_range(&vmg);
|
||||
new_vma = vma_merge_copied_range(&vmg);
|
||||
|
||||
if (new_vma) {
|
||||
/*
|
||||
|
||||
mm/vma.h | 3
@@ -106,6 +106,9 @@ struct vma_merge_struct {
        struct anon_vma_name *anon_name;
        enum vma_merge_state state;

        /* If copied from (i.e. mremap()'d) the VMA from which we are copying. */
        struct vm_area_struct *copied_from;

        /* Flags which callers can use to modify merge behaviour: */

        /*

@@ -4248,7 +4248,7 @@ void *vzalloc_node_noprof(unsigned long size, int node)
EXPORT_SYMBOL(vzalloc_node_noprof);

/**
 * vrealloc_node_align_noprof - reallocate virtually contiguous memory; contents
 * vrealloc_node_align - reallocate virtually contiguous memory; contents
 * remain unchanged
 * @p: object to reallocate memory for
 * @size: the size to reallocate

@@ -787,7 +787,7 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
                return 0;

fail:
        if (acomp)
        if (!IS_ERR_OR_NULL(acomp))
                crypto_free_acomp(acomp);
        kfree(buffer);
        return ret;

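The fix above matters because the crypto allocation helpers report failure with ERR_PTR() values rather than NULL, so a plain NULL check can hand an error pointer to crypto_free_acomp(). A minimal sketch of the pattern, not taken from the patch (the "lzo" algorithm name is only an example):

        struct crypto_acomp *acomp = crypto_alloc_acomp("lzo", 0, 0);

        if (IS_ERR(acomp))              /* failure is ERR_PTR(-errno), not NULL */
                return PTR_ERR(acomp);
        ...
        /* error path: guard the free with !IS_ERR_OR_NULL() so an error
         * pointer stored in acomp is never passed to crypto_free_acomp() */
        if (!IS_ERR_OR_NULL(acomp))
                crypto_free_acomp(acomp);
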
@@ -179,7 +179,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
        if (rw && shared && fs_is_unknown(fs_type)) {
                ksft_print_msg("Unknown filesystem\n");
                result = KSFT_SKIP;
                return;
                break;
        }
        /*
         * R/O pinning or pinning in a private mapping is always

@@ -22,12 +22,37 @@ FIXTURE(merge)
|
||||
struct procmap_fd procmap;
|
||||
};
|
||||
|
||||
static char *map_carveout(unsigned int page_size)
|
||||
{
|
||||
return mmap(NULL, 30 * page_size, PROT_NONE,
|
||||
MAP_ANON | MAP_PRIVATE, -1, 0);
|
||||
}
|
||||
|
||||
static pid_t do_fork(struct procmap_fd *procmap)
|
||||
{
|
||||
pid_t pid = fork();
|
||||
|
||||
if (pid == -1)
|
||||
return -1;
|
||||
if (pid != 0) {
|
||||
wait(NULL);
|
||||
return pid;
|
||||
}
|
||||
|
||||
/* Reopen for child. */
|
||||
if (close_procmap(procmap))
|
||||
return -1;
|
||||
if (open_self_procmap(procmap))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
FIXTURE_SETUP(merge)
|
||||
{
|
||||
self->page_size = psize();
|
||||
/* Carve out PROT_NONE region to map over. */
|
||||
self->carveout = mmap(NULL, 30 * self->page_size, PROT_NONE,
|
||||
MAP_ANON | MAP_PRIVATE, -1, 0);
|
||||
self->carveout = map_carveout(self->page_size);
|
||||
ASSERT_NE(self->carveout, MAP_FAILED);
|
||||
/* Setup PROCMAP_QUERY interface. */
|
||||
ASSERT_EQ(open_self_procmap(&self->procmap), 0);
|
||||
@@ -36,7 +61,8 @@ FIXTURE_SETUP(merge)
|
||||
FIXTURE_TEARDOWN(merge)
|
||||
{
|
||||
ASSERT_EQ(munmap(self->carveout, 30 * self->page_size), 0);
|
||||
ASSERT_EQ(close_procmap(&self->procmap), 0);
|
||||
/* May fail for parent of forked process. */
|
||||
close_procmap(&self->procmap);
|
||||
/*
|
||||
* Clear unconditionally, as some tests set this. It is no issue if this
|
||||
* fails (KSM may be disabled for instance).
|
||||
@@ -44,6 +70,44 @@ FIXTURE_TEARDOWN(merge)
|
||||
prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0);
|
||||
}
|
||||
|
||||
FIXTURE(merge_with_fork)
|
||||
{
|
||||
unsigned int page_size;
|
||||
char *carveout;
|
||||
struct procmap_fd procmap;
|
||||
};
|
||||
|
||||
FIXTURE_VARIANT(merge_with_fork)
|
||||
{
|
||||
bool forked;
|
||||
};
|
||||
|
||||
FIXTURE_VARIANT_ADD(merge_with_fork, forked)
|
||||
{
|
||||
.forked = true,
|
||||
};
|
||||
|
||||
FIXTURE_VARIANT_ADD(merge_with_fork, unforked)
|
||||
{
|
||||
.forked = false,
|
||||
};
|
||||
|
||||
FIXTURE_SETUP(merge_with_fork)
|
||||
{
|
||||
self->page_size = psize();
|
||||
self->carveout = map_carveout(self->page_size);
|
||||
ASSERT_NE(self->carveout, MAP_FAILED);
|
||||
ASSERT_EQ(open_self_procmap(&self->procmap), 0);
|
||||
}
|
||||
|
||||
FIXTURE_TEARDOWN(merge_with_fork)
|
||||
{
|
||||
ASSERT_EQ(munmap(self->carveout, 30 * self->page_size), 0);
|
||||
ASSERT_EQ(close_procmap(&self->procmap), 0);
|
||||
/* See above. */
|
||||
prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0);
|
||||
}
|
||||
|
||||
TEST_F(merge, mprotect_unfaulted_left)
|
||||
{
|
||||
unsigned int page_size = self->page_size;
|
||||
@@ -322,8 +386,8 @@ TEST_F(merge, forked_target_vma)
|
||||
unsigned int page_size = self->page_size;
|
||||
char *carveout = self->carveout;
|
||||
struct procmap_fd *procmap = &self->procmap;
|
||||
pid_t pid;
|
||||
char *ptr, *ptr2;
|
||||
pid_t pid;
|
||||
int i;
|
||||
|
||||
/*
|
||||
@@ -344,19 +408,10 @@ TEST_F(merge, forked_target_vma)
|
||||
*/
|
||||
ptr[0] = 'x';
|
||||
|
||||
pid = fork();
|
||||
pid = do_fork(&self->procmap);
|
||||
ASSERT_NE(pid, -1);
|
||||
|
||||
if (pid != 0) {
|
||||
wait(NULL);
|
||||
if (pid != 0)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Child process below: */
|
||||
|
||||
/* Reopen for child. */
|
||||
ASSERT_EQ(close_procmap(&self->procmap), 0);
|
||||
ASSERT_EQ(open_self_procmap(&self->procmap), 0);
|
||||
|
||||
/* unCOWing everything does not cause the AVC to go away. */
|
||||
for (i = 0; i < 5 * page_size; i += page_size)
|
||||
@@ -386,8 +441,8 @@ TEST_F(merge, forked_source_vma)
|
||||
unsigned int page_size = self->page_size;
|
||||
char *carveout = self->carveout;
|
||||
struct procmap_fd *procmap = &self->procmap;
|
||||
pid_t pid;
|
||||
char *ptr, *ptr2;
|
||||
pid_t pid;
|
||||
int i;
|
||||
|
||||
/*
|
||||
@@ -408,19 +463,10 @@ TEST_F(merge, forked_source_vma)
|
||||
*/
|
||||
ptr[0] = 'x';
|
||||
|
||||
pid = fork();
|
||||
pid = do_fork(&self->procmap);
|
||||
ASSERT_NE(pid, -1);
|
||||
|
||||
if (pid != 0) {
|
||||
wait(NULL);
|
||||
if (pid != 0)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Child process below: */
|
||||
|
||||
/* Reopen for child. */
|
||||
ASSERT_EQ(close_procmap(&self->procmap), 0);
|
||||
ASSERT_EQ(open_self_procmap(&self->procmap), 0);
|
||||
|
||||
/* unCOWing everything does not cause the AVC to go away. */
|
||||
for (i = 0; i < 5 * page_size; i += page_size)
|
||||
@@ -1171,4 +1217,288 @@ TEST_F(merge, mremap_correct_placed_faulted)
|
||||
ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size);
|
||||
}
|
||||
|
||||
TEST_F(merge_with_fork, mremap_faulted_to_unfaulted_prev)
|
||||
{
|
||||
struct procmap_fd *procmap = &self->procmap;
|
||||
unsigned int page_size = self->page_size;
|
||||
unsigned long offset;
|
||||
char *ptr_a, *ptr_b;
|
||||
|
||||
/*
|
||||
* mremap() such that A and B merge:
|
||||
*
|
||||
* |------------|
|
||||
* | \ |
|
||||
* |-----------| | / |---------|
|
||||
* | unfaulted | v \ | faulted |
|
||||
* |-----------| / |---------|
|
||||
* B \ A
|
||||
*/
|
||||
|
||||
/* Map VMA A into place. */
|
||||
ptr_a = mmap(&self->carveout[page_size + 3 * page_size],
|
||||
3 * page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
|
||||
ASSERT_NE(ptr_a, MAP_FAILED);
|
||||
/* Fault it in. */
|
||||
ptr_a[0] = 'x';
|
||||
|
||||
if (variant->forked) {
|
||||
pid_t pid = do_fork(&self->procmap);
|
||||
|
||||
ASSERT_NE(pid, -1);
|
||||
if (pid != 0)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now move it out of the way so we can place VMA B in position,
|
||||
* unfaulted.
|
||||
*/
|
||||
ptr_a = mremap(ptr_a, 3 * page_size, 3 * page_size,
|
||||
MREMAP_FIXED | MREMAP_MAYMOVE, &self->carveout[20 * page_size]);
|
||||
ASSERT_NE(ptr_a, MAP_FAILED);
|
||||
|
||||
/* Map VMA B into place. */
|
||||
ptr_b = mmap(&self->carveout[page_size], 3 * page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
|
||||
ASSERT_NE(ptr_b, MAP_FAILED);
|
||||
|
||||
/*
|
||||
* Now move VMA A into position with MREMAP_DONTUNMAP to catch incorrect
|
||||
* anon_vma propagation.
|
||||
*/
|
||||
ptr_a = mremap(ptr_a, 3 * page_size, 3 * page_size,
|
||||
MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP,
|
||||
&self->carveout[page_size + 3 * page_size]);
|
||||
ASSERT_NE(ptr_a, MAP_FAILED);
|
||||
|
||||
/* The VMAs should have merged, if not forked. */
|
||||
ASSERT_TRUE(find_vma_procmap(procmap, ptr_b));
|
||||
ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_b);
|
||||
|
||||
offset = variant->forked ? 3 * page_size : 6 * page_size;
|
||||
ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_b + offset);
|
||||
}
|
||||
|
||||
TEST_F(merge_with_fork, mremap_faulted_to_unfaulted_next)
|
||||
{
|
||||
struct procmap_fd *procmap = &self->procmap;
|
||||
unsigned int page_size = self->page_size;
|
||||
unsigned long offset;
|
||||
char *ptr_a, *ptr_b;
|
||||
|
||||
/*
|
||||
* mremap() such that A and B merge:
|
||||
*
|
||||
* |---------------------------|
|
||||
* | \ |
|
||||
* | |-----------| / |---------|
|
||||
* v | unfaulted | \ | faulted |
|
||||
* |-----------| / |---------|
|
||||
* B \ A
|
||||
*
|
||||
* Then unmap VMA A to trigger the bug.
|
||||
*/
|
||||
|
||||
/* Map VMA A into place. */
|
||||
ptr_a = mmap(&self->carveout[page_size], 3 * page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
|
||||
ASSERT_NE(ptr_a, MAP_FAILED);
|
||||
/* Fault it in. */
|
||||
ptr_a[0] = 'x';
|
||||
|
||||
if (variant->forked) {
|
||||
pid_t pid = do_fork(&self->procmap);
|
||||
|
||||
ASSERT_NE(pid, -1);
|
||||
if (pid != 0)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now move it out of the way so we can place VMA B in position,
|
||||
* unfaulted.
|
||||
*/
|
||||
ptr_a = mremap(ptr_a, 3 * page_size, 3 * page_size,
|
||||
MREMAP_FIXED | MREMAP_MAYMOVE, &self->carveout[20 * page_size]);
|
||||
ASSERT_NE(ptr_a, MAP_FAILED);
|
||||
|
||||
/* Map VMA B into place. */
|
||||
ptr_b = mmap(&self->carveout[page_size + 3 * page_size], 3 * page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
|
||||
ASSERT_NE(ptr_b, MAP_FAILED);
|
||||
|
||||
/*
|
||||
* Now move VMA A into position with MREMAP_DONTUNMAP to catch incorrect
|
||||
* anon_vma propagation.
|
||||
*/
|
||||
ptr_a = mremap(ptr_a, 3 * page_size, 3 * page_size,
|
||||
MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP,
|
||||
&self->carveout[page_size]);
|
||||
ASSERT_NE(ptr_a, MAP_FAILED);
|
||||
|
||||
/* The VMAs should have merged, if not forked. */
|
||||
ASSERT_TRUE(find_vma_procmap(procmap, ptr_a));
|
||||
ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_a);
|
||||
offset = variant->forked ? 3 * page_size : 6 * page_size;
|
||||
ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_a + offset);
|
||||
}
|
||||
|
||||
TEST_F(merge_with_fork, mremap_faulted_to_unfaulted_prev_unfaulted_next)
|
||||
{
|
||||
struct procmap_fd *procmap = &self->procmap;
|
||||
unsigned int page_size = self->page_size;
|
||||
unsigned long offset;
|
||||
char *ptr_a, *ptr_b, *ptr_c;
|
||||
|
||||
/*
|
||||
* mremap() with MREMAP_DONTUNMAP such that A, B and C merge:
|
||||
*
|
||||
* |---------------------------|
|
||||
* | \ |
|
||||
* |-----------| | |-----------| / |---------|
|
||||
* | unfaulted | v | unfaulted | \ | faulted |
|
||||
* |-----------| |-----------| / |---------|
|
||||
* A C \ B
|
||||
*/
|
||||
|
||||
/* Map VMA B into place. */
|
||||
ptr_b = mmap(&self->carveout[page_size + 3 * page_size], 3 * page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
|
||||
ASSERT_NE(ptr_b, MAP_FAILED);
|
||||
/* Fault it in. */
|
||||
ptr_b[0] = 'x';
|
||||
|
||||
if (variant->forked) {
|
||||
pid_t pid = do_fork(&self->procmap);
|
||||
|
||||
ASSERT_NE(pid, -1);
|
||||
if (pid != 0)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now move it out of the way so we can place VMAs A, C in position,
|
||||
* unfaulted.
|
||||
*/
|
||||
ptr_b = mremap(ptr_b, 3 * page_size, 3 * page_size,
|
||||
MREMAP_FIXED | MREMAP_MAYMOVE, &self->carveout[20 * page_size]);
|
||||
ASSERT_NE(ptr_b, MAP_FAILED);
|
||||
|
||||
/* Map VMA A into place. */
|
||||
|
||||
ptr_a = mmap(&self->carveout[page_size], 3 * page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
|
||||
ASSERT_NE(ptr_a, MAP_FAILED);
|
||||
|
||||
/* Map VMA C into place. */
|
||||
ptr_c = mmap(&self->carveout[page_size + 3 * page_size + 3 * page_size],
|
||||
3 * page_size, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
|
||||
ASSERT_NE(ptr_c, MAP_FAILED);
|
||||
|
||||
/*
|
||||
* Now move VMA B into position with MREMAP_DONTUNMAP to catch incorrect
|
||||
* anon_vma propagation.
|
||||
*/
|
||||
ptr_b = mremap(ptr_b, 3 * page_size, 3 * page_size,
|
||||
MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP,
|
||||
&self->carveout[page_size + 3 * page_size]);
|
||||
ASSERT_NE(ptr_b, MAP_FAILED);
|
||||
|
||||
/* The VMAs should have merged, if not forked. */
|
||||
ASSERT_TRUE(find_vma_procmap(procmap, ptr_a));
|
||||
ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_a);
|
||||
offset = variant->forked ? 3 * page_size : 9 * page_size;
|
||||
ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_a + offset);
|
||||
|
||||
/* If forked, B and C should also not have merged. */
|
||||
if (variant->forked) {
|
||||
ASSERT_TRUE(find_vma_procmap(procmap, ptr_b));
|
||||
ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_b);
|
||||
ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_b + 3 * page_size);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(merge_with_fork, mremap_faulted_to_unfaulted_prev_faulted_next)
|
||||
{
|
||||
struct procmap_fd *procmap = &self->procmap;
|
||||
unsigned int page_size = self->page_size;
|
||||
char *ptr_a, *ptr_b, *ptr_bc;
|
||||
|
||||
/*
|
||||
* mremap() with MREMAP_DONTUNMAP such that A, B and C merge:
|
||||
*
|
||||
* |---------------------------|
|
||||
* | \ |
|
||||
* |-----------| | |-----------| / |---------|
|
||||
* | unfaulted | v | faulted | \ | faulted |
|
||||
* |-----------| |-----------| / |---------|
|
||||
* A C \ B
|
||||
*/
|
||||
|
||||
/*
|
||||
* Map VMA B and C into place. We have to map them together so their
|
||||
* anon_vma is the same and the vma->vm_pgoff's are correctly aligned.
|
||||
*/
|
||||
ptr_bc = mmap(&self->carveout[page_size + 3 * page_size],
|
||||
3 * page_size + 3 * page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
|
||||
ASSERT_NE(ptr_bc, MAP_FAILED);
|
||||
|
||||
/* Fault it in. */
|
||||
ptr_bc[0] = 'x';
|
||||
|
||||
if (variant->forked) {
|
||||
pid_t pid = do_fork(&self->procmap);
|
||||
|
||||
ASSERT_NE(pid, -1);
|
||||
if (pid != 0)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now move VMA B out the way (splitting VMA BC) so we can place VMA A
|
||||
* in position, unfaulted, and leave the remainder of the VMA we just
|
||||
* moved in place, faulted, as VMA C.
|
||||
*/
|
||||
ptr_b = mremap(ptr_bc, 3 * page_size, 3 * page_size,
|
||||
MREMAP_FIXED | MREMAP_MAYMOVE, &self->carveout[20 * page_size]);
|
||||
ASSERT_NE(ptr_b, MAP_FAILED);
|
||||
|
||||
/* Map VMA A into place. */
|
||||
ptr_a = mmap(&self->carveout[page_size], 3 * page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
|
||||
ASSERT_NE(ptr_a, MAP_FAILED);
|
||||
|
||||
/*
|
||||
* Now move VMA B into position with MREMAP_DONTUNMAP to catch incorrect
|
||||
* anon_vma propagation.
|
||||
*/
|
||||
ptr_b = mremap(ptr_b, 3 * page_size, 3 * page_size,
|
||||
MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP,
|
||||
&self->carveout[page_size + 3 * page_size]);
|
||||
ASSERT_NE(ptr_b, MAP_FAILED);
|
||||
|
||||
/* The VMAs should have merged. A,B,C if unforked, B, C if forked. */
|
||||
if (variant->forked) {
|
||||
ASSERT_TRUE(find_vma_procmap(procmap, ptr_b));
|
||||
ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_b);
|
||||
ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_b + 6 * page_size);
|
||||
} else {
|
||||
ASSERT_TRUE(find_vma_procmap(procmap, ptr_a));
|
||||
ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_a);
|
||||
ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_a + 9 * page_size);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_HARNESS_MAIN
|
||||
|
||||