Commit 883af14e authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge fixes from Andrew Morton:
 "26 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (26 commits)
  MAINTAINERS: add Dan Streetman to zbud maintainers
  MAINTAINERS: add Dan Streetman to zswap maintainers
  mm: do not export ioremap_page_range symbol for external module
  mn10300: fix build error of missing fpu_save()
  romfs: use different way to generate fsid for BLOCK or MTD
  frv: add missing atomic64 operations
  mm, page_alloc: fix premature OOM when racing with cpuset mems update
  mm, page_alloc: move cpuset seqcount checking to slowpath
  mm, page_alloc: fix fast-path race with cpuset update or removal
  mm, page_alloc: fix check for NULL preferred_zone
  kernel/panic.c: add missing \n
  fbdev: color map copying bounds checking
  frv: add atomic64_add_unless()
  mm/mempolicy.c: do not put mempolicy before using its nodemask
  radix-tree: fix private list warnings
  Documentation/filesystems/proc.txt: add VmPin
  mm, memcg: do not retry precharge charges
  proc: add a schedule point in proc_pid_readdir()
  mm: alloc_contig: re-allow CMA to compact FS pages
  mm/slub.c: trace free objects at KERN_INFO
  ...
parents 0263d4eb aab45453
@@ -144,4 +144,3 @@ int ioremap_page_range(unsigned long addr,
         return err;
 }
-EXPORT_SYMBOL_GPL(ioremap_page_range);
@@ -769,7 +769,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
                 struct radix_tree_node *old = child;
                 offset = child->offset + 1;
                 child = child->parent;
-                WARN_ON_ONCE(!list_empty(&node->private_list));
+                WARN_ON_ONCE(!list_empty(&old->private_list));
                 radix_tree_node_free(old);
                 if (old == entry_to_node(node))
                         return;
......
@@ -783,6 +783,12 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
         assert_spin_locked(pmd_lockptr(mm, pmd));
+        /*
+         * When we COW a devmap PMD entry, we split it into PTEs, so we should
+         * not be in this function with `flags & FOLL_COW` set.
+         */
+        WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
         if (flags & FOLL_WRITE && !pmd_write(*pmd))
                 return NULL;
@@ -1128,6 +1134,16 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
         return ret;
 }
+/*
+ * FOLL_FORCE can write to even unwritable pmd's, but only
+ * after we've gone through a COW cycle and they are dirty.
+ */
+static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
+{
+        return pmd_write(pmd) ||
+                ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
+}
 struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
                                    unsigned long addr,
                                    pmd_t *pmd,
@@ -1138,7 +1154,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
         assert_spin_locked(pmd_lockptr(mm, pmd));
-        if (flags & FOLL_WRITE && !pmd_write(*pmd))
+        if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags))
                 goto out;
         /* Avoid dumping huge zero page */
......
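The comment above can_follow_write_pmd() states the rule the new check enforces: FOLL_FORCE may write through an unwritable huge pmd only after a COW cycle has left it dirty. The standalone sketch below (plain C; the flag values and the fake_pmd type are simplified stand-ins, not the kernel's definitions) just exercises that boolean logic:

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's gup flags and pmd accessors. */
#define FOLL_WRITE 0x001u
#define FOLL_FORCE 0x010u
#define FOLL_COW   0x4000u

struct fake_pmd { bool writable; bool dirty; };

static bool fake_pmd_write(struct fake_pmd p) { return p.writable; }
static bool fake_pmd_dirty(struct fake_pmd p) { return p.dirty; }

/* Same shape as the check added above: writable pmds always pass; a
 * FOLL_FORCE caller may also write a read-only pmd, but only once a
 * COW cycle (FOLL_COW) has marked it dirty. */
static bool can_follow_write(struct fake_pmd pmd, unsigned int flags)
{
        return fake_pmd_write(pmd) ||
               ((flags & FOLL_FORCE) && (flags & FOLL_COW) && fake_pmd_dirty(pmd));
}

int main(void)
{
        struct fake_pmd ro_dirty = { .writable = false, .dirty = true };

        /* Forced write before any COW cycle: refused (prints 0). */
        printf("%d\n", can_follow_write(ro_dirty, FOLL_WRITE | FOLL_FORCE));
        /* Retry after COW: FOLL_COW set and the pmd is dirty: allowed (prints 1). */
        printf("%d\n", can_follow_write(ro_dirty, FOLL_WRITE | FOLL_FORCE | FOLL_COW));
        return 0;
}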
@@ -4353,9 +4353,9 @@ static int mem_cgroup_do_precharge(unsigned long count)
                 return ret;
         }
-        /* Try charges one by one with reclaim */
+        /* Try charges one by one with reclaim, but do not retry */
         while (count--) {
-                ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1);
+                ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1);
                 if (ret)
                         return ret;
                 mc.precharge++;
......
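The memcontrol change above is smaller than it looks but easy to misread: GFP_KERNEL does not include __GFP_NORETRY, so the old expression GFP_KERNEL & ~__GFP_NORETRY cleared a bit that was never set, while GFP_KERNEL | __GFP_NORETRY actually requests the no-retry behaviour the new comment describes. A tiny standalone sketch (made-up bit values, not the kernel's gfp definitions) of that difference:

#include <stdio.h>

/* Made-up stand-ins for the gfp bits; only the bit arithmetic matters here. */
#define __GFP_IO       0x01u
#define __GFP_FS       0x02u
#define __GFP_RECLAIM  0x04u
#define __GFP_NORETRY  0x08u
#define GFP_KERNEL     (__GFP_RECLAIM | __GFP_IO | __GFP_FS)

int main(void)
{
        unsigned int old_mask = GFP_KERNEL & ~__GFP_NORETRY; /* clears a bit that is not set: no-op */
        unsigned int new_mask = GFP_KERNEL | __GFP_NORETRY;  /* actually sets "do not retry" */

        printf("GFP_KERNEL             = 0x%02x\n", GFP_KERNEL);
        printf("GFP_KERNEL & ~NORETRY  = 0x%02x (unchanged)\n", old_mask);
        printf("GFP_KERNEL |  NORETRY  = 0x%02x\n", new_mask);
        return 0;
}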
@@ -1033,36 +1033,39 @@ static void node_states_set_node(int node, struct memory_notify *arg)
         node_set_state(node, N_MEMORY);
 }
-int zone_can_shift(unsigned long pfn, unsigned long nr_pages,
-                   enum zone_type target)
+bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
+                    enum zone_type target, int *zone_shift)
 {
         struct zone *zone = page_zone(pfn_to_page(pfn));
         enum zone_type idx = zone_idx(zone);
         int i;
+        *zone_shift = 0;
         if (idx < target) {
                 /* pages must be at end of current zone */
                 if (pfn + nr_pages != zone_end_pfn(zone))
-                        return 0;
+                        return false;
                 /* no zones in use between current zone and target */
                 for (i = idx + 1; i < target; i++)
                         if (zone_is_initialized(zone - idx + i))
-                                return 0;
+                                return false;
         }
         if (target < idx) {
                 /* pages must be at beginning of current zone */
                 if (pfn != zone->zone_start_pfn)
-                        return 0;
+                        return false;
                 /* no zones in use between current zone and target */
                 for (i = target + 1; i < idx; i++)
                         if (zone_is_initialized(zone - idx + i))
-                                return 0;
+                                return false;
         }
-        return target - idx;
+        *zone_shift = target - idx;
+        return true;
 }
 /* Must be protected by mem_hotplug_begin() */
@@ -1089,10 +1092,13 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
             !can_online_high_movable(zone))
                 return -EINVAL;
-        if (online_type == MMOP_ONLINE_KERNEL)
-                zone_shift = zone_can_shift(pfn, nr_pages, ZONE_NORMAL);
-        else if (online_type == MMOP_ONLINE_MOVABLE)
-                zone_shift = zone_can_shift(pfn, nr_pages, ZONE_MOVABLE);
+        if (online_type == MMOP_ONLINE_KERNEL) {
+                if (!zone_can_shift(pfn, nr_pages, ZONE_NORMAL, &zone_shift))
+                        return -EINVAL;
+        } else if (online_type == MMOP_ONLINE_MOVABLE) {
+                if (!zone_can_shift(pfn, nr_pages, ZONE_MOVABLE, &zone_shift))
+                        return -EINVAL;
+        }
         zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages);
         if (!zone)
......
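The zone_can_shift() rework above changes the calling convention because the old int return was ambiguous: a legitimate shift of 0 (target zone equal to the current zone) and "shift not possible" both came back as 0. A minimal sketch of the same return-value-plus-out-parameter pattern (hypothetical names, not the hotplug API):

#include <stdbool.h>
#include <stdio.h>

/* Old style: 0 can mean "no shift needed" or "shift impossible". */
static int shift_old(int cur, int target, bool possible)
{
        if (!possible)
                return 0;
        return target - cur;
}

/* New style: success in the return value, the shift through a pointer. */
static bool shift_new(int cur, int target, bool possible, int *shift)
{
        *shift = 0;
        if (!possible)
                return false;
        *shift = target - cur;
        return true;
}

int main(void)
{
        int shift;

        /* Ambiguous: "same zone" and "impossible" both print 0. */
        printf("old: %d %d\n", shift_old(2, 2, true), shift_old(2, 3, false));

        /* Unambiguous: the caller checks the bool before using the shift. */
        if (shift_new(2, 2, true, &shift))
                printf("new: ok, shift = %d\n", shift);
        if (!shift_new(2, 3, false, &shift))
                printf("new: refused\n");
        return 0;
}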
@@ -2017,8 +2017,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
         nmask = policy_nodemask(gfp, pol);
         zl = policy_zonelist(gfp, pol, node);
-        mpol_cond_put(pol);
         page = __alloc_pages_nodemask(gfp, order, zl, nmask);
+        mpol_cond_put(pol);
 out:
         if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
                 goto retry_cpuset;
......
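The mempolicy hunk is purely an ordering fix: the nodemask passed to __alloc_pages_nodemask() lives inside the mempolicy that mpol_cond_put() may drop, so the put has to come after the allocation has consumed the mask. A generic sketch of that bug pattern with a hypothetical refcounted object (not the mempolicy API):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* A toy refcounted object whose embedded mask callers may borrow. */
struct toy_policy {
        int refcount;
        unsigned long nodemask;
};

static void toy_policy_put(struct toy_policy *p)
{
        if (--p->refcount == 0) {
                memset(p, 0x5a, sizeof(*p)); /* poison so a use-after-put is visible */
                free(p);
        }
}

static void allocate_with(const unsigned long *mask)
{
        printf("allocating with nodemask %#lx\n", *mask);
}

int main(void)
{
        struct toy_policy *pol = malloc(sizeof(*pol));

        pol->refcount = 1;
        pol->nodemask = 0x3;

        const unsigned long *nmask = &pol->nodemask; /* borrowed from pol */

        /* Correct order (what the patch establishes): use the mask, then put. */
        allocate_with(nmask);
        toy_policy_put(pol);

        /* The pre-patch order - put, then allocate_with(nmask) - would read freed memory. */
        return 0;
}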
@@ -3523,12 +3523,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         struct page *page = NULL;
         unsigned int alloc_flags;
         unsigned long did_some_progress;
-        enum compact_priority compact_priority = DEF_COMPACT_PRIORITY;
+        enum compact_priority compact_priority;
         enum compact_result compact_result;
-        int compaction_retries = 0;
-        int no_progress_loops = 0;
+        int compaction_retries;
+        int no_progress_loops;
         unsigned long alloc_start = jiffies;
         unsigned int stall_timeout = 10 * HZ;
+        unsigned int cpuset_mems_cookie;
         /*
          * In the slowpath, we sanity check order to avoid ever trying to
@@ -3549,6 +3550,23 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                                 (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
                 gfp_mask &= ~__GFP_ATOMIC;
+retry_cpuset:
+        compaction_retries = 0;
+        no_progress_loops = 0;
+        compact_priority = DEF_COMPACT_PRIORITY;
+        cpuset_mems_cookie = read_mems_allowed_begin();
+        /*
+         * We need to recalculate the starting point for the zonelist iterator
+         * because we might have used different nodemask in the fast path, or
+         * there was a cpuset modification and we are retrying - otherwise we
+         * could end up iterating over non-eligible zones endlessly.
+         */
+        ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
+                                        ac->high_zoneidx, ac->nodemask);
+        if (!ac->preferred_zoneref->zone)
+                goto nopage;
         /*
          * The fast path uses conservative alloc_flags to succeed only until
          * kswapd needs to be woken up, and to avoid the cost of setting up
@@ -3708,6 +3726,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                                 &compaction_retries))
                 goto retry;
+        /*
+         * It's possible we raced with cpuset update so the OOM would be
+         * premature (see below the nopage: label for full explanation).
+         */
+        if (read_mems_allowed_retry(cpuset_mems_cookie))
+                goto retry_cpuset;
         /* Reclaim has failed us, start killing things */
         page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
         if (page)
@@ -3720,6 +3745,16 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         }
 nopage:
+        /*
+         * When updating a task's mems_allowed or mempolicy nodemask, it is
+         * possible to race with parallel threads in such a way that our
+         * allocation can fail while the mask is being updated. If we are about
+         * to fail, check if the cpuset changed during allocation and if so,
+         * retry.
+         */
+        if (read_mems_allowed_retry(cpuset_mems_cookie))
+                goto retry_cpuset;
         warn_alloc(gfp_mask,
                         "page allocation failure: order:%u", order);
 got_pg:
@@ -3734,7 +3769,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
                         struct zonelist *zonelist, nodemask_t *nodemask)
 {
         struct page *page;
-        unsigned int cpuset_mems_cookie;
         unsigned int alloc_flags = ALLOC_WMARK_LOW;
         gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
         struct alloc_context ac = {
@@ -3771,9 +3805,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
         if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE)
                 alloc_flags |= ALLOC_CMA;
-retry_cpuset:
-        cpuset_mems_cookie = read_mems_allowed_begin();
         /* Dirty zone balancing only done in the fast path */
         ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
@@ -3784,8 +3815,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
          */
         ac.preferred_zoneref = first_zones_zonelist(ac.zonelist,
                                         ac.high_zoneidx, ac.nodemask);
-        if (!ac.preferred_zoneref) {
+        if (!ac.preferred_zoneref->zone) {
                 page = NULL;
+                /*
+                 * This might be due to race with cpuset_current_mems_allowed
+                 * update, so make sure we retry with original nodemask in the
+                 * slow path.
+                 */
                 goto no_zone;
         }
@@ -3794,6 +3830,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
         if (likely(page))
                 goto out;
+no_zone:
         /*
          * Runtime PM, block IO and its error handling path can deadlock
          * because I/O on the device might not complete.
@@ -3805,21 +3842,10 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
          * Restore the original nodemask if it was potentially replaced with
          * &cpuset_current_mems_allowed to optimize the fast-path attempt.
          */
-        if (cpusets_enabled())
+        if (unlikely(ac.nodemask != nodemask))
                 ac.nodemask = nodemask;
-        page = __alloc_pages_slowpath(alloc_mask, order, &ac);
-no_zone:
-        /*
-         * When updating a task's mems_allowed, it is possible to race with
-         * parallel threads in such a way that an allocation can fail while
-         * the mask is being updated. If a page allocation is about to fail,
-         * check if the cpuset changed during allocation and if so, retry.
-         */
-        if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) {
-                alloc_mask = gfp_mask;
-                goto retry_cpuset;
-        }
+        page = __alloc_pages_slowpath(alloc_mask, order, &ac);
 out:
         if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
@@ -7248,6 +7274,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
                 .zone = page_zone(pfn_to_page(start)),
                 .mode = MIGRATE_SYNC,
                 .ignore_skip_hint = true,
+                .gfp_mask = GFP_KERNEL,
         };
         INIT_LIST_HEAD(&cc.migratepages);
......
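All of the page_alloc changes above follow one pattern: take a cookie with read_mems_allowed_begin(), attempt the allocation, and if read_mems_allowed_retry() reports that the cpuset's mems_allowed changed underneath us, jump back to a retry label instead of failing or invoking the OOM killer prematurely; the patches move that retry from the fast path into the slowpath and re-check the cookie before the OOM decision. The standalone sketch below illustrates the cookie/retry idea with a plain sequence counter (an illustration of the pattern only, not the kernel's seqcount implementation, and single-threaded for simplicity):

#include <stdbool.h>
#include <stdio.h>

/* Toy sequence counter: bumped before and after every nodemask update. */
static unsigned int mems_seq;

static unsigned int read_begin(void)
{
        return mems_seq;                /* the "cookie" */
}

static bool read_retry(unsigned int cookie)
{
        return mems_seq != cookie;      /* changed since we started? */
}

static void update_mems_allowed(void)
{
        mems_seq++;                     /* writer: update in progress */
        /* ... rewrite the nodemask here ... */
        mems_seq++;                     /* writer: update done */
}

/* Fails once, simulating an allocation that races with a cpuset update. */
static bool try_allocate(bool *fail_once)
{
        if (*fail_once) {
                *fail_once = false;
                update_mems_allowed();
                return false;
        }
        return true;
}

int main(void)
{
        bool fail_once = true;
        unsigned int cookie;

retry_cpuset:
        cookie = read_begin();
        if (!try_allocate(&fail_once)) {
                /* Only treat the failure as real if no update raced with us. */
                if (read_retry(cookie))
                        goto retry_cpuset;
                printf("real failure\n");
                return 1;
        }
        printf("allocated\n");
        return 0;
}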
@@ -496,10 +496,11 @@ static inline int check_valid_pointer(struct kmem_cache *s,
         return 1;
 }
-static void print_section(char *text, u8 *addr, unsigned int length)
+static void print_section(char *level, char *text, u8 *addr,
+                          unsigned int length)
 {
         metadata_access_enable();
-        print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
+        print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
                         length, 1);
         metadata_access_disable();
 }
@@ -636,14 +637,15 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
                p, p - addr, get_freepointer(s, p));
         if (s->flags & SLAB_RED_ZONE)
-                print_section("Redzone ", p - s->red_left_pad, s->red_left_pad);
+                print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
+                              s->red_left_pad);
         else if (p > addr + 16)
-                print_section("Bytes b4 ", p - 16, 16);
+                print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
-        print_section("Object ", p, min_t(unsigned long, s->object_size,
-                        PAGE_SIZE));
+        print_section(KERN_ERR, "Object ", p,
+                      min_t(unsigned long, s->object_size, PAGE_SIZE));
         if (s->flags & SLAB_RED_ZONE)
-                print_section("Redzone ", p + s->object_size,
+                print_section(KERN_ERR, "Redzone ", p + s->object_size,
                         s->inuse - s->object_size);
         if (s->offset)
@@ -658,7 +660,8 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
         if (off != size_from_object(s))
                 /* Beginning of the filler is the free pointer */
-                print_section("Padding ", p + off, size_from_object(s) - off);
+                print_section(KERN_ERR, "Padding ", p + off,
+                              size_from_object(s) - off);
         dump_stack();
 }
@@ -820,7 +823,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
         end--;
         slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
-        print_section("Padding ", end - remainder, remainder);
+        print_section(KERN_ERR, "Padding ", end - remainder, remainder);
         restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
         return 0;
@@ -973,7 +976,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
                         page->freelist);
                 if (!alloc)
-                        print_section("Object ", (void *)object,
+                        print_section(KERN_INFO, "Object ", (void *)object,
                                         s->object_size);
                 dump_stack();
......