Commit 883af14e authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge fixes from Andrew Morton:
 "26 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (26 commits)
  MAINTAINERS: add Dan Streetman to zbud maintainers
  MAINTAINERS: add Dan Streetman to zswap maintainers
  mm: do not export ioremap_page_range symbol for external module
  mn10300: fix build error of missing fpu_save()
  romfs: use different way to generate fsid for BLOCK or MTD
  frv: add missing atomic64 operations
  mm, page_alloc: fix premature OOM when racing with cpuset mems update
  mm, page_alloc: move cpuset seqcount checking to slowpath
  mm, page_alloc: fix fast-path race with cpuset update or removal
  mm, page_alloc: fix check for NULL preferred_zone
  kernel/panic.c: add missing \n
  fbdev: color map copying bounds checking
  frv: add atomic64_add_unless()
  mm/mempolicy.c: do not put mempolicy before using its nodemask
  radix-tree: fix private list warnings
  Documentation/filesystems/proc.txt: add VmPin
  mm, memcg: do not retry precharge charges
  proc: add a schedule point in proc_pid_readdir()
  mm: alloc_contig: re-allow CMA to compact FS pages
  mm/slub.c: trace free objects at KERN_INFO
  ...
parents 0263d4eb aab45453
@@ -212,10 +212,11 @@ asynchronous manner and the value may not be very precise. To see a precise
snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table.
It's slow but very precise.
-Table 1-2: Contents of the status files (as of 4.1)
+Table 1-2: Contents of the status files (as of 4.8)
..............................................................................
Field                       Content
Name                        filename of the executable
+Umask                       file mode creation mask
State                       state (R is running, S is sleeping, D is sleeping
                            in an uninterruptible wait, Z is zombie,
                            T is traced or stopped)
@@ -226,7 +227,6 @@ Table 1-2: Contents of the status files (as of 4.1)
TracerPid                   PID of process tracing this process (0 if not)
Uid                         Real, effective, saved set, and file system UIDs
Gid                         Real, effective, saved set, and file system GIDs
-Umask                       file mode creation mask
FDSize                      number of file descriptor slots currently allocated
Groups                      supplementary group list
NStgid                      descendant namespace thread group ID hierarchy
@@ -236,6 +236,7 @@ Table 1-2: Contents of the status files (as of 4.1)
VmPeak                      peak virtual memory size
VmSize                      total program size
VmLck                       locked memory size
+VmPin                       pinned memory size
VmHWM                       peak resident set size ("high water mark")
VmRSS                       size of memory portions. It contains the three
                            following parts (VmRSS = RssAnon + RssFile + RssShmem)
......
@@ -13625,6 +13625,7 @@ F:	drivers/net/hamradio/z8530.h
ZBUD COMPRESSED PAGE ALLOCATOR
M:	Seth Jennings <sjenning@redhat.com>
+M:	Dan Streetman <ddstreet@ieee.org>
L:	linux-mm@kvack.org
S:	Maintained
F:	mm/zbud.c
@@ -13680,6 +13681,7 @@ F:	Documentation/vm/zsmalloc.txt
ZSWAP COMPRESSED SWAP CACHING
M:	Seth Jennings <sjenning@redhat.com>
+M:	Dan Streetman <ddstreet@ieee.org>
L:	linux-mm@kvack.org
S:	Maintained
F:	mm/zswap.c
......
@@ -139,7 +139,7 @@ static inline void atomic64_dec(atomic64_t *v)
#define atomic64_sub_and_test(i,v)	(atomic64_sub_return((i), (v)) == 0)
#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
#define atomic64_inc_and_test(v)	(atomic64_inc_return((v)) == 0)
+#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
#define atomic_cmpxchg(v, old, new)	(cmpxchg(&(v)->counter, old, new))
#define atomic_xchg(v, new)		(xchg(&(v)->counter, new))
@@ -161,6 +161,39 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
	return c;
}
+static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
+{
+	long long c, old;
+
+	c = atomic64_read(v);
+	for (;;) {
+		if (unlikely(c == u))
+			break;
+		old = atomic64_cmpxchg(v, c, c + i);
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return c != u;
+}
+
+static inline long long atomic64_dec_if_positive(atomic64_t *v)
+{
+	long long c, old, dec;
+
+	c = atomic64_read(v);
+	for (;;) {
+		dec = c - 1;
+		if (unlikely(dec < 0))
+			break;
+		old = atomic64_cmpxchg((v), c, dec);
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return dec;
+}
+
#define ATOMIC_OP(op) \
static inline int atomic_fetch_##op(int i, atomic_t *v) \
{ \
......
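The two helpers added above are the classic compare-and-swap retry loop: read the counter, give up if it already holds the forbidden value u (or would go negative, for dec_if_positive), otherwise attempt a cmpxchg and retry with the freshly observed value on failure. atomic64_add_unless() returns nonzero only when the add was actually performed, which is what lets atomic64_inc_not_zero(v) be spelled atomic64_add_unless((v), 1, 0). A minimal userspace sketch of the same loop, using the GCC/Clang __atomic builtins rather than the kernel API (the demo_* names are hypothetical stand-ins):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef int64_t demo_atomic64;	/* hypothetical stand-in for atomic64_t */

static bool demo_add_unless(demo_atomic64 *v, int64_t i, int64_t u)
{
	int64_t c = __atomic_load_n(v, __ATOMIC_RELAXED);

	for (;;) {
		if (c == u)
			return false;	/* forbidden value: no add performed */
		/* On failure, c is reloaded with the current value. */
		if (__atomic_compare_exchange_n(v, &c, c + i, false,
						__ATOMIC_SEQ_CST,
						__ATOMIC_RELAXED))
			return true;	/* add performed */
	}
}

int main(void)
{
	demo_atomic64 refcount = 1;

	/* inc_not_zero: takes a reference while the count is live... */
	printf("%d\n", demo_add_unless(&refcount, 1, 0));	/* 1 */
	refcount = 0;
	/* ...but refuses to resurrect a count that dropped to zero. */
	printf("%d\n", demo_add_unless(&refcount, 1, 0));	/* 0 */
	return 0;
}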
@@ -16,7 +16,7 @@
struct task_struct;
struct thread_struct;

-#if !defined(CONFIG_LAZY_SAVE_FPU)
+#if defined(CONFIG_FPU) && !defined(CONFIG_LAZY_SAVE_FPU)
struct fpu_state_struct;
extern asmlinkage void fpu_save(struct fpu_state_struct *);
#define switch_fpu(prev, next) \
......
@@ -408,14 +408,14 @@ static ssize_t show_valid_zones(struct device *dev,
	sprintf(buf, "%s", zone->name);

	/* MMOP_ONLINE_KERNEL */
-	zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL);
+	zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL, &zone_shift);
	if (zone_shift) {
		strcat(buf, " ");
		strcat(buf, (zone + zone_shift)->name);
	}

	/* MMOP_ONLINE_MOVABLE */
-	zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE);
+	zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE, &zone_shift);
	if (zone_shift) {
		strcat(buf, " ");
		strcat(buf, (zone + zone_shift)->name);
......
@@ -330,7 +330,7 @@ static int h_memstick_read_dev_id(struct memstick_dev *card,
	struct ms_id_register id_reg;

	if (!(*mrq)) {
-		memstick_init_req(&card->current_mrq, MS_TPC_READ_REG, NULL,
+		memstick_init_req(&card->current_mrq, MS_TPC_READ_REG, &id_reg,
				  sizeof(struct ms_id_register));
		*mrq = &card->current_mrq;
		return 0;
......
@@ -163,17 +163,18 @@ void fb_dealloc_cmap(struct fb_cmap *cmap)
int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to)
{
-	int tooff = 0, fromoff = 0;
-	int size;
+	unsigned int tooff = 0, fromoff = 0;
+	size_t size;

	if (to->start > from->start)
		fromoff = to->start - from->start;
	else
		tooff = from->start - to->start;
-	size = to->len - tooff;
-	if (size > (int) (from->len - fromoff))
-		size = from->len - fromoff;
-	if (size <= 0)
+	if (fromoff >= from->len || tooff >= to->len)
		return -EINVAL;
+
+	size = min_t(size_t, to->len - tooff, from->len - fromoff);
+	if (size == 0)
		return -EINVAL;
	size *= sizeof(u16);
@@ -187,17 +188,18 @@ int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to)
int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to)
{
-	int tooff = 0, fromoff = 0;
-	int size;
+	unsigned int tooff = 0, fromoff = 0;
+	size_t size;

	if (to->start > from->start)
		fromoff = to->start - from->start;
	else
		tooff = from->start - to->start;
-	size = to->len - tooff;
-	if (size > (int) (from->len - fromoff))
-		size = from->len - fromoff;
-	if (size <= 0)
+	if (fromoff >= from->len || tooff >= to->len)
		return -EINVAL;
+
+	size = min_t(size_t, to->len - tooff, from->len - fromoff);
+	if (size == 0)
		return -EINVAL;
	size *= sizeof(u16);
......
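The type changes above are the whole fix: with int offsets, a crafted cmap start value wraps negative and slips past the old "size <= 0" check, because from->len - fromoff is computed in unsigned arithmetic and can wrap back to a small positive count. Worked numbers in a standalone sketch (start/len modeled on struct fb_cmap's __u32 fields; the wrap to -256 assumes the usual two's-complement conversion):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t from_start = 0, from_len = 256;	/* kernel-side cmap */
	uint32_t to_start = 0xffffff00, to_len = 16;	/* attacker-chosen */

	/* Old logic: int offsets wrap negative. */
	int fromoff = to_start - from_start;	/* -256 after conversion */
	int size = to_len;			/* tooff == 0 */

	if (size > (int)(from_len - fromoff))	/* 16 > 512: false */
		size = from_len - fromoff;
	if (size <= 0)				/* 16 > 0: not caught */
		return 1;
	printf("old: fromoff=%d size=%d -> reads before the buffer\n",
	       fromoff, size);

	/* New logic: unsigned offsets are range-checked up front. */
	unsigned int ufromoff = to_start - from_start;
	if (ufromoff >= from_len)
		printf("new: fromoff=%u >= len=%u -> -EINVAL\n",
		       ufromoff, from_len);
	return 0;
}

With the old code the copy would proceed with from->red + fromoff pointing 256 entries before the allocation, an out-of-bounds kernel heap read; the unsigned range check rejects the request up front instead.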
@@ -38,6 +38,7 @@ config FS_DAX
	bool "Direct Access (DAX) support"
	depends on MMU
	depends on !(ARM || MIPS || SPARC)
+	select FS_IOMAP
	help
	  Direct Access (DAX) can be used on memory-backed block devices.
	  If the block device supports DAX and the filesystem supports DAX,
......
@@ -990,7 +990,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL_GPL(__dax_zero_page_range);

-#ifdef CONFIG_FS_IOMAP
static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
{
	return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9);
@@ -1428,4 +1427,3 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
}
EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
#endif /* CONFIG_FS_DAX_PMD */
-#endif /* CONFIG_FS_IOMAP */
......
config EXT2_FS
	tristate "Second extended fs support"
-	select FS_IOMAP if FS_DAX
	help
	  Ext2 is a standard Linux file system for hard disks.
......
@@ -37,7 +37,6 @@ config EXT4_FS
	select CRC16
	select CRYPTO
	select CRYPTO_CRC32C
-	select FS_IOMAP if FS_DAX
	help
	  This is the next generation of the ext3 filesystem.
......
@@ -3179,6 +3179,8 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
	     iter.tgid += 1, iter = next_tgid(ns, iter)) {
		char name[PROC_NUMBUF];
		int len;
+
+		cond_resched();
		if (!has_pid_permissions(ns, iter.task, 2))
			continue;
......
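Each getdents() call on /proc re-walks the task list from the requested offset, so on a machine with tens of thousands of processes this loop can run for a long time without ever sleeping; on a !CONFIG_PREEMPT kernel that shows up as latency spikes and possible softlockup reports. The added cond_resched() is the standard remedy: offer the scheduler a chance once per iteration. The same pattern in isolation, as a sketch (the demo_item type and list are hypothetical):

#include <linux/list.h>
#include <linux/sched.h>

struct demo_item {
	struct list_head node;
};

/* Walk an unbounded list in process context without hogging the CPU. */
static void demo_walk(struct list_head *items)
{
	struct demo_item *it;

	list_for_each_entry(it, items, node) {
		cond_resched();	/* voluntary preemption point */
		/* ... process one entry ... */
	}
}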
@@ -74,6 +74,7 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/uaccess.h>
+#include <linux/major.h>
#include "internal.h"

static struct kmem_cache *romfs_inode_cachep;
@@ -416,7 +417,22 @@ static void romfs_destroy_inode(struct inode *inode)
static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
-	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+	u64 id = 0;
+
+	/* When calling huge_encode_dev(),
+	 * use sb->s_bdev->bd_dev when,
+	 *   - CONFIG_ROMFS_ON_BLOCK defined
+	 * use sb->s_dev when,
+	 *   - CONFIG_ROMFS_ON_BLOCK undefined and
+	 *   - CONFIG_ROMFS_ON_MTD defined
+	 * leave id as 0 when,
+	 *   - CONFIG_ROMFS_ON_BLOCK undefined and
+	 *   - CONFIG_ROMFS_ON_MTD undefined
+	 */
+	if (sb->s_bdev)
+		id = huge_encode_dev(sb->s_bdev->bd_dev);
+	else if (sb->s_dev)
+		id = huge_encode_dev(sb->s_dev);

	buf->f_type = ROMFS_MAGIC;
	buf->f_namelen = ROMFS_MAXFN;
@@ -489,6 +505,11 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
	sb->s_flags |= MS_RDONLY | MS_NOATIME;
	sb->s_op = &romfs_super_ops;

+#ifdef CONFIG_ROMFS_ON_MTD
+	/* Use same dev ID from the underlying mtdblock device */
+	if (sb->s_mtd)
+		sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, sb->s_mtd->index);
+#endif
	/* read the image superblock and check it */
	rsb = kmalloc(512, GFP_KERNEL);
	if (!rsb)
......
@@ -63,6 +63,7 @@ struct userfaultfd_wait_queue {
	struct uffd_msg msg;
	wait_queue_t wq;
	struct userfaultfd_ctx *ctx;
+	bool waken;
};
struct userfaultfd_wake_range {
@@ -86,6 +87,12 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
	if (len && (start > uwq->msg.arg.pagefault.address ||
		    start + len <= uwq->msg.arg.pagefault.address))
		goto out;
+	WRITE_ONCE(uwq->waken, true);
+	/*
+	 * The implicit smp_mb__before_spinlock in try_to_wake_up()
+	 * renders uwq->waken visible to other CPUs before the task is
+	 * woken.
+	 */
	ret = wake_up_state(wq->private, mode);
	if (ret)
		/*
@@ -264,6 +271,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
	struct userfaultfd_wait_queue uwq;
	int ret;
	bool must_wait, return_to_userland;
+	long blocking_state;

	BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
@@ -334,10 +342,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
	uwq.wq.private = current;
	uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
	uwq.ctx = ctx;
+	uwq.waken = false;

	return_to_userland =
		(vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
		(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
+	blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
+			 TASK_KILLABLE;

	spin_lock(&ctx->fault_pending_wqh.lock);
/*
@@ -350,8 +361,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
	 * following the spin_unlock to happen before the list_add in
	 * __add_wait_queue.
	 */
-	set_current_state(return_to_userland ? TASK_INTERRUPTIBLE :
-			  TASK_KILLABLE);
+	set_current_state(blocking_state);
	spin_unlock(&ctx->fault_pending_wqh.lock);

	must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -364,6 +374,29 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
		wake_up_poll(&ctx->fd_wqh, POLLIN);
		schedule();
		ret |= VM_FAULT_MAJOR;
+
+		/*
+		 * False wakeups can originate even from rwsem before
+		 * up_read(), however userfaults will wait either for a
+		 * targeted wakeup on the specific uwq waitqueue from
+		 * wake_userfault() or for signals or for uffd
+		 * release.
+		 */
+		while (!READ_ONCE(uwq.waken)) {
+			/*
+			 * This needs the full smp_store_mb()
+			 * guarantee as the state write must be
+			 * visible to other CPUs before reading
+			 * uwq.waken from other CPUs.
+			 */
+			set_current_state(blocking_state);
+			if (READ_ONCE(uwq.waken) ||
+			    READ_ONCE(ctx->released) ||
+			    (return_to_userland ? signal_pending(current) :
+			     fatal_signal_pending(current)))
+				break;
+			schedule();
+		}
	}

	__set_current_state(TASK_RUNNING);
......
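The new loop hardens handle_userfault() against wakeups that did not come from wake_userfault(): a task can be woken spuriously (for example by the mmap_sem rwsem before up_read()), and previously a single schedule() would then return with the fault not yet resolved. The fix is the canonical open-coded wait loop: publish the sleeping state before re-testing the condition, so a waker that sets uwq.waken and calls wake_up_state() cannot slip between the test and the sleep. Reduced to its skeleton, as a sketch with a hypothetical flag (the real code also distinguishes interruptible from killable sleeps and checks ctx->released):

#include <linux/sched.h>
#include <linux/types.h>

/* Sleep until *flag becomes true, tolerating spurious wakeups.
 * set_current_state() implies smp_store_mb(), so the state store is
 * ordered before the re-read of the flag; the waker's WRITE_ONCE()
 * plus wake_up_state() pairs with it. */
static void demo_wait_for_flag(bool *flag)
{
	while (!READ_ONCE(*flag)) {
		set_current_state(TASK_KILLABLE);
		if (READ_ONCE(*flag) || fatal_signal_pending(current))
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);
}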
@@ -284,7 +284,7 @@ extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
		unsigned long map_offset);
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
					  unsigned long pnum);
-extern int zone_can_shift(unsigned long pfn, unsigned long nr_pages,
-			  enum zone_type target);
+extern bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
+			   enum zone_type target, int *zone_shift);

#endif /* __LINUX_MEMORY_HOTPLUG_H */
@@ -972,12 +972,16 @@ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z,
 * @zonelist - The zonelist to search for a suitable zone
 * @highest_zoneidx - The zone index of the highest zone to return
 * @nodes - An optional nodemask to filter the zonelist with
- * @zone - The first suitable zone found is returned via this parameter
+ * @return - Zoneref pointer for the first suitable zone found (see below)
 *
 * This function returns the first zone at or below a given zone index that is
 * within the allowed nodemask. The zoneref returned is a cursor that can be
 * used to iterate the zonelist with next_zones_zonelist by advancing it by
 * one before calling.
+ *
+ * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is
+ * never NULL). This may happen either genuinely, or due to concurrent nodemask
+ * update due to cpuset modification.
 */
static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
					enum zone_type highest_zoneidx,
......
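The added paragraph pins down the contract the page-allocator fixes in this series rely on: first_zones_zonelist() always returns a valid zoneref, so callers must test zoneref->zone for NULL rather than the returned pointer itself. A sketch of the caller-side check (demo_have_eligible_zone is a hypothetical wrapper; the allocator's get_page_from_freelist() does the equivalent):

#include <linux/mmzone.h>
#include <linux/types.h>

/* Returns true iff the zonelist has any eligible zone for this request. */
static bool demo_have_eligible_zone(struct zonelist *zonelist,
				    enum zone_type highest_zoneidx,
				    nodemask_t *nodes)
{
	struct zoneref *z = first_zones_zonelist(zonelist, highest_zoneidx,
						 nodes);

	/* z is never NULL; z->zone is NULL when nothing qualifies. */
	return z->zone != NULL;
}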
@@ -110,6 +110,7 @@ extern int watchdog_user_enabled;
extern int watchdog_thresh;
extern unsigned long watchdog_enabled;
extern unsigned long *watchdog_cpumask_bits;
+extern atomic_t watchdog_park_in_progress;

#ifdef CONFIG_SMP
extern int sysctl_softlockup_all_cpu_backtrace;
extern int sysctl_hardlockup_all_cpu_backtrace;
......
@@ -249,7 +249,7 @@ void panic(const char *fmt, ...)
		 * Delay timeout seconds before rebooting the machine.
		 * We can't use the "normal" timers since we just panicked.
		 */
-		pr_emerg("Rebooting in %d seconds..", panic_timeout);
+		pr_emerg("Rebooting in %d seconds..\n", panic_timeout);

		for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {
			touch_nmi_watchdog();
......
@@ -49,6 +49,8 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
#define for_each_watchdog_cpu(cpu) \
	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)

+atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
+
/*
* The 'watchdog_running' variable is set to 1 when the watchdog threads
* are registered/started and is set to 0 when the watchdog threads are
@@ -260,6 +262,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
	int duration;
	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;

+	if (atomic_read(&watchdog_park_in_progress) != 0)
+		return HRTIMER_NORESTART;
+
	/* kick the hardlockup detector */
	watchdog_interrupt_count();
@@ -467,12 +472,16 @@ static int watchdog_park_threads(void)
{
	int cpu, ret = 0;

+	atomic_set(&watchdog_park_in_progress, 1);
+
	for_each_watchdog_cpu(cpu) {
		ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
		if (ret)
			break;
	}
+
+	atomic_set(&watchdog_park_in_progress, 0);

	return ret;
}
......
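The flag closes a race in watchdog_park_threads(): kthread_park() parks the per-CPU watchdog threads one at a time and can sleep, while the per-CPU hrtimers and the NMI perf callback keep firing and can observe half-parked state, yielding spurious hard-lockup reports. Publishing a "parking in progress" atomic first and having the asynchronous callbacks bail out while it is set is a generic quiesce pattern; a sketch with hypothetical names (the real callbacks return HRTIMER_NORESTART or plain return, as shown above and below):

#include <linux/atomic.h>

static atomic_t demo_quiesce = ATOMIC_INIT(0);

static int demo_park_all(void)		/* process context, may sleep */
{
	int ret;

	atomic_set(&demo_quiesce, 1);
	ret = 0;			/* ... park each worker here ... */
	atomic_set(&demo_quiesce, 0);
	return ret;
}

static void demo_async_check(void)	/* timer or NMI context */
{
	if (atomic_read(&demo_quiesce) != 0)
		return;			/* state is in flux; skip this sample */
	/* ... normal lockup detection ... */
}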
@@ -84,6 +84,9 @@ static void watchdog_overflow_callback(struct perf_event *event,
	/* Ensure the watchdog never gets throttled */
	event->hw.interrupts = 0;

+	if (atomic_read(&watchdog_park_in_progress) != 0)
+		return;
+
	if (__this_cpu_read(watchdog_nmi_touch) == true) {
		__this_cpu_write(watchdog_nmi_touch, false);
		return;
......