Commit 9e2d8656 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'akpm' (Andrew's patch-bomb)

Merge patches from Andrew Morton:
 "A few misc things and very nearly all of the MM tree.  A tremendous
  amount of stuff (again), including a significant rbtree library
  rework."

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (160 commits)
  sparc64: Support transparent huge pages.
  mm: thp: Use more portable PMD clearing sequenece in zap_huge_pmd().
  mm: Add and use update_mmu_cache_pmd() in transparent huge page code.
  sparc64: Document PGD and PMD layout.
  sparc64: Eliminate PTE table memory wastage.
  sparc64: Halve the size of PTE tables
  sparc64: Only support 4MB huge pages and 8KB base pages.
  memory-hotplug: suppress "Trying to free nonexistent resource <XXXXXXXXXXXXXXXX-YYYYYYYYYYYYYYYY>" warning
  mm: memcg: clean up mm_match_cgroup() signature
  mm: document PageHuge somewhat
  mm: use %pK for /proc/vmallocinfo
  mm, thp: fix mlock statistics
  mm, thp: fix mapped pages avoiding unevictable list on mlock
  memory-hotplug: update memory block's state and notify userspace
  memory-hotplug: preparation to notify memory block's state at memory hot remove
  mm: avoid section mismatch warning for memblock_type_name
  make GFP_NOTRACK definition unconditional
  cma: decrease cc.nr_migratepages after reclaiming pagelist
  CMA: migrate mlocked pages
  kpageflags: fix wrong KPF_THP on non-huge compound pages
  ...
parents 1ea4f4f8 9e695d2e
......@@ -380,7 +380,7 @@ int vectors_user_mapping(void)
return install_special_mapping(mm, 0xffff0000, PAGE_SIZE,
VM_READ | VM_EXEC |
VM_MAYREAD | VM_MAYEXEC |
VM_RESERVED,
VM_DONTEXPAND | VM_DONTDUMP,
NULL);
}
......
......@@ -10,6 +10,7 @@ config X86_32
def_bool y
depends on !64BIT
select CLKSRC_I8253
select HAVE_UID16
config X86_64
def_bool y
......@@ -46,6 +47,7 @@ config X86
select HAVE_FUNCTION_GRAPH_FP_TEST
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
select HAVE_SYSCALL_TRACEPOINTS
select SYSCTL_EXCEPTION_TRACE
select HAVE_KVM
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
......@@ -65,6 +67,7 @@ config X86
select HAVE_PERF_EVENTS_NMI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_DEBUG_KMEMLEAK
select ANON_INODES
select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
select HAVE_CMPXCHG_LOCAL if !M386
......@@ -85,6 +88,7 @@ config X86
select IRQ_FORCED_THREADING
select USE_GENERIC_SMP_HELPERS if SMP
select HAVE_BPF_JIT if X86_64
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select CLKEVT_I8253
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_IOMAP
......@@ -2168,6 +2172,7 @@ config IA32_EMULATION
bool "IA32 Emulation"
depends on X86_64
select COMPAT_BINFMT_ELF
select HAVE_UID16
---help---
Include code to run legacy 32-bit programs under a
64-bit kernel. You should likely turn this on, unless you're
......
......@@ -240,30 +240,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
return c;
}
/*
* atomic_dec_if_positive - decrement by 1 if old value positive
* @v: pointer of type atomic_t
*
* The function returns the old value of *v minus 1, even if
* the atomic variable, v, was not decremented.
*/
static inline int atomic_dec_if_positive(atomic_t *v)
{
int c, old, dec;
c = atomic_read(v);
for (;;) {
dec = c - 1;
if (unlikely(dec < 0))
break;
old = atomic_cmpxchg((v), c, dec);
if (likely(old == c))
break;
c = old;
}
return dec;
}
/**
* atomic_inc_short - increment of a short integer
* @v: pointer to type int
......
......@@ -90,4 +90,8 @@ static inline void arch_release_hugepage(struct page *page)
{
}
static inline void arch_clear_hugepage_flags(struct page *page)
{
}
#endif /* _ASM_X86_HUGETLB_H */
......@@ -146,8 +146,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
static inline int pmd_large(pmd_t pte)
{
return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
(_PAGE_PSE | _PAGE_PRESENT);
return pmd_flags(pte) & _PAGE_PSE;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
......@@ -415,7 +414,13 @@ static inline int pte_hidden(pte_t pte)
static inline int pmd_present(pmd_t pmd)
{
return pmd_flags(pmd) & _PAGE_PRESENT;
/*
* Checking for _PAGE_PSE is needed too because
* split_huge_page will temporarily clear the present bit (but
* the _PAGE_PSE flag will remain set at all times while the
* _PAGE_PRESENT bit is clear).
*/
return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
}
static inline int pmd_none(pmd_t pmd)
......
......@@ -71,6 +71,7 @@ do { \
* tables contain all the necessary information.
*/
#define update_mmu_cache(vma, address, ptep) do { } while (0)
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
#endif /* !__ASSEMBLY__ */
......
......@@ -143,6 +143,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
#define pte_unmap(pte) ((void)(pte))/* NOP */
#define update_mmu_cache(vma, address, ptep) do { } while (0)
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
/* Encode and de-code a swap entry */
#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
......
......@@ -1220,6 +1220,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
......
......@@ -71,7 +71,6 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
struct address_space *mapping = vma->vm_file->f_mapping;
pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff;
struct prio_tree_iter iter;
struct vm_area_struct *svma;
unsigned long saddr;
pte_t *spte = NULL;
......@@ -81,7 +80,7 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
return (pte_t *)pmd_alloc(mm, pud, addr);
mutex_lock(&mapping->i_mmap_mutex);
vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
if (svma == vma)
continue;
......
......@@ -664,20 +664,20 @@ static void free_pfn_range(u64 paddr, unsigned long size)
}
/*
* track_pfn_vma_copy is called when vma that is covering the pfnmap gets
* track_pfn_copy is called when vma that is covering the pfnmap gets
* copied through copy_page_range().
*
* If the vma has a linear pfn mapping for the entire range, we get the prot
* from pte and reserve the entire vma range with single reserve_pfn_range call.
*/
int track_pfn_vma_copy(struct vm_area_struct *vma)
int track_pfn_copy(struct vm_area_struct *vma)
{
resource_size_t paddr;
unsigned long prot;
unsigned long vma_size = vma->vm_end - vma->vm_start;
pgprot_t pgprot;
if (is_linear_pfn_mapping(vma)) {
if (vma->vm_flags & VM_PAT) {
/*
* reserve the whole chunk covered by vma. We need the
* starting address and protection from pte.
......@@ -694,31 +694,59 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
}
/*
* track_pfn_vma_new is called when a _new_ pfn mapping is being established
* for physical range indicated by pfn and size.
*
* prot is passed in as a parameter for the new mapping. If the vma has a
* linear pfn mapping for the entire range reserve the entire vma range with
* single reserve_pfn_range call.
*/
int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long size)
int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long addr, unsigned long size)
{
resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
unsigned long flags;
resource_size_t paddr;
unsigned long vma_size = vma->vm_end - vma->vm_start;
if (is_linear_pfn_mapping(vma)) {
/* reserve the whole chunk starting from vm_pgoff */
paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
return reserve_pfn_range(paddr, vma_size, prot, 0);
/* reserve the whole chunk starting from paddr */
if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) {
int ret;
ret = reserve_pfn_range(paddr, size, prot, 0);
if (!ret)
vma->vm_flags |= VM_PAT;
return ret;
}
if (!pat_enabled)
return 0;
/* for vm_insert_pfn and friends, we set prot based on lookup */
flags = lookup_memtype(pfn << PAGE_SHIFT);
/*
* For anything smaller than the vma size we set prot based on the
* lookup.
*/
flags = lookup_memtype(paddr);
/* Check memtype for the remaining pages */
while (size > PAGE_SIZE) {
size -= PAGE_SIZE;
paddr += PAGE_SIZE;
if (flags != lookup_memtype(paddr))
return -EINVAL;
}
*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
flags);
return 0;
}
int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn)
{
unsigned long flags;
if (!pat_enabled)
return 0;
/* Set prot based on lookup */
flags = lookup_memtype((resource_size_t)pfn << PAGE_SHIFT);
*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
flags);
......@@ -726,22 +754,31 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
}
/*
* untrack_pfn_vma is called while unmapping a pfnmap for a region.
* untrack_pfn is called while unmapping a pfnmap for a region.
* untrack can be called for a specific region indicated by pfn and size or
* can be for the entire vma (in which case size can be zero).
* can be for the entire vma (in which case pfn, size are zero).
*/
void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size)
void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size)
{
resource_size_t paddr;
unsigned long vma_size = vma->vm_end - vma->vm_start;
unsigned long prot;
if (is_linear_pfn_mapping(vma)) {
/* free the whole chunk starting from vm_pgoff */
paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
free_pfn_range(paddr, vma_size);
if (!(vma->vm_flags & VM_PAT))
return;
/* free the chunk starting from pfn or the whole chunk */
paddr = (resource_size_t)pfn << PAGE_SHIFT;
if (!paddr && !size) {
if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
WARN_ON_ONCE(1);
return;
}
size = vma->vm_end - vma->vm_start;
}
free_pfn_range(paddr, size);
vma->vm_flags &= ~VM_PAT;
}
pgprot_t pgprot_writecombine(pgprot_t prot)
......
......@@ -12,7 +12,7 @@
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/rbtree_augmented.h>
#include <linux/sched.h>
#include <linux/gfp.h>
......@@ -54,29 +54,24 @@ static u64 get_subtree_max_end(struct rb_node *node)
return ret;
}
/* Update 'subtree_max_end' for a node, based on node and its children */
static void memtype_rb_augment_cb(struct rb_node *node, void *__unused)
static u64 compute_subtree_max_end(struct memtype *data)
{
struct memtype *data;
u64 max_end, child_max_end;
if (!node)
return;
u64 max_end = data->end, child_max_end;
data = container_of(node, struct memtype, rb);
max_end = data->end;
child_max_end = get_subtree_max_end(node->rb_right);
child_max_end = get_subtree_max_end(data->rb.rb_right);
if (child_max_end > max_end)
max_end = child_max_end;
child_max_end = get_subtree_max_end(node->rb_left);
child_max_end = get_subtree_max_end(data->rb.rb_left);
if (child_max_end > max_end)
max_end = child_max_end;
data->subtree_max_end = max_end;
return max_end;
}
RB_DECLARE_CALLBACKS(static, memtype_rb_augment_cb, struct memtype, rb,
u64, subtree_max_end, compute_subtree_max_end)
/* Find the first (lowest start addr) overlapping range from rb tree */
static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
u64 start, u64 end)
......@@ -179,15 +174,17 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
struct memtype *data = container_of(*node, struct memtype, rb);
parent = *node;
if (data->subtree_max_end < newdata->end)
data->subtree_max_end = newdata->end;
if (newdata->start <= data->start)
node = &((*node)->rb_left);
else if (newdata->start > data->start)
node = &((*node)->rb_right);
}
newdata->subtree_max_end = newdata->end;
rb_link_node(&newdata->rb, parent, node);
rb_insert_color(&newdata->rb, root);
rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL);
rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb);
}
int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
......@@ -209,16 +206,13 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
struct memtype *rbt_memtype_erase(u64 start, u64 end)
{
struct rb_node *deepest;
struct memtype *data;
data = memtype_rb_exact_match(&memtype_rbroot, start, end);
if (!data)
goto out;
deepest = rb_augment_erase_begin(&data->rb);
rb_erase(&data->rb, &memtype_rbroot);
rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL);
rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb);
out:
return data;
}
......
......@@ -2451,8 +2451,7 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
(VM_PFNMAP | VM_RESERVED | VM_IO)));
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
rmd.mfn = mfn;
rmd.prot = prot;
......
......@@ -126,6 +126,7 @@ void do_page_fault(struct pt_regs *regs)
current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
/* No need to up_read(&mm->mmap_sem) as we would
* have already released it in __lock_page_or_retry
......
......@@ -248,26 +248,23 @@ static bool pages_correctly_reserved(unsigned long start_pfn,
static int
memory_block_action(unsigned long phys_index, unsigned long action)
{
unsigned long start_pfn, start_paddr;
unsigned long start_pfn;
unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
struct page *first_page;
int ret;
first_page = pfn_to_page(phys_index << PFN_SECTION_SHIFT);
start_pfn = page_to_pfn(first_page);
switch (action) {
case MEM_ONLINE:
start_pfn = page_to_pfn(first_page);
if (!pages_correctly_reserved(start_pfn, nr_pages))
return -EBUSY;
ret = online_pages(start_pfn, nr_pages);
break;
case MEM_OFFLINE:
start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
ret = remove_memory(start_paddr,
nr_pages << PAGE_SHIFT);
ret = offline_pages(start_pfn, nr_pages);
break;
default:
WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
......@@ -278,13 +275,11 @@ memory_block_action(unsigned long phys_index, unsigned long action)
return ret;
}
static int memory_block_change_state(struct memory_block *mem,
static int __memory_block_change_state(struct memory_block *mem,
unsigned long to_state, unsigned long from_state_req)
{
int ret = 0;
mutex_lock(&mem->state_mutex);
if (mem->state != from_state_req) {
ret = -EINVAL;
goto out;
......@@ -312,10 +307,20 @@ static int memory_block_change_state(struct memory_block *mem,
break;
}
out:
mutex_unlock(&mem->state_mutex);
return ret;
}
static int memory_block_change_state(struct memory_block *mem,
unsigned long to_state, unsigned long from_state_req)
{
int ret;
mutex_lock(&mem->state_mutex);
ret = __memory_block_change_state(mem, to_state, from_state_req);
mutex_unlock(&mem->state_mutex);
return ret;
}
static ssize_t
store_mem_state(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
......@@ -655,6 +660,21 @@ int unregister_memory_section(struct mem_section *section)
return remove_memory_block(0, section, 0);
}
/*
* offline one memory block. If the memory block has been offlined, do nothing.
*/
int offline_memory_block(struct memory_block *mem)
{
int ret = 0;
mutex_lock(&mem->state_mutex);
if (mem->state != MEM_OFFLINE)
ret = __memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
mutex_unlock(&mem->state_mutex);
return ret;
}
/*
* Initialize the sysfs support for memory devices...
*/
......
......@@ -507,7 +507,7 @@ static int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma)
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
/* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
/* Remap-pfn-range will mark the range VM_IO */
if (remap_pfn_range(vma,
vma->vm_start,
__pa(soft->gscr_addr) >> PAGE_SHIFT,
......
......@@ -322,7 +322,7 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma)
vma->vm_ops = &mmap_mem_ops;
/* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
/* Remap-pfn-range will mark the range VM_IO */
if (remap_pfn_range(vma,
vma->vm_start,
vma->vm_pgoff,
......
......@@ -286,7 +286,7 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
atomic_set(&vdata->refcnt, 1);
vma->vm_private_data = vdata;
vma->vm_flags |= (VM_IO | VM_RESERVED | VM_PFNMAP | VM_DONTEXPAND);
vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
if (vdata->type == MSPEC_FETCHOP || vdata->type == MSPEC_UNCACHED)
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
vma->vm_ops = &mspec_vm_ops;
......
......@@ -706,7 +706,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
goto out_unlock;
}
vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
vma->vm_ops = obj->dev->driver->gem_vm_ops;
vma->vm_private_data = map->handle;
vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
......
......@@ -514,8 +514,7 @@ static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma)
vma->vm_ops = &drm_vm_dma_ops;
vma->vm_flags |= VM_RESERVED; /* Don't swap */
vma->vm_flags |= VM_DONTEXPAND;
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
drm_vm_open_locked(dev, vma);
return 0;
......@@ -643,21 +642,16 @@ int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
case _DRM_SHM:
vma->vm_ops = &drm_vm_shm_ops;
vma->vm_private_data = (void *)map;
/* Don't let this area swap. Change when
DRM_KERNEL advisory is supported. */
vma->vm_flags |= VM_RESERVED;
break;
case _DRM_SCATTER_GATHER:
vma->vm_ops = &drm_vm_sg_ops;
vma->vm_private_data = (void *)map;
vma->vm_flags |= VM_RESERVED;
vma->vm_page_prot = drm_dma_prot(map->type, vma);
break;
default:
return -EINVAL; /* This should never happen. */
}
vma->vm_flags |= VM_RESERVED; /* Don't swap */
vma->vm_flags |= VM_DONTEXPAND;
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
drm_vm_open_locked(dev, vma);
return 0;
......
......@@ -500,7 +500,7 @@ static int exynos_drm_gem_mmap_buffer(struct file *filp,
DRM_DEBUG_KMS("%s\n", __FILE__);
vma->vm_flags |= (VM_IO | VM_RESERVED);
vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
update_vm_cache_attr(exynos_gem_obj, vma);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment