Commit 9e2d8656 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'akpm' (Andrew's patch-bomb)

Merge patches from Andrew Morton:
 "A few misc things and very nearly all of the MM tree.  A tremendous
  amount of stuff (again), including a significant rbtree library
  rework."

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (160 commits)
  sparc64: Support transparent huge pages.
  mm: thp: Use more portable PMD clearing sequenece in zap_huge_pmd().
  mm: Add and use update_mmu_cache_pmd() in transparent huge page code.
  sparc64: Document PGD and PMD layout.
  sparc64: Eliminate PTE table memory wastage.
  sparc64: Halve the size of PTE tables
  sparc64: Only support 4MB huge pages and 8KB base pages.
  memory-hotplug: suppress "Trying to free nonexistent resource <XXXXXXXXXXXXXXXX-YYYYYYYYYYYYYYYY>" warning
  mm: memcg: clean up mm_match_cgroup() signature
  mm: document PageHuge somewhat
  mm: use %pK for /proc/vmallocinfo
  mm, thp: fix mlock statistics
  mm, thp: fix mapped pages avoiding unevictable list on mlock
  memory-hotplug: update memory block's state and notify userspace
  memory-hotplug: preparation to notify memory block's state at memory hot remove
  mm: avoid section mismatch warning for memblock_type_name
  make GFP_NOTRACK definition unconditional
  cma: decrease cc.nr_migratepages after reclaiming pagelist
  CMA: migrate mlocked pages
  kpageflags: fix wrong KPF_THP on non-huge compound pages
  ...
parents 1ea4f4f8 9e695d2e
......@@ -36,7 +36,7 @@ static inline void tsb_context_switch(struct mm_struct *mm)
{
__tsb_context_switch(__pa(mm->pgd),
&mm->context.tsb_block[0],
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
(mm->context.tsb_block[1].tsb ?
&mm->context.tsb_block[1] :
NULL)
......
......@@ -3,13 +3,7 @@
#include <linux/const.h>
#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
#define PAGE_SHIFT 13
#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
#define PAGE_SHIFT 16
#else
#error No page size specified in kernel configuration
#endif
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
......@@ -21,15 +15,9 @@
#define DCACHE_ALIASING_POSSIBLE
#endif
#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
#define HPAGE_SHIFT 22
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
#define HPAGE_SHIFT 19
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
#define HPAGE_SHIFT 16
#endif
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1UL))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
......@@ -38,6 +26,11 @@
#ifndef __ASSEMBLY__
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
struct mm_struct;
extern void hugetlb_setup(struct mm_struct *mm);
#endif
#define WANT_PAGE_VIRTUAL
extern void _clear_page(void *page);
......@@ -98,7 +91,7 @@ typedef unsigned long pgprot_t;
#endif /* (STRICT_MM_TYPECHECKS) */
typedef struct page *pgtable_t;
typedef pte_t *pgtable_t;
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
(_AC(0x0000000070000000,UL)) : \
......
......@@ -38,51 +38,20 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
kmem_cache_free(pgtable_cache, pmd);
}
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address)
{
return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
}
static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
unsigned long address)
{
struct page *page;
pte_t *pte;
pte = pte_alloc_one_kernel(mm, address);
if (!pte)
return NULL;
page = virt_to_page(pte);
pgtable_page_ctor(page);
return page;
}
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_page((unsigned long)pte);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
{
pgtable_page_dtor(ptepage);
__free_page(ptepage);
}
extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address);
extern pgtable_t pte_alloc_one(struct mm_struct *mm,
unsigned long address);
extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
extern void pte_free(struct mm_struct *mm, pgtable_t ptepage);
#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
#define pmd_populate(MM,PMD,PTE_PAGE) \
pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE))
#define pmd_pgtable(pmd) pmd_page(pmd)
#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(MM, PMD, PTE)
#define pmd_populate(MM, PMD, PTE) pmd_set(MM, PMD, PTE)
#define pmd_pgtable(PMD) ((pte_t *)__pmd_page(PMD))
#define check_pgt_cache() do { } while (0)
static inline void pgtable_free(void *table, bool is_page)
{
if (is_page)
free_page((unsigned long)table);
else
kmem_cache_free(pgtable_cache, table);
}
extern void pgtable_free(void *table, bool is_page);
#ifdef CONFIG_SMP
......@@ -113,11 +82,10 @@ static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, bool is
}
#endif /* !CONFIG_SMP */
static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage,
static inline void __pte_free_tlb(struct mmu_gather *tlb, pte_t *pte,
unsigned long address)
{
pgtable_page_dtor(ptepage);
pgtable_free_tlb(tlb, page_address(ptepage), true);
pgtable_free_tlb(tlb, pte, true);
}
#define __pmd_free_tlb(tlb, pmd, addr) \
......
......@@ -45,40 +45,59 @@
#define vmemmap ((struct page *)VMEMMAP_BASE)
/* XXX All of this needs to be rethought so we can take advantage
* XXX cheetah's full 64-bit virtual address space, ie. no more hole
* XXX in the middle like on spitfire. -DaveM
*/
/*
* Given a virtual address, the lowest PAGE_SHIFT bits determine offset
* into the page; the next higher PAGE_SHIFT-3 bits determine the pte#
* in the proper pagetable (the -3 is from the 8 byte ptes, and each page
* table is a single page long). The next higher PMD_BITS determine pmd#
* in the proper pmdtable (where we must have PMD_BITS <= (PAGE_SHIFT-2)
* since the pmd entries are 4 bytes, and each pmd page is a single page
* long). Finally, the higher few bits determine pgde#.
*/
/* PMD_SHIFT determines the size of the area a second-level page
* table can map
*/
#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3))
#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4))
#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
#define PMD_BITS (PAGE_SHIFT - 2)
/* PGDIR_SHIFT determines what a third-level page table entry can map */
#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS)
#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4) + PMD_BITS)
#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PGDIR_BITS (PAGE_SHIFT - 2)
#if (PGDIR_SHIFT + PGDIR_BITS) != 44
#error Page table parameters do not cover virtual address space properly.
#endif
#if (PMD_SHIFT != HPAGE_SHIFT)
#error PMD_SHIFT must equal HPAGE_SHIFT for transparent huge pages.
#endif
/* PMDs point to PTE tables which are 4K aligned. */
#define PMD_PADDR _AC(0xfffffffe,UL)
#define PMD_PADDR_SHIFT _AC(11,UL)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define PMD_ISHUGE _AC(0x00000001,UL)
/* This is the PMD layout when PMD_ISHUGE is set. With 4MB huge
* pages, this frees up a bunch of bits in the layout that we can
* use for the protection settings and software metadata.
*/
#define PMD_HUGE_PADDR _AC(0xfffff800,UL)
#define PMD_HUGE_PROTBITS _AC(0x000007ff,UL)
#define PMD_HUGE_PRESENT _AC(0x00000400,UL)
#define PMD_HUGE_WRITE _AC(0x00000200,UL)
#define PMD_HUGE_DIRTY _AC(0x00000100,UL)
#define PMD_HUGE_ACCESSED _AC(0x00000080,UL)
#define PMD_HUGE_EXEC _AC(0x00000040,UL)
#define PMD_HUGE_SPLITTING _AC(0x00000020,UL)
#endif
/* PGDs point to PMD tables which are 8K aligned. */
#define PGD_PADDR _AC(0xfffffffc,UL)
#define PGD_PADDR_SHIFT _AC(11,UL)
#ifndef __ASSEMBLY__
#include <linux/sched.h>
/* Entries per page directory level. */
#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3))
#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-4))
#define PTRS_PER_PMD (1UL << PMD_BITS)
#define PTRS_PER_PGD (1UL << PGDIR_BITS)
......@@ -160,26 +179,11 @@
#define _PAGE_SZ8K_4V _AC(0x0000000000000000,UL) /* 8K Page */
#define _PAGE_SZALL_4V _AC(0x0000000000000007,UL) /* All pgsz bits */
#if PAGE_SHIFT == 13
#define _PAGE_SZBITS_4U _PAGE_SZ8K_4U
#define _PAGE_SZBITS_4V _PAGE_SZ8K_4V
#elif PAGE_SHIFT == 16
#define _PAGE_SZBITS_4U _PAGE_SZ64K_4U
#define _PAGE_SZBITS_4V _PAGE_SZ64K_4V
#else
#error Wrong PAGE_SHIFT specified
#endif
#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
#define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U
#define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
#define _PAGE_SZHUGE_4U _PAGE_SZ512K_4U
#define _PAGE_SZHUGE_4V _PAGE_SZ512K_4V
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
#define _PAGE_SZHUGE_4U _PAGE_SZ64K_4U
#define _PAGE_SZHUGE_4V _PAGE_SZ64K_4V
#endif
/* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */
#define __P000 __pgprot(0)
......@@ -218,7 +222,6 @@ extern unsigned long _PAGE_CACHE;
extern unsigned long pg_iobits;
extern unsigned long _PAGE_ALL_SZ_BITS;
extern unsigned long _PAGE_SZBITS;
extern struct page *mem_map_zero;
#define ZERO_PAGE(vaddr) (mem_map_zero)
......@@ -231,25 +234,25 @@ extern struct page *mem_map_zero;
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
{
unsigned long paddr = pfn << PAGE_SHIFT;
unsigned long sz_bits;
sz_bits = 0UL;
if (_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL) {
__asm__ __volatile__(
"\n661: sethi %%uhi(%1), %0\n"
" sllx %0, 32, %0\n"
" .section .sun4v_2insn_patch, \"ax\"\n"
" .word 661b\n"
" mov %2, %0\n"
" nop\n"
" .previous\n"
: "=r" (sz_bits)
: "i" (_PAGE_SZBITS_4U), "i" (_PAGE_SZBITS_4V));
}
return __pte(paddr | sz_bits | pgprot_val(prot));
BUILD_BUG_ON(_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL);
return __pte(paddr | pgprot_val(prot));
}
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot);
#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
/* Do nothing, mk_pmd() does this part. */
return pmd;
}
#endif
/* This one can be done with two shifts. */
static inline unsigned long pte_pfn(pte_t pte)
{
......@@ -286,6 +289,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
* Note: We encode this into 3 sun4v 2-insn patch sequences.
*/
BUILD_BUG_ON(_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL);
__asm__ __volatile__(
"\n661: sethi %%uhi(%2), %1\n"
" sethi %%hi(%2), %0\n"
......@@ -307,10 +311,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
: "=r" (mask), "=r" (tmp)
: "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U |
_PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U |
_PAGE_SZBITS_4U | _PAGE_SPECIAL),
_PAGE_SPECIAL),
"i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V |
_PAGE_SZBITS_4V | _PAGE_SPECIAL));
_PAGE_SPECIAL));
return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask));
}
......@@ -618,19 +622,130 @@ static inline unsigned long pte_special(pte_t pte)
return pte_val(pte) & _PAGE_SPECIAL;
}
#define pmd_set(pmdp, ptep) \
(pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> 11UL))
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_young(pmd_t pmd)
{
return pmd_val(pmd) & PMD_HUGE_ACCESSED;
}
static inline int pmd_write(pmd_t pmd)
{
return pmd_val(pmd) & PMD_HUGE_WRITE;
}
static inline unsigned long pmd_pfn(pmd_t pmd)
{
unsigned long val = pmd_val(pmd) & PMD_HUGE_PADDR;
return val >> (PAGE_SHIFT - PMD_PADDR_SHIFT);
}
static inline int pmd_large(pmd_t pmd)
{
return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) ==
(PMD_ISHUGE | PMD_HUGE_PRESENT);
}
static inline int pmd_trans_splitting(pmd_t pmd)
{
return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) ==
(PMD_ISHUGE|PMD_HUGE_SPLITTING);
}
static inline int pmd_trans_huge(pmd_t pmd)
{
return pmd_val(pmd) & PMD_ISHUGE;
}
#define has_transparent_hugepage() 1
static inline pmd_t pmd_mkold(pmd_t pmd)
{
pmd_val(pmd) &= ~PMD_HUGE_ACCESSED;
return pmd;
}
static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
pmd_val(pmd) &= ~PMD_HUGE_WRITE;
return pmd;
}
static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
pmd_val(pmd) |= PMD_HUGE_DIRTY;
return pmd;
}
static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
pmd_val(pmd) |= PMD_HUGE_ACCESSED;
return pmd;
}
static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
pmd_val(pmd) |= PMD_HUGE_WRITE;
return pmd;
}
static inline pmd_t pmd_mknotpresent(pmd_t pmd)
{
pmd_val(pmd) &= ~PMD_HUGE_PRESENT;
return pmd;
}
static inline pmd_t pmd_mksplitting(pmd_t pmd)
{
pmd_val(pmd) |= PMD_HUGE_SPLITTING;
return pmd;
}
extern pgprot_t pmd_pgprot(pmd_t entry);
#endif
static inline int pmd_present(pmd_t pmd)
{
return pmd_val(pmd) != 0U;
}
#define pmd_none(pmd) (!pmd_val(pmd))
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd);
#else
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
*pmdp = pmd;
}
#endif
static inline void pmd_set(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
{
unsigned long val = __pa((unsigned long) (ptep)) >> PMD_PADDR_SHIFT;
pmd_val(*pmdp) = val;
}
#define pud_set(pudp, pmdp) \
(pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> 11UL))
#define __pmd_page(pmd) \
((unsigned long) __va((((unsigned long)pmd_val(pmd))<<11UL)))
(pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> PGD_PADDR_SHIFT))
static inline unsigned long __pmd_page(pmd_t pmd)
{
unsigned long paddr = (unsigned long) pmd_val(pmd);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (pmd_val(pmd) & PMD_ISHUGE)
paddr &= PMD_HUGE_PADDR;
#endif
paddr <<= PMD_PADDR_SHIFT;
return ((unsigned long) __va(paddr));
}
#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd))
#define pud_page_vaddr(pud) \
((unsigned long) __va((((unsigned long)pud_val(pud))<<11UL)))
((unsigned long) __va((((unsigned long)pud_val(pud))<<PGD_PADDR_SHIFT)))
#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud))
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (0)
#define pmd_present(pmd) (pmd_val(pmd) != 0U)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0U)
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (0)
......@@ -664,6 +779,16 @@ static inline unsigned long pte_special(pte_t pte)
extern void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
pte_t *ptep, pte_t orig, int fullmm);
#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
unsigned long addr,
pmd_t *pmdp)
{
pmd_t pmd = *pmdp;
set_pmd_at(mm, addr, pmdp, __pmd(0U));
return pmd;
}
static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, int fullmm)
{
......@@ -719,6 +844,16 @@ extern void mmu_info(struct seq_file *);
struct vm_area_struct;
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd);
#define __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
#define __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
#endif
/* Encode and de-code a swap entry */
#define __swp_type(entry) (((entry).val >> PAGE_SHIFT) & 0xffUL)
......
......@@ -147,20 +147,96 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
sllx REG1, 11, REG1; \
sllx REG1, PGD_PADDR_SHIFT, REG1; \
andn REG2, 0x3, REG2; \
lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - PMD_SHIFT, REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
sllx REG1, 11, REG1; \
srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \
sllx REG1, PMD_PADDR_SHIFT, REG1; \
andn REG2, 0x7, REG2; \
add REG1, REG2, REG1;
/* Do a user page table walk in MMU globals. Leaves physical PTE
* pointer in REG1. Jumps to FAIL_LABEL on early page table walk
* termination. Physical base of page tables is in PHYS_PGD which
* will not be modified.
/* This macro exists only to make the PMD translator below easier
* to read. It hides the ELF section switch for the sun4v code
* patching.
*/
#define OR_PTE_BIT(REG, NAME) \
661: or REG, _PAGE_##NAME##_4U, REG; \
.section .sun4v_1insn_patch, "ax"; \
.word 661b; \
or REG, _PAGE_##NAME##_4V, REG; \
.previous;
/* Load into REG the PTE value for VALID, CACHE, and SZHUGE. */
#define BUILD_PTE_VALID_SZHUGE_CACHE(REG) \
661: sethi %uhi(_PAGE_VALID|_PAGE_SZHUGE_4U), REG; \
.section .sun4v_1insn_patch, "ax"; \
.word 661b; \
sethi %uhi(_PAGE_VALID), REG; \
.previous; \
sllx REG, 32, REG; \
661: or REG, _PAGE_CP_4U|_PAGE_CV_4U, REG; \
.section .sun4v_1insn_patch, "ax"; \
.word 661b; \
or REG, _PAGE_CP_4V|_PAGE_CV_4V|_PAGE_SZHUGE_4V, REG; \
.previous;
/* PMD has been loaded into REG1, interpret the value, seeing
* if it is a HUGE PMD or a normal one. If it is not valid
* then jump to FAIL_LABEL. If it is a HUGE PMD, and it
* translates to a valid PTE, branch to PTE_LABEL.
*
* We translate the PMD by hand, one bit at a time,
* constructing the huge PTE.
*
* So we construct the PTE in REG2 as follows:
*
* 1) Extract the PMD PFN from REG1 and place it into REG2.
*
* 2) Translate PMD protection bits in REG1 into REG2, one bit
* at a time using andcc tests on REG1 and OR's into REG2.
*
* Only two bits to be concerned with here, EXEC and WRITE.
* Now REG1 is freed up and we can use it as a temporary.
*
* 3) Construct the VALID, CACHE, and page size PTE bits in
* REG1, OR with REG2 to form final PTE.
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
brz,pn REG1, FAIL_LABEL; \
andcc REG1, PMD_ISHUGE, %g0; \
be,pt %xcc, 700f; \
and REG1, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED, REG2; \
cmp REG2, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED; \
bne,pn %xcc, FAIL_LABEL; \
andn REG1, PMD_HUGE_PROTBITS, REG2; \
sllx REG2, PMD_PADDR_SHIFT, REG2; \
/* REG2 now holds PFN << PAGE_SHIFT */ \
andcc REG1, PMD_HUGE_EXEC, %g0; \
bne,a,pt %xcc, 1f; \
OR_PTE_BIT(REG2, EXEC); \
1: andcc REG1, PMD_HUGE_WRITE, %g0; \
bne,a,pt %xcc, 1f; \
OR_PTE_BIT(REG2, W); \
/* REG1 can now be clobbered, build final PTE */ \
1: BUILD_PTE_VALID_SZHUGE_CACHE(REG1); \
ba,pt %xcc, PTE_LABEL; \
or REG1, REG2, REG1; \
700:
#else
#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
brz,pn REG1, FAIL_LABEL; \
nop;
#endif
/* Do a user page table walk in MMU globals. Leaves final,
* valid, PTE value in REG1. Jumps to FAIL_LABEL on early
* page table walk termination or if the PTE is not valid.
*
* Physical base of page tables is in PHYS_PGD which will not
* be modified.
*
* VADDR will not be clobbered, but REG1 and REG2 will.
*/
......@@ -172,15 +248,19 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
sllx REG1, 11, REG1; \
sllx REG1, PGD_PADDR_SHIFT, REG1; \
andn REG2, 0x3, REG2; \
lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - PMD_SHIFT, REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
sllx REG1, 11, REG1; \
USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
sllx VADDR, 64 - PMD_SHIFT, REG2; \
srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \
sllx REG1, PMD_PADDR_SHIFT, REG1; \
andn REG2, 0x7, REG2; \
add REG1, REG2, REG1;
add REG1, REG2, REG1; \
ldxa [REG1] ASI_PHYS_USE_EC, REG1; \
brgez,pn REG1, FAIL_LABEL; \
nop; \
800:
/* Lookup a OBP mapping on VADDR in the prom_trans[] table at TL>0.
* If no entry is found, FAIL_LABEL will be branched to. On success
......
......@@ -779,7 +779,7 @@ static int __pci_mmap_make_offset(struct pci_dev *pdev,
static void __pci_mmap_set_flags(struct pci_dev *dev, struct vm_area_struct *vma,
enum pci_mmap_state mmap_state)
{
vma->vm_flags |= (VM_IO | VM_RESERVED);
vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
}
/* Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
......
......@@ -176,7 +176,7 @@ sun4v_tsb_miss_common:
sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
mov SCRATCHPAD_UTSBREG2, %g5
ldxa [%g5] ASI_SCRATCHPAD, %g5
cmp %g5, -1
......
......@@ -49,7 +49,7 @@ tsb_miss_page_table_walk:
/* Before committing to a full page table walk,
* check the huge page TSB.
*/
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
661: ldx [%g7 + TRAP_PER_CPU_TSB_HUGE], %g5
nop
......@@ -110,12 +110,9 @@ tsb_miss_page_table_walk:
tsb_miss_page_table_walk_sun4v_fastpath:
USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
/* Load and check PTE. */
ldxa [%g5] ASI_PHYS_USE_EC, %g5
brgez,pn %g5, tsb_do_fault
nop
/* Valid PTE is now in %g5. */
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
661: sethi %uhi(_PAGE_SZALL_4U), %g7
sllx %g7, 32, %g7
.section .sun4v_2insn_patch, "ax"
......
......@@ -265,6 +265,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
}
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
/* No need to up_read(&mm->mmap_sem) as we would
* have already released it in __lock_page_or_retry
......
......@@ -452,6 +452,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
}
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
/* No need to up_read(&mm->mmap_sem) as we would
* have already released it in __lock_page_or_retry
......@@ -464,13 +465,13 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
up_read(&mm->mmap_sem);
mm_rss = get_mm_rss(mm);
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE));
#endif
if (unlikely(mm_rss >
mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
tsb_grow(mm, MM_TSB_BASE, mm_rss);
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
mm_rss = mm->context.huge_pte_count;
if (unlikely(mm_rss >
mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit))
......
......@@ -303,53 +303,3 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
{
return NULL;
}
static void context_reload(void *__data)
{
struct mm_struct *mm = __data;
if (mm == current->mm)
load_secondary_context(mm);
}
void hugetlb_prefault_arch_hook(struct mm_struct *mm)
{
struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
if (likely(tp->tsb != NULL))
return;
tsb_grow(mm, MM_TSB_HUGE, 0);
tsb_context_switch(mm);
smp_tsb_sync(mm);
/* On UltraSPARC-III+ and later, configure the second half of
* the Data-TLB for huge pages.
*/
if (tlb_type == cheetah_plus) {
unsigned long ctx;
spin_lock(&ctx_alloc_lock);
ctx = mm->context.sparc64_ctx_val;
ctx &= ~CTX_PGSZ_MASK;
ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
if (ctx != mm->context.sparc64_ctx_val) {
/* When changing the page size fields, we
* must perform a context flush so that no
* stale entries match. This flush must
* occur with the original context register
* settings.
*/
do_flush_tlb_mm(mm);
/* Reload the context register of all processors
* also executing in this address space.
*/
mm->context.sparc64_ctx_val = ctx;
on_each_cpu(context_reload, mm, 0);
}
spin_unlock(&ctx_alloc_lock);
}
}
......@@ -276,7 +276,6 @@ static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long
}
unsigned long _PAGE_ALL_SZ_BITS __read_mostly;
unsigned long _PAGE_SZBITS __read_mostly;
static void flush_dcache(unsigned long pfn)
{
......@@ -307,12 +306,24 @@ static void flush_dcache(unsigned long pfn)
}
}
/* mm->context.lock must be held */
static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index,
unsigned long tsb_hash_shift, unsigned long address,
unsigned long tte)
{
struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb;
unsigned long tag;
tsb += ((address >> tsb_hash_shift) &
(mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
tag = (address >> 22UL);
tsb_insert(tsb, tag, tte);
}
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
{
unsigned long tsb_index, tsb_hash_shift, flags;
struct mm_struct *mm;
struct tsb *tsb;
unsigned long tag, flags;
unsigned long tsb_index, tsb_hash_shift;
pte_t pte = *ptep;
if (tlb_type != hypervisor) {
......@@ -329,7 +340,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
spin_lock_irqsave(&mm->context.lock, flags);
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) {
if ((tlb_type == hypervisor &&
(pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) ||
......@@ -341,11 +352,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
}
#endif
tsb = mm->context.tsb_block[tsb_index].tsb;
tsb += ((address >> tsb_hash_shift) &
(mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
tag = (address >> 22UL);
tsb_insert(tsb, tag, pte_val(pte));
__update_mmu_tsb_insert(mm, tsb_index, tsb_hash_shift,
address, pte_val(pte));
spin_unlock_irqrestore(&mm->context.lock, flags);
}
......@@ -2275,8 +2283,7 @@ static void __init sun4u_pgprot_init(void)
__ACCESS_BITS_4U | _PAGE_E_4U);
#ifdef CONFIG_DEBUG_PAGEALLOC
kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZBITS_4U) ^
0xfffff80000000000UL;
kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL;
#else
kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^
0xfffff80000000000UL;
......@@ -2287,7 +2294,6 @@ static void __init sun4u_pgprot_init(void)
for (i = 1; i < 4; i++)
kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
_PAGE_SZBITS = _PAGE_SZBITS_4U;
_PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U |
_PAGE_SZ64K_4U | _PAGE_SZ8K_4U |
_PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U);
......@@ -2324,8 +2330,7 @@ static void __init sun4v_pgprot_init(void)
_PAGE_CACHE = _PAGE_CACHE_4V;
#ifdef CONFIG_DEBUG_PAGEALLOC
kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZBITS_4V) ^
0xfffff80000000000UL;
kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL;
#else
kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
0xfffff80000000000UL;
......@@ -2339,7 +2344,6 @@ static void __init sun4v_pgprot_init(void)
pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V |
__ACCESS_BITS_4V | _PAGE_E_4V);
_PAGE_SZBITS = _PAGE_SZBITS_4V;
_PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V |
_PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V |
_PAGE_SZ4MB_4V | _PAGE_SZ512K_4V |
......@@ -2472,3 +2476,281 @@ void __flush_tlb_all(void)
__asm__ __volatile__("wrpr %0, 0, %%pstate"
: : "r" (pstate));
}
static pte_t *get_from_cache(struct mm_struct *mm)
{
struct page *page;
pte_t *ret;
spin_lock(&mm->page_table_lock);
page = mm->context.pgtable_page;
ret = NULL;
if (page) {
void *p = page_address(page);
mm->context.pgtable_page = NULL;
ret = (pte_t *) (p + (PAGE_SIZE / 2));
}
spin_unlock(&mm->page_table_lock);
return ret;
}
static struct page *__alloc_for_cache(struct mm_struct *mm)
{
struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
__GFP_REPEAT | __GFP_ZERO);
if (page) {
spin_lock(&mm->page_table_lock);
if (!mm->context.pgtable_page) {
atomic_set(&page->_count, 2);
mm->context.pgtable_page = page;
}
spin_unlock(&mm->page_table_lock);
}
return page;
}
pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address)
{
struct page *page;
pte_t *pte;
pte = get_from_cache(mm);
if (pte)
return pte;
page = __alloc_for_cache(mm);
if (page)
pte = (pte_t *) page_address(page);
return pte;
}
pgtable_t pte_alloc_one(struct mm_struct *mm,
unsigned long address)
{
struct page *page;
pte_t *pte;
pte = get_from_cache(mm);
if (pte)
return pte;
page = __alloc_for_cache(mm);
if (page) {
pgtable_page_ctor(page);
pte = (pte_t *) page_address(page);
}
return pte;
}
void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
struct page *page = virt_to_page(pte);
if (put_page_testzero(page))
free_hot_cold_page(page, 0);
}
static void __pte_free(pgtable_t pte)
{
struct page *page = virt_to_page(pte);
if (put_page_testzero(page)) {
pgtable_page_dtor(page);
free_hot_cold_page(page, 0);
}
}
void pte_free(struct mm_struct *mm, pgtable_t pte)
{
__pte_free(pte);
}
void pgtable_free(void *table, bool is_page)
{
if (is_page)
__pte_free(table);
else
kmem_cache_free(pgtable_cache, table);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot, bool for_modify)
{
if (pgprot_val(pgprot) & _PAGE_VALID)
pmd_val(pmd) |= PMD_HUGE_PRESENT;
if (tlb_type == hypervisor) {
if (pgprot_val(pgprot) & _PAGE_WRITE_4V)
pmd_val(pmd) |= PMD_HUGE_WRITE;
if (pgprot_val(pgprot) & _PAGE_EXEC_4V)
pmd_val(pmd) |= PMD_HUGE_EXEC;
if (!for_modify) {
if (pgprot_val(pgprot) & _PAGE_ACCESSED_4V)
pmd_val(pmd) |= PMD_HUGE_ACCESSED;
if (pgprot_val(pgprot) & _PAGE_MODIFIED_4V)
pmd_val(pmd) |= PMD_HUGE_DIRTY;
}
} else {
if (pgprot_val(pgprot) & _PAGE_WRITE_4U)
pmd_val(pmd) |= PMD_HUGE_WRITE;
if (pgprot_val(pgprot) & _PAGE_EXEC_4U)
pmd_val(pmd) |= PMD_HUGE_EXEC;
if (!for_modify) {
if (pgprot_val(pgprot) & _PAGE_ACCESSED_4U)
pmd_val(pmd) |= PMD_HUGE_ACCESSED;
if (pgprot_val(pgprot) & _PAGE_MODIFIED_4U)
pmd_val(pmd) |= PMD_HUGE_DIRTY;
}
}
return pmd;
}
pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
{
pmd_t pmd;
pmd_val(pmd) = (page_nr << ((PAGE_SHIFT - PMD_PADDR_SHIFT)));
pmd_val(pmd) |= PMD_ISHUGE;
pmd = pmd_set_protbits(pmd, pgprot, false);
return pmd;
}
pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
pmd_val(pmd) &= ~(PMD_HUGE_PRESENT |
PMD_HUGE_WRITE |
PMD_HUGE_EXEC);
pmd = pmd_set_protbits(pmd, newprot, true);
return pmd;
}
pgprot_t pmd_pgprot(pmd_t entry)
{
unsigned long pte = 0;
if (pmd_val(entry) & PMD_HUGE_PRESENT)
pte |= _PAGE_VALID;
if (tlb_type == hypervisor) {
if (pmd_val(entry) & PMD_HUGE_PRESENT)
pte |= _PAGE_PRESENT_4V;
if (pmd_val(entry) & PMD_HUGE_EXEC)
pte |= _PAGE_EXEC_4V;
if (pmd_val(entry) & PMD_HUGE_WRITE)
pte |= _PAGE_W_4V;
if (pmd_val(entry) & PMD_HUGE_ACCESSED)
pte |= _PAGE_ACCESSED_4V;
if (pmd_val(entry) & PMD_HUGE_DIRTY)
pte |= _PAGE_MODIFIED_4V;
pte |= _PAGE_CP_4V|_PAGE_CV_4V;
} else {
if (pmd_val(entry) & PMD_HUGE_PRESENT)
pte |= _PAGE_PRESENT_4U;
if (pmd_val(entry) & PMD_HUGE_EXEC)
pte |= _PAGE_EXEC_4U;
if (pmd_val(entry) & PMD_HUGE_WRITE)
pte |= _PAGE_W_4U;
if (pmd_val(entry) & PMD_HUGE_ACCESSED)
pte |= _PAGE_ACCESSED_4U;
if (pmd_val(entry) & PMD_HUGE_DIRTY)
pte |= _PAGE_MODIFIED_4U;
pte |= _PAGE_CP_4U|_PAGE_CV_4U;
}
return __pgprot(pte);
}
void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd)
{
unsigned long pte, flags;
struct mm_struct *mm;
pmd_t entry = *pmd;
pgprot_t prot;
if (!pmd_large(entry) || !pmd_young(entry))
return;
pte = (pmd_val(entry) & ~PMD_HUGE_PROTBITS);
pte <<= PMD_PADDR_SHIFT;
pte |= _PAGE_VALID;
prot = pmd_pgprot(entry);
if (tlb_type == hypervisor)
pgprot_val(prot) |= _PAGE_SZHUGE_4V;
else
pgprot_val(prot) |= _PAGE_SZHUGE_4U;
pte |= pgprot_val(prot);
mm = vma->vm_mm;
spin_lock_irqsave(&mm->context.lock, flags);
if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL)
__update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT,
addr, pte);
spin_unlock_irqrestore(&mm->context.lock, flags);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
static void context_reload(void *__data)
{
struct mm_struct *mm = __data;
if (mm == current->mm)
load_secondary_context(mm);
}
void hugetlb_setup(struct mm_struct *mm)
{
struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
if (likely(tp->tsb != NULL))
return;
tsb_grow(mm, MM_TSB_HUGE, 0);
tsb_context_switch(mm);
smp_tsb_sync(mm);
/* On UltraSPARC-III+ and later, configure the second half of
* the Data-TLB for huge pages.
*/
if (tlb_type == cheetah_plus) {
unsigned long ctx;
spin_lock(&ctx_alloc_lock);
ctx = mm->context.sparc64_ctx_val;
ctx &= ~CTX_PGSZ_MASK;
ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
if (ctx != mm->context.sparc64_ctx_val) {
/* When changing the page size fields, we
* must perform a context flush so that no
* stale entries match. This flush must
* occur with the original context register
* settings.
*/
do_flush_tlb_mm(mm);
/* Reload the context register of all processors
* also executing in this address space.
*/
mm->context.sparc64_ctx_val = ctx;
on_each_cpu(context_reload, mm, 0);
}
spin_unlock(&ctx_alloc_lock);
}
}
#endif
......@@ -43,16 +43,37 @@ void flush_tlb_pending(void)
put_cpu_var(tlb_batch);
}
void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
pte_t *ptep, pte_t orig, int fullmm)
static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
bool exec)
{
struct tlb_batch *tb = &get_cpu_var(tlb_batch);
unsigned long nr;
vaddr &= PAGE_MASK;
if (pte_exec(orig))
if (exec)
vaddr |= 0x1UL;
nr = tb->tlb_nr;
if (unlikely(nr != 0 && mm != tb->mm)) {
flush_tlb_pending();
nr = 0;
}
if (nr == 0)
tb->mm = mm;
tb->vaddrs[nr] = vaddr;
tb->tlb_nr = ++nr;
if (nr >= TLB_BATCH_NR)
flush_tlb_pending();
put_cpu_var(tlb_batch);
}
void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
pte_t *ptep, pte_t orig, int fullmm)
{
if (tlb_type != hypervisor &&
pte_dirty(orig)) {
unsigned long paddr, pfn = pte_pfn(orig);
......@@ -77,26 +98,91 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
}
no_cache_flush:
if (!fullmm)
tlb_batch_add_one(mm, vaddr, pte_exec(orig));
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
pmd_t pmd, bool exec)
{
unsigned long end;
pte_t *pte;
pte = pte_offset_map(&pmd, vaddr);
end = vaddr + HPAGE_SIZE;
while (vaddr < end) {
if (pte_val(*pte) & _PAGE_VALID)
tlb_batch_add_one(mm, vaddr, exec);
pte++;
vaddr += PAGE_SIZE;
}
pte_unmap(pte);
}
if (fullmm) {
put_cpu_var(tlb_batch);
void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
pmd_t orig = *pmdp;
*pmdp = pmd;
if (mm == &init_mm)
return;
if ((pmd_val(pmd) ^ pmd_val(orig)) & PMD_ISHUGE) {
if (pmd_val(pmd) & PMD_ISHUGE)
mm->context.huge_pte_count++;
else
mm->context.huge_pte_count--;
if (mm->context.huge_pte_count == 1)
hugetlb_setup(mm);
}
nr = tb->tlb_nr;
if (!pmd_none(orig)) {
bool exec = ((pmd_val(orig) & PMD_HUGE_EXEC) != 0);
if (unlikely(nr != 0 && mm != tb->mm)) {
flush_tlb_pending();
nr = 0;
addr &= HPAGE_MASK;
if (pmd_val(orig) & PMD_ISHUGE)
tlb_batch_add_one(mm, addr, exec);
else
tlb_batch_pmd_scan(mm, addr, orig, exec);
}
}
if (nr == 0)
tb->mm = mm;
void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable)
{
struct list_head *lh = (struct list_head *) pgtable;
tb->vaddrs[nr] = vaddr;
tb->tlb_nr = ++nr;
if (nr >= TLB_BATCH_NR)
flush_tlb_pending();
assert_spin_locked(&mm->page_table_lock);
put_cpu_var(tlb_batch);
/* FIFO */
if (!mm->pmd_huge_pte)
INIT_LIST_HEAD(lh);
else
list_add(lh, (struct list_head *) mm->pmd_huge_pte);
mm->pmd_huge_pte = pgtable;
}
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm)
{
struct list_head *lh;
pgtable_t pgtable;
assert_spin_locked(&mm->page_table_lock);
/* FIFO */
pgtable = mm->pmd_huge_pte;
lh = (struct list_head *) pgtable;
if (list_empty(lh))
mm->pmd_huge_pte = NULL;
else {
mm->pmd_huge_pte = (pgtable_t) lh->next;
list_del(lh);
}
pte_val(pgtable[0]) = 0;
pte_val(pgtable[1]) = 0;
return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
......@@ -78,7 +78,7 @@ void flush_tsb_user(struct tlb_batch *tb)
base = __pa(base);
__flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
......@@ -90,29 +90,12 @@ void flush_tsb_user(struct tlb_batch *tb)
spin_unlock_irqrestore(&mm->context.lock, flags);
}
#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K
#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_64K
#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_64K
#else
#error Broken base page size setting...
#endif
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_64K
#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_64K
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_512K
#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_512K
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_4MB
#else
#error Broken huge page size setting...
#endif
#endif
static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
......@@ -207,7 +190,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign
case MM_TSB_BASE:
hp->pgsz_idx = HV_PGSZ_IDX_BASE;
break;
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
case MM_TSB_HUGE:
hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
break;
......@@ -222,7 +205,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign
case MM_TSB_BASE:
hp->pgsz_mask = HV_PGSZ_MASK_BASE;
break;
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
case MM_TSB_HUGE:
hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
break;
......@@ -444,7 +427,7 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
unsigned long huge_pte_count;
#endif
unsigned int i;
......@@ -453,7 +436,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
mm->context.sparc64_ctx_val = 0UL;
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
/* We reset it to zero because the fork() page copying
* will re-increment the counters as the parent PTEs are
* copied into the child address space.
......@@ -462,6 +445,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
mm->context.huge_pte_count = 0;
#endif
mm->context.pgtable_page = NULL;
/* copy_mm() copies over the parent's mm_struct before calling
* us, so we need to zero out the TSB pointer or else tsb_grow()
* will be confused and think there is an older TSB to free up.
......@@ -474,7 +459,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
*/
tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
if (unlikely(huge_pte_count))
tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
#endif
......@@ -500,10 +485,17 @@ static void tsb_destroy_one(struct tsb_config *tp)
void destroy_context(struct mm_struct *mm)
{
unsigned long flags, i;
struct page *page;
for (i = 0; i < MM_NUM_TSBS; i++)
tsb_destroy_one(&mm->context.tsb_block[i]);
page = mm->context.pgtable_page;
if (page && put_page_testzero(page)) {
pgtable_page_dtor(page);
free_hot_cold_page(page, 0);
}
spin_lock_irqsave(&ctx_alloc_lock, flags);
if (CTX_VALID(mm->context)) {
......
......@@ -7,12 +7,15 @@ config TILE
select HAVE_DMA_API_DEBUG
select HAVE_KVM if !TILEGX
select GENERIC_FIND_FIRST_BIT
select SYSCTL_EXCEPTION_TRACE
select USE_GENERIC_SMP_HELPERS
select CC_OPTIMIZE_FOR_SIZE
select HAVE_DEBUG_KMEMLEAK
select HAVE_GENERIC_HARDIRQS
select GENERIC_IRQ_PROBE
select GENERIC_PENDING_IRQ if SMP
select GENERIC_IRQ_SHOW
select HAVE_DEBUG_BUGVERBOSE
select HAVE_SYSCALL_WRAPPERS if TILEGX
select SYS_HYPERVISOR
select ARCH_HAVE_NMI_SAFE_CMPXCHG
......
......@@ -106,6 +106,10 @@ static inline void arch_release_hugepage(struct page *page)
{
}
static inline void arch_clear_hugepage_flags(struct page *page)
{
}
#ifdef CONFIG_HUGETLB_SUPER_PAGES
static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
struct page *page, int writable)
......
......@@ -36,19 +36,14 @@ static void sim_notify_exec(const char *binary_name)
} while (c);
}
static int notify_exec(void)
static int notify_exec(struct mm_struct *mm)
{
int retval = 0; /* failure */
struct vm_area_struct *vma = current->mm->mmap;
while (vma) {
if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
break;
vma = vma->vm_next;
}
if (vma) {
if (mm->exe_file) {
char *buf = (char *) __get_free_page(GFP_KERNEL);
if (buf) {
char *path = d_path(&vma->vm_file->f_path,
char *path = d_path(&mm->exe_file->f_path,
buf, PAGE_SIZE);
if (!IS_ERR(path)) {
sim_notify_exec(path);
......@@ -106,16 +101,16 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
unsigned long vdso_base;
int retval = 0;
down_write(&mm->mmap_sem);
/*
* Notify the simulator that an exec just occurred.
* If we can't find the filename of the mapping, just use
* whatever was passed as the linux_binprm filename.
*/
if (!notify_exec())
if (!notify_exec(mm))
sim_notify_exec(bprm->filename);
down_write(&mm->mmap_sem);
/*
* MAYWRITE to allow gdb to COW and set breakpoints
*/
......
......@@ -454,6 +454,7 @@ static int handle_page_fault(struct pt_regs *regs,
tsk->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would
......
......@@ -7,6 +7,7 @@ config UML
bool
default y
select HAVE_GENERIC_HARDIRQS
select HAVE_UID16
select GENERIC_IRQ_SHOW
select GENERIC_CPU_DEVICES
select GENERIC_IO
......
......@@ -89,6 +89,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
goto retry;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment