diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 4b7183e9806..bc823c4c028 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -246,50 +246,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 		__mmu_notifier_mm_destroy(mm);
 }
 
-/*
- * These two macros will sometime replace ptep_clear_flush.
- * ptep_clear_flush is implemented as macro itself, so this also is
- * implemented as a macro until ptep_clear_flush will converted to an
- * inline function, to diminish the risk of compilation failure. The
- * invalidate_page method over time can be moved outside the PT lock
- * and these two macros can be later removed.
- */
-#define ptep_clear_flush_notify(__vma, __address, __ptep)		\
-({									\
-	pte_t __pte;							\
-	struct vm_area_struct *___vma = __vma;				\
-	unsigned long ___address = __address;				\
-	__pte = ptep_clear_flush(___vma, ___address, __ptep);		\
-	mmu_notifier_invalidate_page(___vma->vm_mm, ___address);	\
-	__pte;								\
-})
-
-#define pmdp_clear_flush_notify(__vma, __address, __pmdp)		\
-({									\
-	pmd_t __pmd;							\
-	struct vm_area_struct *___vma = __vma;				\
-	unsigned long ___address = __address;				\
-	VM_BUG_ON(__address & ~HPAGE_PMD_MASK);				\
-	mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address,	\
-					    (__address)+HPAGE_PMD_SIZE);\
-	__pmd = pmdp_clear_flush(___vma, ___address, __pmdp);		\
-	mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address,	\
-					  (__address)+HPAGE_PMD_SIZE);	\
-	__pmd;								\
-})
-
-#define pmdp_splitting_flush_notify(__vma, __address, __pmdp)		\
-({									\
-	struct vm_area_struct *___vma = __vma;				\
-	unsigned long ___address = __address;				\
-	VM_BUG_ON(__address & ~HPAGE_PMD_MASK);				\
-	mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address,	\
-					    (__address)+HPAGE_PMD_SIZE);\
-	pmdp_splitting_flush(___vma, ___address, __pmdp);		\
-	mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address,	\
-					  (__address)+HPAGE_PMD_SIZE);	\
-})
-
 #define ptep_clear_flush_young_notify(__vma, __address, __ptep)	\
 ({									\
 	int __young;							\
@@ -380,9 +336,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 
 #define ptep_clear_flush_young_notify ptep_clear_flush_young
 #define pmdp_clear_flush_young_notify pmdp_clear_flush_young
-#define ptep_clear_flush_notify ptep_clear_flush
-#define pmdp_clear_flush_notify pmdp_clear_flush
-#define pmdp_splitting_flush_notify pmdp_splitting_flush
 #define set_pte_at_notify set_pte_at
 
 #endif /* CONFIG_MMU_NOTIFIER */
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index a52daee11d3..a912da6ddfd 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -192,11 +192,13 @@ retry:
 		if (pte) {
 			/* Nuke the page table entry. */
 			flush_cache_page(vma, address, pte_pfn(*pte));
-			pteval = ptep_clear_flush_notify(vma, address, pte);
+			pteval = ptep_clear_flush(vma, address, pte);
 			page_remove_rmap(page);
 			dec_mm_counter(mm, MM_FILEPAGES);
 			BUG_ON(pte_dirty(pteval));
 			pte_unmap_unlock(pte, ptl);
+			/* must invalidate_page _before_ freeing the page */
+			mmu_notifier_invalidate_page(mm, address);
 			page_cache_release(page);
 		}
 	}
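
The mmu_notifier.h and filemap_xip.c hunks above set the pattern for the single-PTE callers: the *_notify wrapper goes away, the PTE is cleared and flushed while the page table lock is held, and mmu_notifier_invalidate_page() is open-coded after the lock is dropped but before the page can be freed. Roughly, the resulting calling convention looks like the sketch below; it is illustrative only, not code from this patch, and the helper name and its parameters are invented.

	/* Illustrative sketch only -- not part of this patch. */
	#include <linux/mm.h>
	#include <linux/rmap.h>
	#include <linux/mmu_notifier.h>

	static void example_zap_one_pte(struct vm_area_struct *vma,
					struct page *page,
					unsigned long address,
					pte_t *pte, spinlock_t *ptl)
	{
		struct mm_struct *mm = vma->vm_mm;
		pte_t pteval;

		/* Tear down the mapping while the page table lock is held. */
		flush_cache_page(vma, address, pte_pfn(*pte));
		pteval = ptep_clear_flush(vma, address, pte);
		if (pte_dirty(pteval))
			set_page_dirty(page);
		page_remove_rmap(page);
		pte_unmap_unlock(pte, ptl);

		/*
		 * Only after the lock is dropped is the secondary MMU told to
		 * drop its mapping; ->invalidate_page() may sleep here.
		 */
		mmu_notifier_invalidate_page(mm, address);
	}

The ordering constraint in filemap_xip.c is what the new comment there states: the notifier must run before page_cache_release() can free the page, so a secondary MMU never keeps a mapping to a freed page.
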
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0e7740923fb..08a943b9cf9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -787,6 +787,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 	pmd_t _pmd;
 	int ret = 0, i;
 	struct page **pages;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
 
 	pages = kmalloc(sizeof(struct page *) * HPAGE_PMD_NR,
 			GFP_KERNEL);
@@ -823,12 +825,16 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 		cond_resched();
 	}
 
+	mmun_start = haddr;
+	mmun_end = haddr + HPAGE_PMD_SIZE;
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(*pmd, orig_pmd)))
 		goto out_free_pages;
 	VM_BUG_ON(!PageHead(page));
 
-	pmdp_clear_flush_notify(vma, haddr, pmd);
+	pmdp_clear_flush(vma, haddr, pmd);
 	/* leave pmd empty until pte is filled */
 
 	pgtable = pgtable_trans_huge_withdraw(mm);
@@ -851,6 +857,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 	page_remove_rmap(page);
 	spin_unlock(&mm->page_table_lock);
 
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+
 	ret |= VM_FAULT_WRITE;
 	put_page(page);
 
@@ -859,6 +867,7 @@ out:
 
 out_free_pages:
 	spin_unlock(&mm->page_table_lock);
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	mem_cgroup_uncharge_start();
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
 		mem_cgroup_uncharge_page(pages[i]);
@@ -875,6 +884,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int ret = 0;
 	struct page *page, *new_page;
 	unsigned long haddr;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
 
 	VM_BUG_ON(!vma->anon_vma);
 	spin_lock(&mm->page_table_lock);
@@ -925,20 +936,24 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
 	__SetPageUptodate(new_page);
 
+	mmun_start = haddr;
+	mmun_end = haddr + HPAGE_PMD_SIZE;
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
 	spin_lock(&mm->page_table_lock);
 	put_page(page);
 	if (unlikely(!pmd_same(*pmd, orig_pmd))) {
 		spin_unlock(&mm->page_table_lock);
 		mem_cgroup_uncharge_page(new_page);
 		put_page(new_page);
-		goto out;
+		goto out_mn;
 	} else {
 		pmd_t entry;
 		VM_BUG_ON(!PageHead(page));
 		entry = mk_pmd(new_page, vma->vm_page_prot);
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 		entry = pmd_mkhuge(entry);
-		pmdp_clear_flush_notify(vma, haddr, pmd);
+		pmdp_clear_flush(vma, haddr, pmd);
 		page_add_new_anon_rmap(new_page, vma, haddr);
 		set_pmd_at(mm, haddr, pmd, entry);
 		update_mmu_cache(vma, address, pmd);
@@ -946,9 +961,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		put_page(page);
 		ret |= VM_FAULT_WRITE;
 	}
+	spin_unlock(&mm->page_table_lock);
+out_mn:
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+out:
+	return ret;
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
-out:
 	return ret;
 }
 
@@ -1162,7 +1181,11 @@ static int __split_huge_page_splitting(struct page *page,
 	struct mm_struct *mm = vma->vm_mm;
 	pmd_t *pmd;
 	int ret = 0;
+	/* For mmu_notifiers */
+	const unsigned long mmun_start = address;
+	const unsigned long mmun_end = address + HPAGE_PMD_SIZE;
 
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	spin_lock(&mm->page_table_lock);
 	pmd = page_check_address_pmd(page, mm, address,
 				     PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG);
@@ -1174,10 +1197,11 @@ static int __split_huge_page_splitting(struct page *page,
 		 * and it won't wait on the anon_vma->root->mutex to
 		 * serialize against split_huge_page*.
 		 */
-		pmdp_splitting_flush_notify(vma, address, pmd);
+		pmdp_splitting_flush(vma, address, pmd);
 		ret = 1;
 	}
 	spin_unlock(&mm->page_table_lock);
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
 	return ret;
 }
@@ -1898,6 +1922,8 @@ static void collapse_huge_page(struct mm_struct *mm,
 	spinlock_t *ptl;
 	int isolated;
 	unsigned long hstart, hend;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
 
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
@@ -1952,6 +1978,9 @@ static void collapse_huge_page(struct mm_struct *mm,
 	pte = pte_offset_map(pmd, address);
 	ptl = pte_lockptr(mm, pmd);
 
+	mmun_start = address;
+	mmun_end = address + HPAGE_PMD_SIZE;
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	spin_lock(&mm->page_table_lock); /* probably unnecessary */
 	/*
	 * After this gup_fast can't run anymore. This also removes
@@ -1959,8 +1988,9 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 * huge and small TLB entries for the same virtual address
 	 * to avoid the risk of CPU bugs in that area.
 	 */
-	_pmd = pmdp_clear_flush_notify(vma, address, pmd);
+	_pmd = pmdp_clear_flush(vma, address, pmd);
 	spin_unlock(&mm->page_table_lock);
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
 	spin_lock(ptl);
 	isolated = __collapse_huge_page_isolate(vma, address, pte);
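
Every huge_memory.c path converted above follows the same shape: the PMD-sized range is computed into mmun_start/mmun_end, mmu_notifier_invalidate_range_start() runs before page_table_lock is taken (it may sleep), the PMD is cleared or split under the lock, and mmu_notifier_invalidate_range_end() runs after the unlock on the success and error paths alike (hence the new out_mn label and the extra call under out_free_pages). A minimal sketch of that bracketing, with an invented helper name and assuming CONFIG_TRANSPARENT_HUGEPAGE:

	/* Illustrative sketch only -- not part of this patch. */
	#include <linux/mm.h>
	#include <linux/huge_mm.h>
	#include <linux/mmu_notifier.h>

	static void example_clear_huge_pmd(struct vm_area_struct *vma,
					   unsigned long haddr, pmd_t *pmd)
	{
		struct mm_struct *mm = vma->vm_mm;
		const unsigned long mmun_start = haddr;
		const unsigned long mmun_end = haddr + HPAGE_PMD_SIZE;

		/* May sleep, so it must run before the spinlock is taken. */
		mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

		spin_lock(&mm->page_table_lock);
		pmdp_clear_flush(vma, haddr, pmd);
		/* ... refill the range while still holding the lock ... */
		spin_unlock(&mm->page_table_lock);

		/* Every path that ran _start must also run _end. */
		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
	}
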
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index de5d1dcf34f..993f7c1820a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2355,13 +2355,15 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	struct page *page;
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
+	const unsigned long mmun_start = start;	/* For mmu_notifiers */
+	const unsigned long mmun_end = end;	/* For mmu_notifiers */
 
 	WARN_ON(!is_vm_hugetlb_page(vma));
 	BUG_ON(start & ~huge_page_mask(h));
 	BUG_ON(end & ~huge_page_mask(h));
 
 	tlb_start_vma(tlb, vma);
-	mmu_notifier_invalidate_range_start(mm, start, end);
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 again:
 	spin_lock(&mm->page_table_lock);
 	for (address = start; address < end; address += sz) {
@@ -2425,7 +2427,7 @@ again:
 		if (address < end && !ref_page)
 			goto again;
 	}
-	mmu_notifier_invalidate_range_end(mm, start, end);
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	tlb_end_vma(tlb, vma);
 }
 
@@ -2525,6 +2527,8 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *old_page, *new_page;
 	int avoidcopy;
 	int outside_reserve = 0;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
 
 	old_page = pte_page(pte);
 
@@ -2611,6 +2615,9 @@ retry_avoidcopy:
 			    pages_per_huge_page(h));
 	__SetPageUptodate(new_page);
 
+	mmun_start = address & huge_page_mask(h);
+	mmun_end = mmun_start + huge_page_size(h);
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	/*
 	 * Retake the page_table_lock to check for racing updates
 	 * before the page tables are altered
@@ -2619,9 +2626,6 @@ retry_avoidcopy:
 	spin_lock(&mm->page_table_lock);
 	ptep = huge_pte_offset(mm, address & huge_page_mask(h));
 	if (likely(pte_same(huge_ptep_get(ptep), pte))) {
 		/* Break COW */
-		mmu_notifier_invalidate_range_start(mm,
-			address & huge_page_mask(h),
-			(address & huge_page_mask(h)) + huge_page_size(h));
 		huge_ptep_clear_flush(vma, address, ptep);
 		set_huge_pte_at(mm, address, ptep, make_huge_pte(vma, new_page, 1));
@@ -2629,10 +2633,11 @@ retry_avoidcopy:
 		hugepage_add_new_anon_rmap(new_page, vma, address);
 		/* Make the old page be freed below */
 		new_page = old_page;
-		mmu_notifier_invalidate_range_end(mm,
-			address & huge_page_mask(h),
-			(address & huge_page_mask(h)) + huge_page_size(h));
 	}
+	spin_unlock(&mm->page_table_lock);
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+	/* Caller expects lock to be held */
+	spin_lock(&mm->page_table_lock);
 	page_cache_release(new_page);
 	page_cache_release(old_page);
 	return 0;
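
The hugetlb conversion uses the same range bracketing, with the range rounded to the hstate's page size (address & huge_page_mask(h) up to that base plus huge_page_size(h)). The one wrinkle is hugetlb_cow(): its caller expects page_table_lock to be held on return, so the range_end notification is issued in a short unlock/relock window, as in this sketch (illustrative only; the helper is hypothetical):

	/* Illustrative sketch only -- not part of this patch. */
	#include <linux/mm.h>
	#include <linux/mmu_notifier.h>

	static void example_range_end_with_lock_held(struct mm_struct *mm,
						     unsigned long mmun_start,
						     unsigned long mmun_end)
	{
		spin_unlock(&mm->page_table_lock);
		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
		/* The caller expects the page table lock to be held. */
		spin_lock(&mm->page_table_lock);
	}
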
diff --git a/mm/memory.c b/mm/memory.c
index 5f5d1f039bf..b03a4a21c1d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -712,7 +712,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
 	add_taint(TAINT_BAD_PAGE);
 }
 
-static inline int is_cow_mapping(vm_flags_t flags)
+static inline bool is_cow_mapping(vm_flags_t flags)
 {
 	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 }
@@ -1039,6 +1039,9 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	unsigned long next;
 	unsigned long addr = vma->vm_start;
 	unsigned long end = vma->vm_end;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
+	bool is_cow;
 	int ret;
 
 	/*
@@ -1072,8 +1075,12 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * parent mm. And a permission downgrade will only happen if
 	 * is_cow_mapping() returns true.
 	 */
-	if (is_cow_mapping(vma->vm_flags))
-		mmu_notifier_invalidate_range_start(src_mm, addr, end);
+	is_cow = is_cow_mapping(vma->vm_flags);
+	mmun_start = addr;
+	mmun_end = end;
+	if (is_cow)
+		mmu_notifier_invalidate_range_start(src_mm, mmun_start,
+						    mmun_end);
 
 	ret = 0;
 	dst_pgd = pgd_offset(dst_mm, addr);
@@ -1089,9 +1096,8 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		}
 	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
 
-	if (is_cow_mapping(vma->vm_flags))
-		mmu_notifier_invalidate_range_end(src_mm,
-						  vma->vm_start, end);
+	if (is_cow)
+		mmu_notifier_invalidate_range_end(src_mm, mmun_start, mmun_end);
 	return ret;
 }
 
@@ -2516,7 +2522,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		spinlock_t *ptl, pte_t orig_pte)
 	__releases(ptl)
 {
-	struct page *old_page, *new_page;
+	struct page *old_page, *new_page = NULL;
 	pte_t entry;
 	int ret = 0;
 	int page_mkwrite = 0;
@@ -2760,10 +2766,14 @@ gotten:
 	} else
 		mem_cgroup_uncharge_page(new_page);
 
-	if (new_page)
-		page_cache_release(new_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (new_page) {
+		if (new_page == old_page)
+			/* cow happened, notify before releasing old_page */
+			mmu_notifier_invalidate_page(mm, address);
+		page_cache_release(new_page);
+	}
 	if (old_page) {
 		/*
 		 * Don't let another task, with possibly unlocked vma,
diff --git a/mm/mremap.c b/mm/mremap.c
index 3b639a4b26b..1b61c2d3307 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -149,11 +149,15 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 	unsigned long extent, next, old_end;
 	pmd_t *old_pmd, *new_pmd;
 	bool need_flush = false;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
 
 	old_end = old_addr + len;
 	flush_cache_range(vma, old_addr, old_end);
 
-	mmu_notifier_invalidate_range_start(vma->vm_mm, old_addr, old_end);
+	mmun_start = old_addr;
+	mmun_end = old_end;
+	mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
 
 	for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
 		cond_resched();
@@ -197,7 +201,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 	if (likely(need_flush))
 		flush_tlb_range(vma, old_end-len, old_addr);
 
-	mmu_notifier_invalidate_range_end(vma->vm_mm, old_end-len, old_end);
+	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 
 	return len + old_addr - old_end;	/* how much done */
 }
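
In copy_page_range() and move_page_tables() the notified range is snapshotted into mmun_start/mmun_end before the page table walk advances its address cursors, and copy_page_range() now evaluates is_cow_mapping() once into is_cow, so the start and end notifications are guaranteed to describe the same range under the same condition. The shape of that pattern, as a sketch (illustrative only; the helper and its PAGE_SIZE stepping are invented for the example):

	/* Illustrative sketch only -- not part of this patch. */
	#include <linux/mm.h>
	#include <linux/mmu_notifier.h>

	static void example_range_walk(struct mm_struct *mm,
				       unsigned long addr, unsigned long end)
	{
		/* Snapshot the range before the walk modifies the cursors. */
		const unsigned long mmun_start = addr;
		const unsigned long mmun_end = end;

		mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
		for (; addr < end; addr += PAGE_SIZE) {
			/* ... modify the page tables for this address ... */
		}
		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
	}

The do_wp_page() change is the single-page variant of the same idea: new_page now starts out NULL, and after pte_unmap_unlock() the new "cow happened" branch notifies secondary MMUs before old_page (aliased by new_page at that point) is released.
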
diff --git a/mm/rmap.c b/mm/rmap.c
index bf03149f495..7df7984d476 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -884,7 +884,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 		pte_t entry;
 
 		flush_cache_page(vma, address, pte_pfn(*pte));
-		entry = ptep_clear_flush_notify(vma, address, pte);
+		entry = ptep_clear_flush(vma, address, pte);
 		entry = pte_wrprotect(entry);
 		entry = pte_mkclean(entry);
 		set_pte_at(mm, address, pte, entry);
@@ -892,6 +892,9 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 	}
 
 	pte_unmap_unlock(pte, ptl);
+
+	if (ret)
+		mmu_notifier_invalidate_page(mm, address);
 out:
 	return ret;
 }
@@ -1212,7 +1215,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 
 	/* Nuke the page table entry. */
 	flush_cache_page(vma, address, page_to_pfn(page));
-	pteval = ptep_clear_flush_notify(vma, address, pte);
+	pteval = ptep_clear_flush(vma, address, pte);
 
 	/* Move the dirty bit to the physical page now the pte is gone. */
 	if (pte_dirty(pteval))
@@ -1274,6 +1277,8 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
+	if (ret != SWAP_FAIL)
+		mmu_notifier_invalidate_page(mm, address);
 out:
 	return ret;
 
@@ -1338,6 +1343,8 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 	spinlock_t *ptl;
 	struct page *page;
 	unsigned long address;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
 	unsigned long end;
 	int ret = SWAP_AGAIN;
 	int locked_vma = 0;
@@ -1361,6 +1368,10 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 	if (!pmd_present(*pmd))
 		return ret;
 
+	mmun_start = address;
+	mmun_end = end;
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
 	/*
 	 * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
 	 * keep the sem while scanning the cluster for mlocking pages.
@@ -1394,7 +1405,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 
 		/* Nuke the page table entry. */
 		flush_cache_page(vma, address, pte_pfn(*pte));
-		pteval = ptep_clear_flush_notify(vma, address, pte);
+		pteval = ptep_clear_flush(vma, address, pte);
 
 		/* If nonlinear, store the file page offset in the pte. */
 		if (page->index != linear_page_index(vma, address))
@@ -1410,6 +1421,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 		(*mapcount)--;
 	}
 	pte_unmap_unlock(pte - 1, ptl);
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	if (locked_vma)
 		up_read(&vma->vm_mm->mmap_sem);
 	return ret;
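
The rmap paths complete the conversion: page_mkclean_one() and try_to_unmap_one() call mmu_notifier_invalidate_page() only after pte_unmap_unlock(), and only when a PTE was actually changed (ret set, or ret != SWAP_FAIL), while try_to_unmap_cluster() brackets the whole cluster walk with invalidate_range_start()/end(). The conditional notification, as a sketch (illustrative only; hypothetical helper):

	/* Illustrative sketch only -- not part of this patch. */
	#include <linux/rmap.h>
	#include <linux/mmu_notifier.h>

	static void example_unmap_done(struct mm_struct *mm,
				       unsigned long address, int ret)
	{
		/*
		 * ret is a SWAP_* code; SWAP_FAIL means the PTE was left
		 * (or put back) untouched, so there is nothing to notify.
		 */
		if (ret != SWAP_FAIL)
			mmu_notifier_invalidate_page(mm, address);
	}
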