diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8e433bbc6e7..22165cb1890 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -199,6 +199,7 @@ struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
 	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
+	unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address);
 	int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
 
 	/* notification that a previously read-only page is about to become
@@ -593,6 +594,12 @@ static inline int page_mapped(struct page *page)
 #define NOPAGE_SIGBUS	(NULL)
 #define NOPAGE_OOM	((struct page *) (-1))
 
+/*
+ * Error return values for the *_nopfn functions
+ */
+#define NOPFN_SIGBUS	((unsigned long) -1)
+#define NOPFN_OOM	((unsigned long) -2)
+
 /*
  * Different kinds of faults, as returned by handle_mm_fault().
  * Used to decide whether a process gets delivered SIGBUS or
diff --git a/mm/memory.c b/mm/memory.c
index 92a3ebd8d79..f2ef1dcfff7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2255,6 +2255,54 @@ oom:
 	return VM_FAULT_OOM;
 }
 
+/*
+ * do_no_pfn() tries to create a new page mapping for a page without
+ * a struct_page backing it
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
+ *
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ *
+ * It is expected that the ->nopfn handler always returns the same pfn
+ * for a given virtual mapping.
+ *
+ * Mark this `noinline' to prevent it from bloating the main pagefault code.
+ */
+static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
+			unsigned long address, pte_t *page_table, pmd_t *pmd,
+			int write_access)
+{
+	spinlock_t *ptl;
+	pte_t entry;
+	unsigned long pfn;
+	int ret = VM_FAULT_MINOR;
+
+	pte_unmap(page_table);
+	BUG_ON(!(vma->vm_flags & VM_PFNMAP));
+	BUG_ON(is_cow_mapping(vma->vm_flags));
+
+	pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
+	if (pfn == NOPFN_OOM)
+		return VM_FAULT_OOM;
+	if (pfn == NOPFN_SIGBUS)
+		return VM_FAULT_SIGBUS;
+
+	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+
+	/* Only go through if we didn't race with anybody else... */
+	if (pte_none(*page_table)) {
+		entry = pfn_pte(pfn, vma->vm_page_prot);
+		if (write_access)
+			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		set_pte_at(mm, address, page_table, entry);
+	}
+	pte_unmap_unlock(page_table, ptl);
+	return ret;
+}
+
 /*
  * Fault of a previously existing named mapping. Repopulate the pte
  * from the encoded file_pte if possible. This enables swappable
@@ -2317,11 +2365,17 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	old_entry = entry = *pte;
 	if (!pte_present(entry)) {
 		if (pte_none(entry)) {
-			if (!vma->vm_ops || !vma->vm_ops->nopage)
-				return do_anonymous_page(mm, vma, address,
-					pte, pmd, write_access);
-			return do_no_page(mm, vma, address,
-					pte, pmd, write_access);
+			if (vma->vm_ops) {
+				if (vma->vm_ops->nopage)
+					return do_no_page(mm, vma, address,
+							  pte, pmd,
+							  write_access);
+				if (unlikely(vma->vm_ops->nopfn))
+					return do_no_pfn(mm, vma, address, pte,
+							 pmd, write_access);
+			}
+			return do_anonymous_page(mm, vma, address,
+						 pte, pmd, write_access);
 		}
 		if (pte_file(entry))
 			return do_file_page(mm, vma, address,
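For context, a driver that wants to use the new callback installs a ->nopfn handler from its mmap() method. The sketch below is hypothetical and not part of the patch (the mydev_* names and the aperture constants are invented); it only illustrates the contract the patch establishes: the vma must be marked VM_PFNMAP (do_no_pfn() BUGs otherwise), the handler returns a pfn or NOPFN_SIGBUS / NOPFN_OOM, and it must return the same pfn for a given address on every call.

/*
 * Hypothetical example: mapping a physical device aperture that has no
 * struct page backing.  All mydev_* names and constants are illustrative.
 */
#include <linux/fs.h>
#include <linux/mm.h>

#define MYDEV_APERTURE_BASE	0xfd000000UL	/* physical base of the aperture */
#define MYDEV_APERTURE_SIZE	0x01000000UL	/* 16 MB */

static unsigned long mydev_nopfn(struct vm_area_struct *vma,
				 unsigned long address)
{
	/* address arrives page-aligned (do_no_pfn masks with PAGE_MASK) */
	unsigned long offset = address - vma->vm_start;

	/* Refuse faults past the end of the device aperture. */
	if (offset >= MYDEV_APERTURE_SIZE)
		return NOPFN_SIGBUS;

	/*
	 * Same pfn for a given virtual address on every call, as the
	 * do_no_pfn() comment requires.
	 */
	return (MYDEV_APERTURE_BASE + offset) >> PAGE_SHIFT;
}

static struct vm_operations_struct mydev_vm_ops = {
	.nopfn	= mydev_nopfn,
};

static int mydev_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* do_no_pfn() requires the vma to be a pure pfn mapping. */
	vma->vm_flags |= VM_PFNMAP;
	vma->vm_ops = &mydev_vm_ops;
	return 0;
}

Compared with remap_pfn_range() at mmap time, deferring the work to ->nopfn lets the driver populate the mapping lazily, one faulting page at a time.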