diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index a59070b0905..b44e6a88ea7 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1708,7 +1708,20 @@ static void convert_pfn_mfn(void *v) for (i = 0; i < PTRS_PER_PTE; i++) pte[i] = xen_make_pte(pte[i].pte); } - +static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, + unsigned long addr) +{ + if (*pt_base == PFN_DOWN(__pa(addr))) { + set_page_prot((void *)addr, PAGE_KERNEL); + clear_page((void *)addr); + (*pt_base)++; + } + if (*pt_end == PFN_DOWN(__pa(addr))) { + set_page_prot((void *)addr, PAGE_KERNEL); + clear_page((void *)addr); + (*pt_end)--; + } +} /* * Set up the initial kernel pagetable. * @@ -1724,6 +1737,9 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pud_t *l3; pmd_t *l2; + unsigned long addr[3]; + unsigned long pt_base, pt_end; + unsigned i; /* max_pfn_mapped is the last pfn mapped in the initial memory * mappings. Considering that on Xen after the kernel mappings we @@ -1731,6 +1747,9 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) * set max_pfn_mapped to the last real pfn mapped. */ max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); + pt_base = PFN_DOWN(__pa(xen_start_info->pt_base)); + pt_end = pt_base + xen_start_info->nr_pt_frames; + /* Zap identity mapping */ init_level4_pgt[0] = __pgd(0); @@ -1749,6 +1768,9 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); + addr[0] = (unsigned long)pgd; + addr[1] = (unsigned long)l3; + addr[2] = (unsigned long)l2; /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: * Both L4[272][0] and L4[511][511] have entries that point to the same * L2 (PMD) tables. Meaning that if you modify it in __va space @@ -1782,20 +1804,26 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* Unpin Xen-provided one */ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - /* Switch over */ - pgd = init_level4_pgt; - /* * At this stage there can be no user pgd, and no page * structure to attach it to, so make sure we just set kernel * pgd. */ xen_mc_batch(); - __xen_write_cr3(true, __pa(pgd)); + __xen_write_cr3(true, __pa(init_level4_pgt)); xen_mc_issue(PARAVIRT_LAZY_CPU); - memblock_reserve(__pa(xen_start_info->pt_base), - xen_start_info->nr_pt_frames * PAGE_SIZE); + /* We can't that easily rip out L3 and L2, as the Xen pagetables are + * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for + * the initial domain. For guests using the toolstack, they are in: + * [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only + * rip out the [L4] (pgd), but for guests we shave off three pages. + */ + for (i = 0; i < ARRAY_SIZE(addr); i++) + check_pt_base(&pt_base, &pt_end, addr[i]); + + /* Our (by three pages) smaller Xen pagetable that we are using */ + memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE); } #else /* !CONFIG_X86_64 */ static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);