diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index fa838e2eb65..401d26b464f 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -162,6 +163,30 @@ static unsigned long entry_point(void *start, void *end, errx(1, "Is this image a genuine lguest?"); } +/* This routine is used to load the kernel or initrd. It tries mmap, but if + * that fails (Plan 9's kernel file isn't nicely aligned on page boundaries), + * it falls back to reading the memory in. */ +static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) +{ + ssize_t r; + + /* We map writable even though for some segments are marked read-only. + * The kernel really wants to be writable: it patches its own + * instructions. + * + * MAP_PRIVATE means that the page won't be copied until a write is + * done to it. This allows us to share untouched memory between + * Guests. */ + if (mmap(addr, len, PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_FIXED|MAP_PRIVATE, fd, offset) != MAP_FAILED) + return; + + /* pread does a seek and a read in one shot: saves a few lines. */ + r = pread(fd, addr, len, offset); + if (r != len) + err(1, "Reading offset %lu len %lu gave %zi", offset, len, r); +} + /* This routine takes an open vmlinux image, which is in ELF, and maps it into * the Guest memory. ELF = Embedded Linking Format, which is the format used * by all modern binaries on Linux including the kernel. @@ -176,7 +201,6 @@ static unsigned long entry_point(void *start, void *end, static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, unsigned long *page_offset) { - void *addr; Elf32_Phdr phdr[ehdr->e_phnum]; unsigned int i; unsigned long start = -1UL, end = 0; @@ -227,23 +251,9 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, if (phdr[i].p_paddr + phdr[i].p_filesz > end) end = phdr[i].p_paddr + phdr[i].p_filesz; - /* We map this section of the file at its physical address. We - * map it read & write even if the header says this segment is - * read-only. The kernel really wants to be writable: it - * patches its own instructions which would normally be - * read-only. - * - * MAP_PRIVATE means that the page won't be copied until a - * write is done to it. This allows us to share much of the - * kernel memory between Guests. */ - addr = mmap((void *)phdr[i].p_paddr, - phdr[i].p_filesz, - PROT_READ|PROT_WRITE|PROT_EXEC, - MAP_FIXED|MAP_PRIVATE, - elf_fd, phdr[i].p_offset); - if (addr != (void *)phdr[i].p_paddr) - err(1, "Mmaping vmlinux seg %i gave %p not %p", - i, addr, (void *)phdr[i].p_paddr); + /* We map this section of the file at its physical address. */ + map_at(elf_fd, (void *)phdr[i].p_paddr, + phdr[i].p_offset, phdr[i].p_filesz); } return entry_point((void *)start, (void *)end, *page_offset); @@ -402,27 +412,20 @@ static unsigned long load_initrd(const char *name, unsigned long mem) int ifd; struct stat st; unsigned long len; - void *iaddr; ifd = open_or_die(name, O_RDONLY); /* fstat() is needed to get the file size. */ if (fstat(ifd, &st) < 0) err(1, "fstat() on initrd '%s'", name); - /* The length needs to be rounded up to a page size: mmap needs the - * address to be page aligned. */ + /* We map the initrd at the top of memory, but mmap wants it to be + * page-aligned, so we round the size up for that. */ len = page_align(st.st_size); - /* We map the initrd at the top of memory. */ - iaddr = mmap((void *)mem - len, st.st_size, - PROT_READ|PROT_EXEC|PROT_WRITE, - MAP_FIXED|MAP_PRIVATE, ifd, 0); - if (iaddr != (void *)mem - len) - err(1, "Mmaping initrd '%s' returned %p not %p", - name, iaddr, (void *)mem - len); + map_at(ifd, (void *)mem - len, 0, st.st_size); /* Once a file is mapped, you can close the file descriptor. It's a * little odd, but quite useful. */ close(ifd); - verbose("mapped initrd %s size=%lu @ %p\n", name, st.st_size, iaddr); + verbose("mapped initrd %s size=%lu @ %p\n", name, len, (void*)mem-len); /* We return the initrd size. */ return len;