From 25667d675454f2cd258c5fa798a2281af1ef2ae9 Mon Sep 17 00:00:00 2001
From: Tony Luck
Date: Tue, 6 Mar 2007 13:31:45 -0800
Subject: [PATCH 1/7] Revert "[IA64] swiotlb abstraction (e.g. for Xen)"

This reverts commit 51099005ab8e09d68a13fea8d55bc739c1040ca6.
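For context: the abstraction being reverted existed so that an architecture
(e.g. Xen) could override how data is copied between a driver's buffer and
its bounce-buffer slot. With it gone, both directions are plain memcpy()
calls again. A minimal sketch of the restored copy semantics, assuming
kernel context — the real copies are open-coded at the call sites in the
diff below, not collected in a helper like this:

#include <linux/string.h>	/* memcpy() */
#include <linux/dma-mapping.h>	/* DMA_TO_DEVICE, DMA_FROM_DEVICE */

/* Sketch only: the direction argument decides which way the bounce copy
 * goes. dma_addr is the kernel virtual address of the bounce slot and
 * buffer is the driver's original buffer, matching the names in the diff.
 */
static void bounce_copy(char *buffer, char *dma_addr, size_t size, int dir)
{
	if (dir == DMA_TO_DEVICE)
		memcpy(dma_addr, buffer, size);	/* CPU data out to the device */
	else
		memcpy(buffer, dma_addr, size);	/* device data back to the CPU */
}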
---
 include/asm-ia64/swiotlb.h   |   9 --
 include/asm-x86_64/swiotlb.h |   1 -
 lib/swiotlb.c                | 184 +++++++----------------------
 3 files changed, 35 insertions(+), 159 deletions(-)
 delete mode 100644 include/asm-ia64/swiotlb.h

diff --git a/include/asm-ia64/swiotlb.h b/include/asm-ia64/swiotlb.h
deleted file mode 100644
index 452c162dee4..00000000000
--- a/include/asm-ia64/swiotlb.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _ASM_SWIOTLB_H
-#define _ASM_SWIOTLB_H 1
-
-#include
-
-#define SWIOTLB_ARCH_NEED_LATE_INIT
-#define SWIOTLB_ARCH_NEED_ALLOC
-
-#endif /* _ASM_SWIOTLB_H */
diff --git a/include/asm-x86_64/swiotlb.h b/include/asm-x86_64/swiotlb.h
index ab913ffcad5..f9c589539a8 100644
--- a/include/asm-x86_64/swiotlb.h
+++ b/include/asm-x86_64/swiotlb.h
@@ -44,7 +44,6 @@ extern void swiotlb_init(void);
 extern int swiotlb_force;

 #ifdef CONFIG_SWIOTLB
-#define SWIOTLB_ARCH_NEED_ALLOC
 extern int swiotlb;
 #else
 #define swiotlb 0
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 623a68af8b1..9970e55c90b 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -28,7 +28,6 @@
 #include
 #include
 #include
-#include

 #include
 #include
@@ -36,10 +35,8 @@
 #define OFFSET(val,align) ((unsigned long)	\
 	( (val) & ( (align) - 1)))

-#ifndef SG_ENT_VIRT_ADDRESS
 #define SG_ENT_VIRT_ADDRESS(sg)	(page_address((sg)->page) + (sg)->offset)
 #define SG_ENT_PHYS_ADDRESS(sg)	virt_to_bus(SG_ENT_VIRT_ADDRESS(sg))
-#endif

 /*
  * Maximum allowable number of contiguous slabs to map,
@@ -104,25 +101,13 @@ static unsigned int io_tlb_index;
 /*
  * We need to save away the original address corresponding to a mapped entry
  * for the sync operations.
  */
-#ifndef SWIOTLB_ARCH_HAS_IO_TLB_ADDR_T
-typedef char *io_tlb_addr_t;
-#define swiotlb_orig_addr_null(buffer) (!(buffer))
-#define ptr_to_io_tlb_addr(ptr) (ptr)
-#define page_to_io_tlb_addr(pg, off) (page_address(pg) + (off))
-#define sg_to_io_tlb_addr(sg) SG_ENT_VIRT_ADDRESS(sg)
-#endif
-static io_tlb_addr_t *io_tlb_orig_addr;
+static unsigned char **io_tlb_orig_addr;

 /*
  * Protect the above data structures in the map and unmap calls
  */
 static DEFINE_SPINLOCK(io_tlb_lock);

-#ifdef SWIOTLB_EXTRA_VARIABLES
-SWIOTLB_EXTRA_VARIABLES;
-#endif
-
-#ifndef SWIOTLB_ARCH_HAS_SETUP_IO_TLB_NPAGES
 static int __init
 setup_io_tlb_npages(char *str)
 {
@@ -137,25 +122,9 @@ setup_io_tlb_npages(char *str)
 		swiotlb_force = 1;
 	return 1;
 }
-#endif
 __setup("swiotlb=", setup_io_tlb_npages);
 /* make io_tlb_overflow tunable too? */

-#ifndef swiotlb_adjust_size
-#define swiotlb_adjust_size(size) ((void)0)
-#endif
-
-#ifndef swiotlb_adjust_seg
-#define swiotlb_adjust_seg(start, size) ((void)0)
-#endif
-
-#ifndef swiotlb_print_info
-#define swiotlb_print_info(bytes) \
-	printk(KERN_INFO "Placing %luMB software IO TLB between 0x%lx - " \
-	       "0x%lx\n", bytes >> 20, \
-	       virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end))
-#endif
-
 /*
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
@@ -169,8 +138,6 @@ swiotlb_init_with_default_size(size_t default_size)
 		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
 		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
 	}
-	swiotlb_adjust_size(io_tlb_nslabs);
-	swiotlb_adjust_size(io_tlb_overflow);

 	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
@@ -188,14 +155,10 @@ swiotlb_init_with_default_size(size_t default_size)
 	 * between io_tlb_start and io_tlb_end.
 	 */
 	io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
-	for (i = 0; i < io_tlb_nslabs; i++) {
-		if ( !(i % IO_TLB_SEGSIZE) )
-			swiotlb_adjust_seg(io_tlb_start + (i << IO_TLB_SHIFT),
-					   IO_TLB_SEGSIZE << IO_TLB_SHIFT);
+	for (i = 0; i < io_tlb_nslabs; i++)
 		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-	}
 	io_tlb_index = 0;
-	io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(io_tlb_addr_t));
+	io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));

 	/*
 	 * Get the overflow emergency buffer
@@ -203,21 +166,17 @@ swiotlb_init_with_default_size(size_t default_size)
 	io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
 	if (!io_tlb_overflow_buffer)
 		panic("Cannot allocate SWIOTLB overflow buffer!\n");
-	swiotlb_adjust_seg(io_tlb_overflow_buffer, io_tlb_overflow);

-	swiotlb_print_info(bytes);
+	printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
+	       virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));
 }
-#ifndef __swiotlb_init_with_default_size
-#define __swiotlb_init_with_default_size swiotlb_init_with_default_size
-#endif

 void __init
 swiotlb_init(void)
 {
-	__swiotlb_init_with_default_size(64 * (1<<20));	/* default to 64MB */
+	swiotlb_init_with_default_size(64 * (1<<20));	/* default to 64MB */
 }

-#ifdef SWIOTLB_ARCH_NEED_LATE_INIT
 /*
  * Systems with larger DMA zones (those that don't support ISA) can
  * initialize the swiotlb later using the slab allocator if needed.
@@ -275,12 +234,12 @@ swiotlb_late_init_with_default_size(size_t default_size)
 		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
 	io_tlb_index = 0;

-	io_tlb_orig_addr = (io_tlb_addr_t *)__get_free_pages(GFP_KERNEL,
-	                   get_order(io_tlb_nslabs * sizeof(io_tlb_addr_t)));
+	io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
+	                   get_order(io_tlb_nslabs * sizeof(char *)));
 	if (!io_tlb_orig_addr)
 		goto cleanup3;

-	memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(io_tlb_addr_t));
+	memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));

 	/*
 	 * Get the overflow emergency buffer
@@ -290,17 +249,19 @@ swiotlb_late_init_with_default_size(size_t default_size)
 	if (!io_tlb_overflow_buffer)
 		goto cleanup4;

-	swiotlb_print_info(bytes);
+	printk(KERN_INFO "Placing %luMB software IO TLB between 0x%lx - "
+	       "0x%lx\n", bytes >> 20,
+	       virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));

 	return 0;

 cleanup4:
-	free_pages((unsigned long)io_tlb_orig_addr,
-		   get_order(io_tlb_nslabs * sizeof(io_tlb_addr_t)));
+	free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
+		   sizeof(char *)));
 	io_tlb_orig_addr = NULL;
 cleanup3:
-	free_pages((unsigned long)io_tlb_list,
-		   get_order(io_tlb_nslabs * sizeof(int)));
+	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
+		   sizeof(int)));
 	io_tlb_list = NULL;
 cleanup2:
 	io_tlb_end = NULL;
@@ -310,9 +271,7 @@ cleanup1:
 	io_tlb_nslabs = req_nslabs;
 	return -ENOMEM;
 }
-#endif

-#ifndef SWIOTLB_ARCH_HAS_NEEDS_MAPPING
 static int
 address_needs_mapping(struct device *hwdev, dma_addr_t addr)
 {
@@ -323,35 +282,11 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr)
 	return (addr & ~mask) != 0;
 }

-static inline int range_needs_mapping(const void *ptr, size_t size)
-{
-	return swiotlb_force;
-}
-
-static inline int order_needs_mapping(unsigned int order)
-{
-	return 0;
-}
-#endif
-
-static void
-__sync_single(io_tlb_addr_t buffer, char *dma_addr, size_t size, int dir)
-{
-#ifndef SWIOTLB_ARCH_HAS_SYNC_SINGLE
-	if (dir == DMA_TO_DEVICE)
-		memcpy(dma_addr, buffer, size);
-	else
-		memcpy(buffer, dma_addr, size);
-#else
-	__swiotlb_arch_sync_single(buffer, dma_addr, size, dir);
-#endif
-}
-
 /*
  * Allocates bounce buffer and returns its kernel virtual address.
  */
 static void *
-map_single(struct device *hwdev, io_tlb_addr_t buffer, size_t size, int dir)
+map_single(struct device *hwdev, char *buffer, size_t size, int dir)
 {
 	unsigned long flags;
 	char *dma_addr;
@@ -424,7 +359,7 @@ map_single(struct device *hwdev, io_tlb_addr_t buffer, size_t size, int dir)
 	 */
 	io_tlb_orig_addr[index] = buffer;
 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-		__sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
+		memcpy(dma_addr, buffer, size);

 	return dma_addr;
 }
@@ -438,18 +373,17 @@ unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 {
 	unsigned long flags;
 	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
 	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	io_tlb_addr_t buffer = io_tlb_orig_addr[index];
+	char *buffer = io_tlb_orig_addr[index];

 	/*
 	 * First, sync the memory before unmapping the entry
 	 */
-	if (!swiotlb_orig_addr_null(buffer)
-	    && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+	if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
 		/*
 		 * bounce... copy the data back into the original buffer
 		 * and
 		 * delete the bounce buffer.
 		 */
-		__sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);
+		memcpy(buffer, dma_addr, size);

 	/*
 	 * Return the buffer to the free list by setting the corresponding
@@ -482,18 +416,18 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size,
 	    int dir, int target)
 {
 	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	io_tlb_addr_t buffer = io_tlb_orig_addr[index];
+	char *buffer = io_tlb_orig_addr[index];

 	switch (target) {
 	case SYNC_FOR_CPU:
 		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-			__sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);
+			memcpy(buffer, dma_addr, size);
 		else
 			BUG_ON(dir != DMA_TO_DEVICE);
 		break;
 	case SYNC_FOR_DEVICE:
 		if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-			__sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
+			memcpy(dma_addr, buffer, size);
 		else
 			BUG_ON(dir != DMA_FROM_DEVICE);
 		break;
@@ -502,8 +436,6 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size,
 	}
 }

-#ifdef SWIOTLB_ARCH_NEED_ALLOC
-
 void *
 swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		       dma_addr_t *dma_handle, gfp_t flags)
@@ -519,10 +451,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	 */
 	flags |= GFP_DMA;

-	if (!order_needs_mapping(order))
-		ret = (void *)__get_free_pages(flags, order);
-	else
-		ret = NULL;
+	ret = (void *)__get_free_pages(flags, order);
 	if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) {
 		/*
 		 * The allocated memory isn't reachable by the device.
@@ -560,7 +489,6 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	*dma_handle = dev_addr;
 	return ret;
 }
-EXPORT_SYMBOL(swiotlb_alloc_coherent);

 void
 swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
@@ -573,9 +501,6 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 	/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
 	swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE);
 }
-EXPORT_SYMBOL(swiotlb_free_coherent);
-
-#endif

 static void
 swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
@@ -617,14 +542,13 @@ swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (!range_needs_mapping(ptr, size)
-	    && !address_needs_mapping(hwdev, dev_addr))
+	if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force)
 		return dev_addr;

 	/*
 	 * Oh well, have to allocate and map a bounce buffer.
 	 */
-	map = map_single(hwdev, ptr_to_io_tlb_addr(ptr), size, dir);
+	map = map_single(hwdev, ptr, size, dir);
 	if (!map) {
 		swiotlb_full(hwdev, size, dir, 1);
 		map = io_tlb_overflow_buffer;
@@ -752,16 +676,17 @@ int
 swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
 	       int dir)
 {
+	void *addr;
 	dma_addr_t dev_addr;
 	int i;

 	BUG_ON(dir == DMA_NONE);

 	for (i = 0; i < nelems; i++, sg++) {
-		dev_addr = SG_ENT_PHYS_ADDRESS(sg);
-		if (range_needs_mapping(SG_ENT_VIRT_ADDRESS(sg), sg->length)
-		    || address_needs_mapping(hwdev, dev_addr)) {
-			void *map = map_single(hwdev, sg_to_io_tlb_addr(sg), sg->length, dir);
+		addr = SG_ENT_VIRT_ADDRESS(sg);
+		dev_addr = virt_to_bus(addr);
+		if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) {
+			void *map = map_single(hwdev, addr, sg->length, dir);
 			if (!map) {
 				/* Don't panic here, we expect map_sg users
 				   to do proper error handling.
 				 */
@@ -835,44 +760,6 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
 }

-#ifdef SWIOTLB_ARCH_NEED_MAP_PAGE
-
-dma_addr_t
-swiotlb_map_page(struct device *hwdev, struct page *page,
-		 unsigned long offset, size_t size,
-		 enum dma_data_direction direction)
-{
-	dma_addr_t dev_addr;
-	char *map;
-
-	dev_addr = page_to_bus(page) + offset;
-	if (address_needs_mapping(hwdev, dev_addr)) {
-		map = map_single(hwdev, page_to_io_tlb_addr(page, offset), size, direction);
-		if (!map) {
-			swiotlb_full(hwdev, size, direction, 1);
-			map = io_tlb_overflow_buffer;
-		}
-		dev_addr = virt_to_bus(map);
-	}
-
-	return dev_addr;
-}
-
-void
-swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
-		   size_t size, enum dma_data_direction direction)
-{
-	char *dma_addr = bus_to_virt(dev_addr);
-
-	BUG_ON(direction == DMA_NONE);
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
-		unmap_single(hwdev, dma_addr, size, direction);
-	else if (direction == DMA_FROM_DEVICE)
-		dma_mark_clean(dma_addr, size);
-}
-
-#endif
-
 int
 swiotlb_dma_mapping_error(dma_addr_t dma_addr)
 {
@@ -885,13 +772,10 @@ swiotlb_dma_mapping_error(dma_addr_t dma_addr)
 * during bus mastering, then you would pass 0x00ffffff as the mask to
 * this function.
 */
-#ifndef __swiotlb_dma_supported
-#define __swiotlb_dma_supported(hwdev, mask) (virt_to_bus(io_tlb_end - 1) <= (mask))
-#endif
 int
 swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
-	return __swiotlb_dma_supported(hwdev, mask);
+	return virt_to_bus(io_tlb_end - 1) <= mask;
 }

 EXPORT_SYMBOL(swiotlb_init);
@@ -906,4 +790,6 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
 EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
 EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
 EXPORT_SYMBOL(swiotlb_dma_mapping_error);
+EXPORT_SYMBOL(swiotlb_alloc_coherent);
+EXPORT_SYMBOL(swiotlb_free_coherent);
 EXPORT_SYMBOL(swiotlb_dma_supported);

From f4a570997e71b892805a1e71303d09c327af135f Mon Sep 17 00:00:00 2001
From: Horms
Date: Tue, 6 Mar 2007 02:34:21 -0800
Subject: [PATCH 2/7] [IA64] point saved_max_pfn to the max_pfn of the entire system

Make saved_max_pfn point to the max_pfn of the entire system. Without
this patch, vmcore is zero length on ia64. This is because saved_max_pfn
was wrongly being set to the max_pfn of the crash kernel's address
space, rather than the max_pfn of the physical memory of the machine -
the whole purpose of vmcore is to access physical memory that is not
part of the crash kernel's address space.
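To make the ordering concrete, here is a hedged, stand-alone sketch of the
rule the fix enforces in efi_memmap_init(): record the machine's true top
pfn from each descriptor *before* the range is clamped by min_addr= and
max_addr=. Only saved_max_pfn's role comes from the patch; struct mem_range
and compute_saved_max_pfn are hypothetical names for illustration, and
kernel context (PAGE_SHIFT, max()/min()) is assumed:

#include <linux/kernel.h>	/* max(), min() */

struct mem_range { u64 start, end; };	/* stand-in for an EFI descriptor */

static unsigned long compute_saved_max_pfn(const struct mem_range *r, int n,
					   u64 min_addr, u64 max_addr)
{
	unsigned long pfn_max = 0;
	int i;

	for (i = 0; i < n; i++) {
		u64 as = r[i].start, ae = r[i].end;

		/* whole-machine extent: deliberately ignores max_addr= */
		if (pfn_max < (ae >> PAGE_SHIFT))
			pfn_max = ae >> PAGE_SHIFT;

		/* only the *usable* range honours the command line limits */
		as = max(as, min_addr);
		ae = min(ae, max_addr);
		(void)as; (void)ae;	/* the real code registers [as, ae) */
	}
	return pfn_max;
}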
Signed-off-by: Simon Horman
Signed-off-by: Zou Nan hai
Sort-Of-Acked-By: Jay Lan
Signed-off-by: Andrew Morton
Signed-off-by: Tony Luck
---
 arch/ia64/kernel/efi.c   | 6 ++++++
 arch/ia64/mm/contig.c    | 5 -----
 arch/ia64/mm/discontig.c | 6 ------
 3 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 772ba6fe110..32ce330cbc6 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -21,6 +21,7 @@
 * Skip non-WB memory and ignore empty memory ranges.
 */
 #include
+#include
 #include
 #include
 #include
@@ -1009,6 +1010,11 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
 		} else
 			ae = efi_md_end(md);

+#ifdef CONFIG_CRASH_DUMP
+		/* saved_max_pfn should ignore max_addr= command line arg */
+		if (saved_max_pfn < (ae >> PAGE_SHIFT))
+			saved_max_pfn = (ae >> PAGE_SHIFT);
+#endif
 		/* keep within max_addr= and min_addr= command line arg */
 		as = max(as, min_addr);
 		ae = min(ae, max_addr);
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index ca4d41e5f17..fb0f4698f5d 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -197,11 +197,6 @@ find_memory (void)

 	find_initrd();

-#ifdef CONFIG_CRASH_DUMP
-	/* If we are doing a crash dump, we still need to know the real mem
-	 * size before original memory map is reset. */
-	saved_max_pfn = max_pfn;
-#endif
 }

 #ifdef CONFIG_SMP
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 16835108bb5..11a2d8825d8 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -480,12 +480,6 @@ void __init find_memory(void)
 	max_pfn = max_low_pfn;

 	find_initrd();
-
-#ifdef CONFIG_CRASH_DUMP
-	/* If we are doing a crash dump, we still need to know the real mem
-	 * size before original memory map is reset. */
-	saved_max_pfn = max_pfn;
-#endif
 }

 #ifdef CONFIG_SMP

From d826393cdebe340b3716002bfb1298ab19b57e83 Mon Sep 17 00:00:00 2001
From: "schwab@suse.de"
Date: Tue, 6 Mar 2007 02:34:22 -0800
Subject: [PATCH 3/7] [IA64] Remove stack hard limit on ia64

Un-breaks pthreads; broken since Oct 2003.

Signed-off-by: Olaf Hering
Signed-off-by: Andrew Morton
Signed-off-by: Tony Luck
---
 include/asm-ia64/resource.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/asm-ia64/resource.h b/include/asm-ia64/resource.h
index 77b1eee01f3..ba2272a87fc 100644
--- a/include/asm-ia64/resource.h
+++ b/include/asm-ia64/resource.h
@@ -2,7 +2,6 @@
 #define _ASM_IA64_RESOURCE_H

 #include
-#define _STK_LIM_MAX	DEFAULT_USER_STACK_SIZE
 #include

 #endif /* _ASM_IA64_RESOURCE_H */

From a27e5a13d5863bb9de0ac80cb4bb3f4442f0aad1 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn
Date: Tue, 6 Mar 2007 02:34:23 -0800
Subject: [PATCH 4/7] [IA64] always build arch/ia64/lib/xor.o

Always build ia64 xor.o because multiple config options now depend on
it. Necessary to build .20-mm* on ia64 when, e.g., CONFIG_ASYNC_TX_DMA
is defined.

Don't know if CONFIG_ASYNC_TX_DMA makes sense on ia64. If not, maybe
Kconfig should preclude it.

Could have defined a Kconfig option that defaults to true if
MD_RAID456 || ASYNC_TX_DMA to control building of xor.o, but xor.o is
only 848 bytes and this IS ia64...

Signed-off-by: Lee Schermerhorn
Cc: Bob Picco
Cc: Eric Whitney
Signed-off-by: Andrew Morton
Signed-off-by: Tony Luck
---
 arch/ia64/lib/Makefile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile
index 38fa6e49e79..46edf8444c7 100644
--- a/arch/ia64/lib/Makefile
+++ b/arch/ia64/lib/Makefile
@@ -9,12 +9,11 @@ lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \
 	checksum.o clear_page.o csum_partial_copy.o \
 	clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \
 	flush.o ip_fast_csum.o do_csum.o \
-	memset.o strlen.o
+	memset.o strlen.o xor.o

 lib-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o
 lib-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o
 lib-$(CONFIG_PERFMON) += carta_random.o
-lib-$(CONFIG_MD_RAID456) += xor.o

 AFLAGS___divdi3.o	=
 AFLAGS___udivdi3.o	= -DUNSIGNED

From 50157b09b33c2ec3637d3b317b06a7235c57c7f2 Mon Sep 17 00:00:00 2001
From: Alexandr Andreev
Date: Tue, 6 Mar 2007 02:34:24 -0800
Subject: [PATCH 5/7] [IA64] sync compat getdents

Add a VERIFY_WRITE check at the beginning, like compat_sys_getdents(),
and match its error codes (EFAULT instead of EINVAL).

Signed-off-by: Alexandr Andreev
Signed-off-by: Alexey Dobriyan
Signed-off-by: Andrew Morton
Signed-off-by: Tony Luck
---
 arch/ia64/ia32/sys_ia32.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index d430d36ae49..0afb4fe7c35 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -1267,6 +1267,10 @@ sys32_getdents (unsigned int fd, struct compat_dirent __user *dirent, unsigned i
 	struct getdents32_callback buf;
 	int error;

+	error = -EFAULT;
+	if (!access_ok(VERIFY_WRITE, dirent, count))
+		goto out;
+
 	error = -EBADF;
 	file = fget(fd);
 	if (!file)
@@ -1283,10 +1287,10 @@ sys32_getdents (unsigned int fd, struct compat_dirent __user *dirent, unsigned i
 	error = buf.error;
 	lastdirent = buf.previous;
 	if (lastdirent) {
-		error = -EINVAL;
 		if (put_user(file->f_pos, &lastdirent->d_off))
-			goto out_putf;
-		error = count - buf.count;
+			error = -EFAULT;
+		else
+			error = count - buf.count;
 	}

 out_putf:

From 41d5e5d73ecef4ef56b7b4cde962929a712689b4 Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Tue, 6 Mar 2007 02:34:25 -0800
Subject: [PATCH 6/7] [IA64] perfmon use-after-free fix

Perfmon associates vmalloc()ed memory with a file descriptor, and
installs a vma mapping that memory. Unfortunately, the vm_file field
is not filled in, so processes with mappings to that memory do not
prevent the file from being closed and the memory freed. This results
in use-after-free bugs and multiple freeing of pages, etc.

I saw this bug on an Altix on SLES9. Haven't reproduced upstream but
it looks like the same issue is there.
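The invariant behind the fix is general, not perfmon-specific: a vma that
maps memory whose lifetime is tied to a struct file must hold its own
reference to that file. A hedged sketch of the pattern follows; the helper
and its name are hypothetical, and only the vm_file assignment and the
get_file() call mirror the diff below:

#include <linux/fs.h>	/* struct file, get_file() */
#include <linux/mm.h>	/* struct vm_area_struct */

/* Sketch: pin the file for as long as the mapping exists, so a close()
 * cannot free the backing memory while user mappings still point at it.
 */
static void attach_vma_to_file(struct vm_area_struct *vma, struct file *filp)
{
	vma->vm_file = filp;	/* tell the VM which file backs this vma */
	get_file(filp);		/* reference is dropped when the vma goes away */
}

In the patch itself the reference is taken only after the remap has
succeeded, so the error paths never need to drop it.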
Signed-off-by: Nick Piggin
Cc: Stephane Eranian
Signed-off-by: Andrew Morton
Signed-off-by: Tony Luck
---
 arch/ia64/kernel/perfmon.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 9ddf896a137..abc7ad03588 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2299,7 +2299,7 @@ pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long ad
 * allocate a sampling buffer and remaps it into the user address space of the task
 */
 static int
-pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr)
+pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr)
 {
 	struct mm_struct *mm = task->mm;
 	struct vm_area_struct *vma = NULL;
@@ -2349,6 +2349,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
 	 * partially initialize the vma for the sampling buffer
 	 */
 	vma->vm_mm	     = mm;
+	vma->vm_file	     = filp;
 	vma->vm_flags	     = VM_READ| VM_MAYREAD |VM_RESERVED;
 	vma->vm_page_prot    = PAGE_READONLY; /* XXX may need to change */

@@ -2387,6 +2388,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
 		goto error;
 	}

+	get_file(filp);
+
 	/*
 	 * now insert the vma in the vm list for the process, must be
 	 * done with mmap lock held
@@ -2464,7 +2467,7 @@ pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx)
 }

 static int
-pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int ctx_flags,
+pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned int ctx_flags,
 		     unsigned int cpu, pfarg_context_t *arg)
 {
 	pfm_buffer_fmt_t *fmt = NULL;
@@ -2505,7 +2508,7 @@ pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int
 	/*
 	 * buffer is always remapped into the caller's address space
 	 */
-	ret = pfm_smpl_buffer_alloc(current, ctx, size, &uaddr);
+	ret = pfm_smpl_buffer_alloc(current, filp, ctx, size, &uaddr);
 	if (ret) goto error;

 	/* keep track of user address of buffer */
@@ -2716,7 +2719,7 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
 	 * does the user want to sample?
 	 */
 	if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) {
-		ret = pfm_setup_buffer_fmt(current, ctx, ctx_flags, 0, req);
+		ret = pfm_setup_buffer_fmt(current, filp, ctx, ctx_flags, 0, req);
 		if (ret) goto buffer_error;
 	}

From cee87af2a5f75713b98d3e65e43872e547122cd5 Mon Sep 17 00:00:00 2001
From: Magnus Damm
Date: Tue, 6 Mar 2007 02:34:26 -0800
Subject: [PATCH 7/7] [IA64] kexec: Use EFI_LOADER_DATA for ELF core header

The address where the ELF core header is stored is passed to the
secondary kernel as a kernel command line option. The memory area for
this header is also marked as a separate EFI memory descriptor on ia64.

The separate EFI memory descriptor currently has the type
EFI_UNUSABLE_MEMORY. With such a type the secondary kernel skips over
the entire memory granule (config option, 16M or 64M) when detecting
memory. If we are lucky we will just lose some memory, but if we happen
to have data in the same granule (such as an initramfs image), then
this data will never get mapped and the kernel bombs out when trying to
access it.

So this is an attempt to fix this by changing the EFI memory descriptor
type to EFI_LOADER_DATA. This type is the same type used for the kernel
data and for initramfs.

In the secondary kernel we then handle the ELF core header data the
same way as we handle the initramfs image.

This patch contains the kernel changes to make this happen. Pretty
straightforward, we reserve the area in reserve_memory(). The address
for the area comes from the kernel command line and the size comes from
the specialized EFI parsing function vmcore_find_descriptor_size().

The kexec-tools-testing code for this can be found here:
http://lists.osdl.org/pipermail/fastboot/2007-February/005983.html
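As a reading aid, the boot-time flow described above can be condensed into
a short sketch. The identifiers (elfcorehdr_addr, vmcore_find_descriptor_size,
struct rsvd_region, __va) are the ones used by the diffs below; the wrapper
function and its name are hypothetical, and the real reserve_elfcorehdr()
in the diff also handles the error codes:

/* Hypothetical condensation of the flow: parse_elfcorehdr() has already
 * recorded elfcorehdr= into elfcorehdr_addr; at reserve_memory() time we
 * look up the matching EFI_LOADER_DATA descriptor by address and reserve
 * exactly that range.
 */
static void sketch_reserve_elf_core_header(struct rsvd_region *region)
{
	unsigned long size = vmcore_find_descriptor_size(elfcorehdr_addr);

	if (size == 0)
		return;	/* no matching descriptor: nothing to reserve */

	region->start = (unsigned long)__va(elfcorehdr_addr);
	region->end   = region->start + size;
}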
Signed-off-by: Magnus Damm
Cc: Simon Horman
Cc: Vivek Goyal
Signed-off-by: Andrew Morton
Signed-off-by: Tony Luck
---
 arch/ia64/kernel/efi.c     | 30 ++++++++++++++++++++++++++++++
 arch/ia64/kernel/setup.c   | 30 ++++++++++++++++++++++++++++++
 include/asm-ia64/meminit.h |  6 +++++-
 3 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 32ce330cbc6..4061593e5b1 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -1183,3 +1183,33 @@ kdump_find_rsvd_region (unsigned long size,
 	return ~0UL;
 }
 #endif
+
+#ifdef CONFIG_PROC_VMCORE
+/* locate the size of the descriptor at a certain address */
+unsigned long
+vmcore_find_descriptor_size (unsigned long address)
+{
+	void *efi_map_start, *efi_map_end, *p;
+	efi_memory_desc_t *md;
+	u64 efi_desc_size;
+	unsigned long ret = 0;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+		md = p;
+		if (efi_wb(md) && md->type == EFI_LOADER_DATA
+		    && md->phys_addr == address) {
+			ret = efi_md_size(md);
+			break;
+		}
+	}
+
+	if (ret == 0)
+		printk(KERN_WARNING "Cannot locate EFI vmcore descriptor\n");
+
+	return ret;
+}
+#endif
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 5fa09d141ab..7d6fe65c93f 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -251,6 +251,12 @@ reserve_memory (void)
 	}
 #endif

+#ifdef CONFIG_PROC_VMCORE
+	if (reserve_elfcorehdr(&rsvd_region[n].start,
+			       &rsvd_region[n].end) == 0)
+		n++;
+#endif
+
 	efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
 	n++;

@@ -453,6 +459,30 @@ static int __init parse_elfcorehdr(char *arg)
 	return 0;
 }
 early_param("elfcorehdr", parse_elfcorehdr);
+
+int __init reserve_elfcorehdr(unsigned long *start, unsigned long *end)
+{
+	unsigned long length;
+
+	/* We get the address using the kernel command line,
+	 * but the size is extracted from the EFI tables.
+	 * Both address and size are required for reservation
+	 * to work properly.
+	 */
+
+	if (elfcorehdr_addr >= ELFCORE_ADDR_MAX)
+		return -EINVAL;
+
+	if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) {
+		elfcorehdr_addr = ELFCORE_ADDR_MAX;
+		return -EINVAL;
+	}
+
+	*start = (unsigned long)__va(elfcorehdr_addr);
+	*end = *start + length;
+	return 0;
+}
+
 #endif /* CONFIG_PROC_VMCORE */

 void __init
diff --git a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h
index 6dd476b652c..21ec5f3d23d 100644
--- a/include/asm-ia64/meminit.h
+++ b/include/asm-ia64/meminit.h
@@ -17,10 +17,11 @@
 * - kernel code & data
 * - crash dumping code reserved region
 * - Kernel memory map built from EFI memory map
+ * - ELF core header
 *
 * More could be added if necessary
 */
-#define IA64_MAX_RSVD_REGIONS 7
+#define IA64_MAX_RSVD_REGIONS 8

 struct rsvd_region {
 	unsigned long start;	/* virtual address of beginning of element */
@@ -36,6 +37,9 @@ extern void find_initrd (void);
 extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
 extern void efi_memmap_init(unsigned long *, unsigned long *);

+extern unsigned long vmcore_find_descriptor_size(unsigned long address);
+extern int reserve_elfcorehdr(unsigned long *start, unsigned long *end);
+
 /*
 * For rounding an address to the next IA64_GRANULE_SIZE or order
 */