From 010fc29a45a2e8dbc08bf45ef80b8622619aaae0 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 20 Dec 2012 15:05:06 -0800 Subject: [PATCH 01/25] compaction: fix build error in CMA && !COMPACTION isolate_freepages_block() and isolate_migratepages_range() are used for CMA as well as compaction so it breaks build for CONFIG_CMA && !CONFIG_COMPACTION. This patch fixes it. [akpm@linux-foundation.org: add "do { } while (0)", per Mel] Signed-off-by: Minchan Kim Cc: Mel Gorman Cc: Marek Szyprowski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/compaction.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 5ad7f4f4d6f..6b807e46649 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -17,6 +17,21 @@ #include #include "internal.h" +#ifdef CONFIG_COMPACTION +static inline void count_compact_event(enum vm_event_item item) +{ + count_vm_event(item); +} + +static inline void count_compact_events(enum vm_event_item item, long delta) +{ + count_vm_events(item, delta); +} +#else +#define count_compact_event(item) do { } while (0) +#define count_compact_events(item, delta) do { } while (0) +#endif + #if defined CONFIG_COMPACTION || defined CONFIG_CMA #define CREATE_TRACE_POINTS @@ -303,10 +318,9 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, if (blockpfn == end_pfn) update_pageblock_skip(cc, valid_page, total_isolated, false); - count_vm_events(COMPACTFREE_SCANNED, nr_scanned); + count_compact_events(COMPACTFREE_SCANNED, nr_scanned); if (total_isolated) - count_vm_events(COMPACTISOLATED, total_isolated); - + count_compact_events(COMPACTISOLATED, total_isolated); return total_isolated; } @@ -613,9 +627,9 @@ next_pageblock: trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); - count_vm_events(COMPACTMIGRATE_SCANNED, nr_scanned); + count_compact_events(COMPACTMIGRATE_SCANNED, nr_scanned); if (nr_isolated) - count_vm_events(COMPACTISOLATED, nr_isolated); + count_compact_events(COMPACTISOLATED, nr_isolated); return low_pfn; } @@ -1110,7 +1124,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, if (!order || !may_enter_fs || !may_perform_io) return rc; - count_vm_event(COMPACTSTALL); + count_compact_event(COMPACTSTALL); #ifdef CONFIG_CMA if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) From c8b74c2f6604923de91f8aa6539f8bb934736754 Mon Sep 17 00:00:00 2001 From: Sonny Rao Date: Thu, 20 Dec 2012 15:05:07 -0800 Subject: [PATCH 02/25] mm: fix calculation of dirtyable memory The system uses global_dirtyable_memory() to calculate number of dirtyable pages/pages that can be allocated to the page cache. A bug causes an underflow thus making the page count look like a big unsigned number. This in turn confuses the dirty writeback throttling to aggressively write back pages as they become dirty (usually 1 page at a time). This generally only affects systems with highmem because the underflowed count gets subtracted from the global count of dirtyable memory. The problem was introduced with v3.2-4896-gab8fabd Fix is to ensure we don't get an underflowed total of either highmem or global dirtyable memory. Signed-off-by: Sonny Rao Signed-off-by: Puneet Kumar Acked-by: Johannes Weiner Tested-by: Damien Wyart Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page-writeback.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 6f427122449..0713bfbf095 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -200,6 +200,18 @@ static unsigned long highmem_dirtyable_memory(unsigned long total) x += zone_page_state(z, NR_FREE_PAGES) + zone_reclaimable_pages(z) - z->dirty_balance_reserve; } + /* + * Unreclaimable memory (kernel memory or anonymous memory + * without swap) can bring down the dirtyable pages below + * the zone's dirty balance reserve and the above calculation + * will underflow. However we still want to add in nodes + * which are below threshold (negative values) to get a more + * accurate calculation but make sure that the total never + * underflows. + */ + if ((long)x < 0) + x = 0; + /* * Make sure that the number of highmem pages is never larger * than the number of the total dirtyable memory. This can only @@ -222,8 +234,8 @@ static unsigned long global_dirtyable_memory(void) { unsigned long x; - x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages() - - dirty_balance_reserve; + x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages(); + x -= min(x, dirty_balance_reserve); if (!vm_highmem_is_dirtyable) x -= highmem_dirtyable_memory(x); @@ -290,9 +302,12 @@ static unsigned long zone_dirtyable_memory(struct zone *zone) * highmem zone can hold its share of dirty pages, so we don't * care about vm_highmem_is_dirtyable here. */ - return zone_page_state(zone, NR_FREE_PAGES) + - zone_reclaimable_pages(zone) - - zone->dirty_balance_reserve; + unsigned long nr_pages = zone_page_state(zone, NR_FREE_PAGES) + + zone_reclaimable_pages(zone); + + /* don't allow this to underflow */ + nr_pages -= min(nr_pages, zone->dirty_balance_reserve); + return nr_pages; } /** From 038b358e5592eff9be175cf6601042ecc8b28179 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Thu, 20 Dec 2012 15:05:10 -0800 Subject: [PATCH 03/25] Documentation: kernel-parameters.txt remove capability.disable Remove the documentation for capability.disable. The code supporting this parameter was removed with commit 5915eb53861c ("security: remove dummy module") Signed-off-by: Josh Boyer Acked-by: Serge Hallyn Cc: Rob Landley Cc: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ddd84d62718..363e348bff9 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -446,12 +446,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. possible to determine what the correct size should be. This option provides an override for these situations. - capability.disable= - [SECURITY] Disable capabilities. This would normally - be used only if an alternative security model is to be - configured. Potentially dangerous and should only be - used if you are entirely sure of the consequences. - ccw_timeout_log [S390] See Documentation/s390/CommonIO for details. From f1d8e614d74b09531b9a85e812485340f3df7b1c Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 20 Dec 2012 15:05:13 -0800 Subject: [PATCH 04/25] drivers/firmware/dmi_scan.c: check dmi version when get system uuid As of version 2.6 of the SMBIOS specification, the first 3 fields of the UUID are supposed to be little-endian encoded. Also a minor fix to match variable meaning and mute checkpatch.pl [akpm@linux-foundation.org: tweak code comment] Signed-off-by: Zhenzhong Duan Cc: Feng Jin Cc: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/firmware/dmi_scan.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index b298158cb92..3714e3c03df 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -16,6 +16,7 @@ */ static char dmi_empty_string[] = " "; +static u16 __initdata dmi_ver; /* * Catch too early calls to dmi_check_system(): */ @@ -161,8 +162,10 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int inde return; for (i = 0; i < 16 && (is_ff || is_00); i++) { - if(d[i] != 0x00) is_ff = 0; - if(d[i] != 0xFF) is_00 = 0; + if (d[i] != 0x00) + is_00 = 0; + if (d[i] != 0xFF) + is_ff = 0; } if (is_ff || is_00) @@ -172,7 +175,15 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int inde if (!s) return; - sprintf(s, "%pUB", d); + /* + * As of version 2.6 of the SMBIOS specification, the first 3 fields of + * the UUID are supposed to be little-endian encoded. The specification + * says that this is the defacto standard. + */ + if (dmi_ver >= 0x0206) + sprintf(s, "%pUL", d); + else + sprintf(s, "%pUB", d); dmi_ident[slot] = s; } @@ -414,6 +425,7 @@ static int __init dmi_present(const char __iomem *p) * DMI version 0.0 means that the real version is taken from * the SMBIOS version, which we don't know at this point. */ + dmi_ver = (buf[14] & 0xf0) << 4 | (buf[14] & 0x0f); if (buf[14] != 0) printk(KERN_INFO "DMI %d.%d present.\n", buf[14] >> 4, buf[14] & 0xF); From 9f9c9cbb60576a1518d0bf93fb8e499cffccf377 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 20 Dec 2012 15:05:14 -0800 Subject: [PATCH 05/25] drivers/firmware/dmi_scan.c: fetch dmi version from SMBIOS if it exists The right dmi version is in SMBIOS if it's zero in DMI region This issue was originally found from an oracle bug. One customer noticed system UUID doesn't match between dmidecode & uek2. - HP ProLiant BL460c G6 : # cat /sys/devices/virtual/dmi/id/product_uuid 00000000-0000-4C48-3031-4D5030333531 # dmidecode | grep -i uuid UUID: 00000000-0000-484C-3031-4D5030333531 From SMBIOS 2.6 on, spec use little-endian encoding for UUID other than network byte order. So we need to get dmi version to distinguish. If version is 0.0, the real version is taken from the SMBIOS version. This is part of original kernel comment in code. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Zhenzhong Duan Cc: Feng Jin Cc: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/firmware/dmi_scan.c | 62 ++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 3714e3c03df..fd3ae6290d7 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -119,12 +119,12 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *, return 0; } -static int __init dmi_checksum(const u8 *buf) +static int __init dmi_checksum(const u8 *buf, u8 len) { u8 sum = 0; int a; - for (a = 0; a < 15; a++) + for (a = 0; a < len; a++) sum += buf[a]; return sum == 0; @@ -415,30 +415,57 @@ static int __init dmi_present(const char __iomem *p) u8 buf[15]; memcpy_fromio(buf, p, 15); - if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { + if (dmi_checksum(buf, 15)) { dmi_num = (buf[13] << 8) | buf[12]; dmi_len = (buf[7] << 8) | buf[6]; dmi_base = (buf[11] << 24) | (buf[10] << 16) | (buf[9] << 8) | buf[8]; - /* - * DMI version 0.0 means that the real version is taken from - * the SMBIOS version, which we don't know at this point. - */ - dmi_ver = (buf[14] & 0xf0) << 4 | (buf[14] & 0x0f); - if (buf[14] != 0) - printk(KERN_INFO "DMI %d.%d present.\n", - buf[14] >> 4, buf[14] & 0xF); - else - printk(KERN_INFO "DMI present.\n"); if (dmi_walk_early(dmi_decode) == 0) { + if (dmi_ver) + pr_info("SMBIOS %d.%d present.\n", + dmi_ver >> 8, dmi_ver & 0xFF); + else { + dmi_ver = (buf[14] & 0xF0) << 4 | + (buf[14] & 0x0F); + pr_info("Legacy DMI %d.%d present.\n", + dmi_ver >> 8, dmi_ver & 0xFF); + } dmi_dump_ids(); return 0; } } + dmi_ver = 0; return 1; } +static int __init smbios_present(const char __iomem *p) +{ + u8 buf[32]; + int offset = 0; + + memcpy_fromio(buf, p, 32); + if ((buf[5] < 32) && dmi_checksum(buf, buf[5])) { + dmi_ver = (buf[6] << 8) + buf[7]; + + /* Some BIOS report weird SMBIOS version, fix that up */ + switch (dmi_ver) { + case 0x021F: + case 0x0221: + pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", + dmi_ver & 0xFF, 3); + dmi_ver = 0x0203; + break; + case 0x0233: + pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", 51, 6); + dmi_ver = 0x0206; + break; + } + offset = 16; + } + return dmi_present(buf + offset); +} + void __init dmi_scan_machine(void) { char __iomem *p, *q; @@ -456,7 +483,7 @@ void __init dmi_scan_machine(void) if (p == NULL) goto error; - rc = dmi_present(p + 0x10); /* offset of _DMI_ string */ + rc = smbios_present(p); dmi_iounmap(p, 32); if (!rc) { dmi_available = 1; @@ -474,7 +501,12 @@ void __init dmi_scan_machine(void) goto error; for (q = p; q < p + 0x10000; q += 16) { - rc = dmi_present(q); + if (memcmp(q, "_SM_", 4) == 0 && q - p <= 0xFFE0) + rc = smbios_present(q); + else if (memcmp(q, "_DMI_", 5) == 0) + rc = dmi_present(q); + else + continue; if (!rc) { dmi_available = 1; dmi_iounmap(p, 0x10000); From b66c5984017533316fd1951770302649baf1aa33 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 20 Dec 2012 15:05:16 -0800 Subject: [PATCH 06/25] exec: do not leave bprm->interp on stack If a series of scripts are executed, each triggering module loading via unprintable bytes in the script header, kernel stack contents can leak into the command line. Normally execution of binfmt_script and binfmt_misc happens recursively. However, when modules are enabled, and unprintable bytes exist in the bprm->buf, execution will restart after attempting to load matching binfmt modules. Unfortunately, the logic in binfmt_script and binfmt_misc does not expect to get restarted. They leave bprm->interp pointing to their local stack. This means on restart bprm->interp is left pointing into unused stack memory which can then be copied into the userspace argv areas. After additional study, it seems that both recursion and restart remains the desirable way to handle exec with scripts, misc, and modules. As such, we need to protect the changes to interp. This changes the logic to require allocation for any changes to the bprm->interp. To avoid adding a new kmalloc to every exec, the default value is left as-is. Only when passing through binfmt_script or binfmt_misc does an allocation take place. For a proof of concept, see DoTest.sh from: http://www.halfdog.net/Security/2012/LinuxKernelBinfmtScriptStackDataDisclosure/ Signed-off-by: Kees Cook Cc: halfdog Cc: P J P Cc: Alexander Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 5 ++++- fs/binfmt_script.c | 4 +++- fs/exec.c | 15 +++++++++++++++ include/linux/binfmts.h | 1 + 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 9be335fb8a7..0c8869fdd14 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -172,7 +172,10 @@ static int load_misc_binary(struct linux_binprm *bprm) goto _error; bprm->argc ++; - bprm->interp = iname; /* for binfmt_script */ + /* Update interp in case binfmt_script needs it. */ + retval = bprm_change_interp(iname, bprm); + if (retval < 0) + goto _error; interp_file = open_exec (iname); retval = PTR_ERR (interp_file); diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 1610a91637e..5027a3e1492 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -80,7 +80,9 @@ static int load_script(struct linux_binprm *bprm) retval = copy_strings_kernel(1, &i_name, bprm); if (retval) return retval; bprm->argc++; - bprm->interp = interp; + retval = bprm_change_interp(interp, bprm); + if (retval < 0) + return retval; /* * OK, now restart the process with the interpreter's dentry. diff --git a/fs/exec.c b/fs/exec.c index d8e1191cb11..237d5342786 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1175,9 +1175,24 @@ void free_bprm(struct linux_binprm *bprm) mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } + /* If a binfmt changed the interp, free it. */ + if (bprm->interp != bprm->filename) + kfree(bprm->interp); kfree(bprm); } +int bprm_change_interp(char *interp, struct linux_binprm *bprm) +{ + /* If a binfmt changed the interp, free it first. */ + if (bprm->interp != bprm->filename) + kfree(bprm->interp); + bprm->interp = kstrdup(interp, GFP_KERNEL); + if (!bprm->interp) + return -ENOMEM; + return 0; +} +EXPORT_SYMBOL(bprm_change_interp); + /* * install the new credentials for this executable */ diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index a4c2b565c83..bdf3965f0a2 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -112,6 +112,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm, unsigned long stack_top, int executable_stack); extern int bprm_mm_init(struct linux_binprm *bprm); +extern int bprm_change_interp(char *interp, struct linux_binprm *bprm); extern int copy_strings_kernel(int argc, const char *const *argv, struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); From bcc2b02f4c1b36bc67272df7119b75bac78525ab Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 20 Dec 2012 15:05:18 -0800 Subject: [PATCH 07/25] mm: cma: WARN if freed memory is still in use Memory returned to free_contig_range() must have no other references. Let kernel to complain loudly if page reference count is not equal to 1. [rientjes@google.com: support sparsemem] Signed-off-by: Marek Szyprowski Reviewed-by: Kyungmin Park Acked-by: Michal Nazarewicz Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2ad2ad168ef..4ba5e37127f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5978,8 +5978,15 @@ done: void free_contig_range(unsigned long pfn, unsigned nr_pages) { - for (; nr_pages--; ++pfn) - __free_page(pfn_to_page(pfn)); + unsigned int count = 0; + + for (; nr_pages--; pfn++) { + struct page *page = pfn_to_page(pfn); + + count += page_count(page) != 1; + __free_page(page); + } + WARN(count != 0, "%d pages are still in use!\n", count); } #endif From ba3f7a1765dc4095c757ac82eb14d6824c45e87d Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 20 Dec 2012 15:05:19 -0800 Subject: [PATCH 08/25] drivers/rtc/rtc-imxdi.c: must include Add the missing header include for spinlocks, to avoid potential build failures on specific architectures or configurations. Signed-off-by: Jean Delvare Acked-by: Sascha Hauer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-imxdi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c index 18a4f0dd78a..8da7a5cf83c 100644 --- a/drivers/rtc/rtc-imxdi.c +++ b/drivers/rtc/rtc-imxdi.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include From 44fd07e989a9a27476d8963296688127a4fd4df4 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 20 Dec 2012 15:05:21 -0800 Subject: [PATCH 09/25] kcmp: include linux/ptrace.h This makes it compile on s390. After all the ptrace_may_access (which we use this file) is declared exactly in linux/ptrace.h. This is preparatory work to wire this syscall up on all archs. Signed-off-by: Cyrill Gorcunov Signed-off-by: Alexander Kartashov Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kcmp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/kcmp.c b/kernel/kcmp.c index 30b7b225306..e30ac0fe61c 100644 --- a/kernel/kcmp.c +++ b/kernel/kcmp.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include From 5daa669c80c121ab75ecdf1c8e2df52f072fd25e Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 20 Dec 2012 15:05:24 -0800 Subject: [PATCH 10/25] hfsplus: avoid crash on failed block map free If the read fails we kmap an error code. This doesn't end well. Instead print a critical error and pray. This mirrors the rest of the fs behaviour with critical error cases. Acked-by: Vyacheslav Dubeyko Signed-off-by: Alan Cox Signed-off-by: Vyacheslav Dubeyko Cc: Al Viro Cc: Christoph Hellwig Cc: Jan Kara Acked-by: Hin-Tak Leung Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/bitmap.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index 4cfbe2edd29..6feefc0cb48 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c @@ -176,12 +176,14 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count); /* are all of the bits in range? */ if ((offset + count) > sbi->total_blocks) - return -2; + return -ENOENT; mutex_lock(&sbi->alloc_mutex); mapping = sbi->alloc_file->i_mapping; pnr = offset / PAGE_CACHE_BITS; page = read_mapping_page(mapping, pnr, NULL); + if (IS_ERR(page)) + goto kaboom; pptr = kmap(page); curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32; end = pptr + PAGE_CACHE_BITS / 32; @@ -214,6 +216,8 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) set_page_dirty(page); kunmap(page); page = read_mapping_page(mapping, ++pnr, NULL); + if (IS_ERR(page)) + goto kaboom; pptr = kmap(page); curr = pptr; end = pptr + PAGE_CACHE_BITS / 32; @@ -232,4 +236,11 @@ out: mutex_unlock(&sbi->alloc_mutex); return 0; + +kaboom: + printk(KERN_CRIT "hfsplus: unable to mark blocks free: error %ld\n", + PTR_ERR(page)); + mutex_unlock(&sbi->alloc_mutex); + + return -EIO; } From 1b243fd39bd605cdfc482bba4e56b0cb34b28f27 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Thu, 20 Dec 2012 15:05:25 -0800 Subject: [PATCH 11/25] hfsplus: rework processing errors in hfsplus_free_extents() Currently, it doesn't process error codes from the hfsplus_block_free() call in hfsplus_free_extents() method. Add some error code processing. Signed-off-by: Vyacheslav Dubeyko Cc: Christoph Hellwig Cc: Al Viro Cc: Hin-Tak Leung Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/extents.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 5849e3ef35c..eba76eab6d6 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -329,6 +329,7 @@ static int hfsplus_free_extents(struct super_block *sb, { u32 count, start; int i; + int err = 0; hfsplus_dump_extent(extent); for (i = 0; i < 8; extent++, i++) { @@ -345,18 +346,33 @@ found: for (;;) { start = be32_to_cpu(extent->start_block); if (count <= block_nr) { - hfsplus_block_free(sb, start, count); + err = hfsplus_block_free(sb, start, count); + if (err) { + printk(KERN_ERR "hfs: can't free extent\n"); + dprint(DBG_EXTENT, " start: %u count: %u\n", + start, count); + } extent->block_count = 0; extent->start_block = 0; block_nr -= count; } else { count -= block_nr; - hfsplus_block_free(sb, start + count, block_nr); + err = hfsplus_block_free(sb, start + count, block_nr); + if (err) { + printk(KERN_ERR "hfs: can't free extent\n"); + dprint(DBG_EXTENT, " start: %u count: %u\n", + start, count); + } extent->block_count = cpu_to_be32(count); block_nr = 0; } - if (!block_nr || !i) - return 0; + if (!block_nr || !i) { + /* + * Try to free all extents and + * return only last error + */ + return err; + } i--; extent--; count = be32_to_cpu(extent->block_count); From 81cc7fad552bc9e4fa8c1f25becbecaaa1d41b67 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Thu, 20 Dec 2012 15:05:28 -0800 Subject: [PATCH 12/25] hfsplus: rework processing of hfs_btree_write() returned error Add to hfs_btree_write() a return of -EIO on failure of b-tree node searching. Also add logic ofor processing errors from hfs_btree_write() in hfsplus_system_write_inode() with a message about b-tree writing failure. [akpm@linux-foundation.org: reduce scope of `err', print errno on error] Signed-off-by: Vyacheslav Dubeyko Cc: Christoph Hellwig Cc: Al Viro Acked-by: Hin-Tak Leung Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/btree.c | 5 +++-- fs/hfsplus/hfsplus_fs.h | 2 +- fs/hfsplus/super.c | 10 ++++++++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 21023d9f8ff..685d07d0ed1 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -159,7 +159,7 @@ void hfs_btree_close(struct hfs_btree *tree) kfree(tree); } -void hfs_btree_write(struct hfs_btree *tree) +int hfs_btree_write(struct hfs_btree *tree) { struct hfs_btree_header_rec *head; struct hfs_bnode *node; @@ -168,7 +168,7 @@ void hfs_btree_write(struct hfs_btree *tree) node = hfs_bnode_find(tree, 0); if (IS_ERR(node)) /* panic? */ - return; + return -EIO; /* Load the header */ page = node->page[0]; head = (struct hfs_btree_header_rec *)(kmap(page) + @@ -186,6 +186,7 @@ void hfs_btree_write(struct hfs_btree *tree) kunmap(page); set_page_dirty(page); hfs_bnode_put(node); + return 0; } static struct hfs_bnode *hfs_bmap_new_bmap(struct hfs_bnode *prev, u32 idx) diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index c571de224b1..a6da86b1b4c 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -335,7 +335,7 @@ int hfsplus_block_free(struct super_block *, u32, u32); /* btree.c */ struct hfs_btree *hfs_btree_open(struct super_block *, u32); void hfs_btree_close(struct hfs_btree *); -void hfs_btree_write(struct hfs_btree *); +int hfs_btree_write(struct hfs_btree *); struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *); void hfs_bmap_free(struct hfs_bnode *); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 811a84d2d96..2036f585b09 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -127,8 +127,14 @@ static int hfsplus_system_write_inode(struct inode *inode) hfsplus_mark_mdb_dirty(inode->i_sb); } hfsplus_inode_write_fork(inode, fork); - if (tree) - hfs_btree_write(tree); + if (tree) { + int err = hfs_btree_write(tree); + if (err) { + printk(KERN_ERR "hfs: b-tree write err: %d, ino %lu\n", + err, inode->i_ino); + return err; + } + } return 0; } From bffdd661bd424ea4298639805bfcbcaf8ffb62f2 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Thu, 20 Dec 2012 15:05:29 -0800 Subject: [PATCH 13/25] hfsplus: add error message for the case of failure of sync fs in delayed_sync_fs() method Add an error message for the case of failure of sync fs in delayed_sync_fs() method. Signed-off-by: Vyacheslav Dubeyko Cc: Christoph Hellwig Cc: Al Viro Cc: Hin-Tak Leung Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 2036f585b09..796198d2655 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -232,6 +232,7 @@ out: static void delayed_sync_fs(struct work_struct *work) { + int err; struct hfsplus_sb_info *sbi; sbi = container_of(work, struct hfsplus_sb_info, sync_work.work); @@ -240,7 +241,9 @@ static void delayed_sync_fs(struct work_struct *work) sbi->work_queued = 0; spin_unlock(&sbi->work_lock); - hfsplus_sync_fs(sbi->alloc_file->i_sb, 1); + err = hfsplus_sync_fs(sbi->alloc_file->i_sb, 1); + if (err) + printk(KERN_ERR "hfs: delayed sync fs err %d\n", err); } void hfsplus_mark_mdb_dirty(struct super_block *sb) From 2c79737af83e0d586899f48c6010148ea2064369 Mon Sep 17 00:00:00 2001 From: Jeremy Eder Date: Thu, 20 Dec 2012 15:05:32 -0800 Subject: [PATCH 14/25] mm: clean up transparent hugepage sysfs error messages Clarify error messages and correct a few typos in the transparent hugepage sysfs init code. Signed-off-by: Jeremy Eder Acked-by: Rafael Aquini Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 32754eece63..9e894edc781 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -574,19 +574,19 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj) *hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj); if (unlikely(!*hugepage_kobj)) { - printk(KERN_ERR "hugepage: failed kobject create\n"); + printk(KERN_ERR "hugepage: failed to create transparent hugepage kobject\n"); return -ENOMEM; } err = sysfs_create_group(*hugepage_kobj, &hugepage_attr_group); if (err) { - printk(KERN_ERR "hugepage: failed register hugeage group\n"); + printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n"); goto delete_obj; } err = sysfs_create_group(*hugepage_kobj, &khugepaged_attr_group); if (err) { - printk(KERN_ERR "hugepage: failed register hugeage group\n"); + printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n"); goto remove_hp_group; } From 5abe257af8b95857b95fa0ba694530b446ae32d8 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 20 Dec 2012 15:05:34 -0800 Subject: [PATCH 15/25] revert "rtc: recycle id when unloading a rtc driver" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert commit 2830a6d20139df2198d63235df7957712adb28e5. We already perform the ida_simple_remove() in rtc_device_release(), which is an appropriate place. Commit 2830a6d20 ("rtc: recycle id when unloading a rtc driver") caused the kernel to emit ida_remove called for id=0 which is not allocated. warnings when rtc_device_release() tries to release an alread-released ID. Let's restore things to their previous state and then work out why Vincent's kernel wasn't calling rtc_device_release() - presumably a bug in a specific sub-driver. Reported-by: Lothar Waßmann Acked-by: Alexander Holler Cc: Vincent Palatin Cc: [3.7.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/class.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index f8a0aab218c..5143629dedb 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -244,7 +244,6 @@ void rtc_device_unregister(struct rtc_device *rtc) rtc_proc_del_device(rtc); device_unregister(&rtc->dev); rtc->ops = NULL; - ida_simple_remove(&rtc_ida, rtc->id); mutex_unlock(&rtc->ops_lock); put_device(&rtc->dev); } From 495e9d84607cda966ba6d223d5eb9df0070cd21a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 20 Dec 2012 15:05:37 -0800 Subject: [PATCH 16/25] checkpatch: warn on uapi #includes that #include Acked-by: David Howells Acked-by: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 1d6e4c54137..4d2c7dfdaab 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2226,8 +2226,11 @@ sub process { my $path = $1; if ($path =~ m{//}) { ERROR("MALFORMED_INCLUDE", - "malformed #include filename\n" . - $herecurr); + "malformed #include filename\n" . $herecurr); + } + if ($path =~ "^uapi/" && $realfile =~ m@\binclude/uapi/@) { + ERROR("UAPI_INCLUDE", + "No #include in ...include/uapi/... should use a uapi/ path prefix\n" . $herecurr); } } From 154b454edaf6d94a69016db6c342c57fa935bbe9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 20 Dec 2012 15:05:40 -0800 Subject: [PATCH 17/25] memcg: don't register hotcpu notifier from ->css_alloc() Commit 648bb56d076b ("cgroup: lock cgroup_mutex in cgroup_init_subsys()") made cgroup_init_subsys() grab cgroup_mutex before invoking ->css_alloc() for the root css. Because memcg registers hotcpu notifier from ->css_alloc() for the root css, this introduced circular locking dependency between cgroup_mutex and cpu hotplug. Fix it by moving hotcpu notifier registration to a subsys initcall. ====================================================== [ INFO: possible circular locking dependency detected ] 3.7.0-rc4-work+ #42 Not tainted ------------------------------------------------------- bash/645 is trying to acquire lock: (cgroup_mutex){+.+.+.}, at: [] cgroup_lock+0x17/0x20 but task is already holding lock: (cpu_hotplug.lock){+.+.+.}, at: [] cpu_hotplug_begin+0x2f/0x60 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (cpu_hotplug.lock){+.+.+.}: lock_acquire+0x97/0x1e0 mutex_lock_nested+0x61/0x3b0 get_online_cpus+0x3c/0x60 rebuild_sched_domains_locked+0x1b/0x70 cpuset_write_resmask+0x298/0x2c0 cgroup_file_write+0x1ef/0x300 vfs_write+0xa8/0x160 sys_write+0x52/0xa0 system_call_fastpath+0x16/0x1b -> #0 (cgroup_mutex){+.+.+.}: __lock_acquire+0x14ce/0x1d20 lock_acquire+0x97/0x1e0 mutex_lock_nested+0x61/0x3b0 cgroup_lock+0x17/0x20 cpuset_handle_hotplug+0x1b/0x560 cpuset_update_active_cpus+0xe/0x10 cpuset_cpu_inactive+0x47/0x50 notifier_call_chain+0x66/0x150 __raw_notifier_call_chain+0xe/0x10 __cpu_notify+0x20/0x40 _cpu_down+0x7e/0x2f0 cpu_down+0x36/0x50 store_online+0x5d/0xe0 dev_attr_store+0x18/0x30 sysfs_write_file+0xe0/0x150 vfs_write+0xa8/0x160 sys_write+0x52/0xa0 system_call_fastpath+0x16/0x1b other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(cpu_hotplug.lock); lock(cgroup_mutex); lock(cpu_hotplug.lock); lock(cgroup_mutex); *** DEADLOCK *** 5 locks held by bash/645: #0: (&buffer->mutex){+.+.+.}, at: [] sysfs_write_file+0x48/0x150 #1: (s_active#42){.+.+.+}, at: [] sysfs_write_file+0xc8/0x150 #2: (x86_cpu_hotplug_driver_mutex){+.+...}, at: [] cpu_hotplug_driver_lock+0x1 +7/0x20 #3: (cpu_add_remove_lock){+.+.+.}, at: [] cpu_maps_update_begin+0x17/0x20 #4: (cpu_hotplug.lock){+.+.+.}, at: [] cpu_hotplug_begin+0x2f/0x60 stack backtrace: Pid: 645, comm: bash Not tainted 3.7.0-rc4-work+ #42 Call Trace: print_circular_bug+0x28e/0x29f __lock_acquire+0x14ce/0x1d20 lock_acquire+0x97/0x1e0 mutex_lock_nested+0x61/0x3b0 cgroup_lock+0x17/0x20 cpuset_handle_hotplug+0x1b/0x560 cpuset_update_active_cpus+0xe/0x10 cpuset_cpu_inactive+0x47/0x50 notifier_call_chain+0x66/0x150 __raw_notifier_call_chain+0xe/0x10 __cpu_notify+0x20/0x40 _cpu_down+0x7e/0x2f0 cpu_down+0x36/0x50 store_online+0x5d/0xe0 dev_attr_store+0x18/0x30 sysfs_write_file+0xe0/0x150 vfs_write+0xa8/0x160 sys_write+0x52/0xa0 system_call_fastpath+0x16/0x1b Signed-off-by: Tejun Heo Reported-by: Fengguang Wu Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f3009b4bae5..09255ec8159 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6090,7 +6090,6 @@ mem_cgroup_css_alloc(struct cgroup *cont) &per_cpu(memcg_stock, cpu); INIT_WORK(&stock->work, drain_local_stock); } - hotcpu_notifier(memcg_cpu_hotplug_callback, 0); } else { parent = mem_cgroup_from_cont(cont->parent); memcg->use_hierarchy = parent->use_hierarchy; @@ -6756,6 +6755,19 @@ struct cgroup_subsys mem_cgroup_subsys = { .use_id = 1, }; +/* + * The rest of init is performed during ->css_alloc() for root css which + * happens before initcalls. hotcpu_notifier() can't be done together as + * it would introduce circular locking by adding cgroup_lock -> cpu hotplug + * dependency. Do it from a subsys_initcall(). + */ +static int __init mem_cgroup_init(void) +{ + hotcpu_notifier(memcg_cpu_hotplug_callback, 0); + return 0; +} +subsys_initcall(mem_cgroup_init); + #ifdef CONFIG_MEMCG_SWAP static int __init enable_swap_account(char *s) { From c4e18497d8fd92eef2c6e7eadcc1a107ccd115ea Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 20 Dec 2012 15:05:42 -0800 Subject: [PATCH 18/25] linux/kernel.h: fix DIV_ROUND_CLOSEST with unsigned divisors Commit 263a523d18bc ("linux/kernel.h: Fix warning seen with W=1 due to change in DIV_ROUND_CLOSEST") fixes a warning seen with W=1 due to change in DIV_ROUND_CLOSEST. Unfortunately, the C compiler converts divide operations with unsigned divisors to unsigned, even if the dividend is signed and negative (for example, -10 / 5U = 858993457). The C standard says "If one operand has unsigned int type, the other operand is converted to unsigned int", so the compiler is not to blame. As a result, DIV_ROUND_CLOSEST(0, 2U) and similar operations now return bad values, since the automatic conversion of expressions such as "0 - 2U/2" to unsigned was not taken into account. Fix by checking for the divisor variable type when deciding which operation to perform. This fixes DIV_ROUND_CLOSEST(0, 2U), but still returns bad values for negative dividends divided by unsigned divisors. Mark the latter case as unsupported. One observed effect of this problem is that the s2c_hwmon driver reports a value of 4198403 instead of 0 if the ADC reads 0. Other impact is unpredictable. Problem is seen if the divisor is an unsigned variable or constant and the dividend is less than (divisor/2). Signed-off-by: Guenter Roeck Reported-by: Juergen Beisert Tested-by: Juergen Beisert Cc: Jean Delvare Cc: [3.7.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d140e8fb075..c566927efcb 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -77,13 +77,15 @@ /* * Divide positive or negative dividend by positive divisor and round - * to closest integer. Result is undefined for negative divisors. + * to closest integer. Result is undefined for negative divisors and + * for negative dividends if the divisor variable type is unsigned. */ #define DIV_ROUND_CLOSEST(x, divisor)( \ { \ typeof(x) __x = x; \ typeof(divisor) __d = divisor; \ - (((typeof(x))-1) > 0 || (__x) > 0) ? \ + (((typeof(x))-1) > 0 || \ + ((typeof(divisor))-1) > 0 || (__x) > 0) ? \ (((__x) + ((__d) / 2)) / (__d)) : \ (((__x) - ((__d) / 2)) / (__d)); \ } \ From ee297209bf0a25c6717b7c063e76795142d32f37 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Thu, 20 Dec 2012 15:05:44 -0800 Subject: [PATCH 19/25] proc: fix inconsistent lock state Lockdep found an inconsistent lock state when rcu is processing delayed work in softirq. Currently, kernel is using spin_lock/spin_unlock to protect proc_inum_ida, but proc_free_inum is called by rcu in softirq context. Use spin_lock_bh/spin_unlock_bh fix following lockdep warning. ================================= [ INFO: inconsistent lock state ] 3.7.0 #36 Not tainted --------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/1/0 [HC0[0]:SC1[1]:HE1:SE0] takes: (proc_inum_lock){+.?...}, at: proc_free_inum+0x1c/0x50 {SOFTIRQ-ON-W} state was registered at: __lock_acquire+0x8ae/0xca0 lock_acquire+0x199/0x200 _raw_spin_lock+0x41/0x50 proc_alloc_inum+0x4c/0xd0 alloc_mnt_ns+0x49/0xc0 create_mnt_ns+0x25/0x70 mnt_init+0x161/0x1c7 vfs_caches_init+0x107/0x11a start_kernel+0x348/0x38c x86_64_start_reservations+0x131/0x136 x86_64_start_kernel+0x103/0x112 irq event stamp: 2993422 hardirqs last enabled at (2993422): _raw_spin_unlock_irqrestore+0x55/0x80 hardirqs last disabled at (2993421): _raw_spin_lock_irqsave+0x29/0x70 softirqs last enabled at (2993394): _local_bh_enable+0x13/0x20 softirqs last disabled at (2993395): call_softirq+0x1c/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(proc_inum_lock); lock(proc_inum_lock); *** DEADLOCK *** no locks held by swapper/1/0. stack backtrace: Pid: 0, comm: swapper/1 Not tainted 3.7.0 #36 Call Trace: [] ? vprintk_emit+0x471/0x510 print_usage_bug+0x2a5/0x2c0 mark_lock+0x33b/0x5e0 __lock_acquire+0x813/0xca0 lock_acquire+0x199/0x200 _raw_spin_lock+0x41/0x50 proc_free_inum+0x1c/0x50 free_pid_ns+0x1c/0x50 put_pid_ns+0x2e/0x50 put_pid+0x4a/0x60 delayed_put_pid+0x12/0x20 rcu_process_callbacks+0x462/0x790 __do_softirq+0x1b4/0x3b0 call_softirq+0x1c/0x30 do_softirq+0x59/0xd0 irq_exit+0x54/0xd0 smp_apic_timer_interrupt+0x95/0xa3 apic_timer_interrupt+0x72/0x80 cpuidle_enter_tk+0x10/0x20 cpuidle_enter_state+0x17/0x50 cpuidle_idle_call+0x287/0x520 cpu_idle+0xba/0x130 start_secondary+0x2b3/0x2bc Signed-off-by: Xiaotian Feng Cc: Al Viro Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 7b3ae3cc0ef..e659a0ff1da 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -359,18 +359,18 @@ retry: if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL)) return -ENOMEM; - spin_lock(&proc_inum_lock); + spin_lock_bh(&proc_inum_lock); error = ida_get_new(&proc_inum_ida, &i); - spin_unlock(&proc_inum_lock); + spin_unlock_bh(&proc_inum_lock); if (error == -EAGAIN) goto retry; else if (error) return error; if (i > UINT_MAX - PROC_DYNAMIC_FIRST) { - spin_lock(&proc_inum_lock); + spin_lock_bh(&proc_inum_lock); ida_remove(&proc_inum_ida, i); - spin_unlock(&proc_inum_lock); + spin_unlock_bh(&proc_inum_lock); return -ENOSPC; } *inum = PROC_DYNAMIC_FIRST + i; @@ -379,9 +379,9 @@ retry: void proc_free_inum(unsigned int inum) { - spin_lock(&proc_inum_lock); + spin_lock_bh(&proc_inum_lock); ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); - spin_unlock(&proc_inum_lock); + spin_unlock_bh(&proc_inum_lock); } static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) From 99bbb2b0d4431e67d11a98f66a6ef8fcd8622649 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 20 Dec 2012 15:05:45 -0800 Subject: [PATCH 20/25] Documentation: ABI: remove testing/sysfs-devices-node This file is already documented in the stable ABI (see commit 5bbe1ec11fcf). Signed-off-by: Davidlohr Bueso Cc: Greg KH Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-devices-node | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 Documentation/ABI/testing/sysfs-devices-node diff --git a/Documentation/ABI/testing/sysfs-devices-node b/Documentation/ABI/testing/sysfs-devices-node deleted file mode 100644 index 453a210c3ce..00000000000 --- a/Documentation/ABI/testing/sysfs-devices-node +++ /dev/null @@ -1,7 +0,0 @@ -What: /sys/devices/system/node/nodeX/compact -Date: February 2010 -Contact: Mel Gorman -Description: - When this file is written to, all memory within that node - will be compacted. When it completes, memory will be freed - into blocks which have as many contiguous pages as possible From c39540c6d1add1d0ad843b3d2437311924193359 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Thu, 20 Dec 2012 15:05:46 -0800 Subject: [PATCH 21/25] fat: fix incorrect function comment fat_search_long() returns 0 on success, -ENOENT/ENOMEM on failure. Change the function comment accordingly. While at it, fix some trivial typos. Signed-off-by: Ravishankar N Signed-off-by: Namjae Jeon Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 5 ++--- fs/fat/inode.c | 2 +- fs/fat/misc.c | 4 ++++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 2a182342442..58bf744dbf3 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -461,8 +461,7 @@ static int fat_parse_short(struct super_block *sb, } /* - * Return values: negative -> error, 0 -> not found, positive -> found, - * value is the total amount of slots, including the shortname entry. + * Return values: negative -> error/not found, 0 -> found. */ int fat_search_long(struct inode *inode, const unsigned char *name, int name_len, struct fat_slot_info *sinfo) @@ -1255,7 +1254,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots, sinfo->nr_slots = nr_slots; - /* First stage: search free direcotry entries */ + /* First stage: search free directory entries */ free_slots = nr_bhs = 0; bh = prev = NULL; pos = 0; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 35806813ea4..f8f491677a4 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1344,7 +1344,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sbi->dir_entries = get_unaligned_le16(&b->dir_entries); if (sbi->dir_entries & (sbi->dir_per_block - 1)) { if (!silent) - fat_msg(sb, KERN_ERR, "bogus directroy-entries per block" + fat_msg(sb, KERN_ERR, "bogus directory-entries per block" " (%u)", sbi->dir_entries); brelse(bh); goto out_invalid; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 5eb600dc43a..359d307b550 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -135,6 +135,10 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) } if (ret < 0) return ret; + /* + * FIXME:Although we can add this cache, fat_cache_add() is + * assuming to be called after linear search with fat_cache_id. + */ // fat_cache_add(inode, new_fclus, new_dclus); } else { MSDOS_I(inode)->i_start = new_dclus; From 891348ca0f66206f1dc0e30d63757e3df1ae2d15 Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Thu, 20 Dec 2012 15:05:50 -0800 Subject: [PATCH 22/25] SGI-XP: handle non-fatal traps We found a user code which was raising a divide-by-zero trap. That trap would lead to XPC connections between system-partitions being torn down due to the die_chain notifier callouts it received. This also revealed a different issue where multiple callers into xpc_die_deactivate() would all attempt to do the disconnect in parallel which would sometimes lock up but often overwhelm the console on very large machines as each would print at least one line of output at the end of the deactivate. I reviewed all the users of the die_chain notifier and changed the code to ignore the notifier callouts for reasons which will not actually lead to a system to continue on to call die(). [akpm@linux-foundation.org: fix ia64] Signed-off-by: Robin Holt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/misc/sgi-xp/xpc_main.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 8d082b46426..d971817182f 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -53,6 +53,10 @@ #include #include "xpc.h" +#ifdef CONFIG_X86_64 +#include +#endif + /* define two XPC debug device structures to be used with dev_dbg() et al */ struct device_driver xpc_dbg_name = { @@ -1079,6 +1083,9 @@ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) return NOTIFY_DONE; } +/* Used to only allow one cpu to complete disconnect */ +static unsigned int xpc_die_disconnecting; + /* * Notify other partitions to deactivate from us by first disengaging from all * references to our memory. @@ -1092,6 +1099,9 @@ xpc_die_deactivate(void) long keep_waiting; long wait_to_print; + if (cmpxchg(&xpc_die_disconnecting, 0, 1)) + return; + /* keep xpc_hb_checker thread from doing anything (just in case) */ xpc_exiting = 1; @@ -1159,7 +1169,7 @@ xpc_die_deactivate(void) * about the lack of a heartbeat. */ static int -xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) +xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args) { #ifdef CONFIG_IA64 /* !!! temporary kludge */ switch (event) { @@ -1191,7 +1201,27 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) break; } #else - xpc_die_deactivate(); + struct die_args *die_args = _die_args; + + switch (event) { + case DIE_TRAP: + if (die_args->trapnr == X86_TRAP_DF) + xpc_die_deactivate(); + + if (((die_args->trapnr == X86_TRAP_MF) || + (die_args->trapnr == X86_TRAP_XF)) && + !user_mode_vm(die_args->regs)) + xpc_die_deactivate(); + + break; + case DIE_INT3: + case DIE_DEBUG: + break; + case DIE_OOPS: + case DIE_GPF: + default: + xpc_die_deactivate(); + } #endif return NOTIFY_DONE; From a68c2f12b4b28994aaf622bbe5724b7258cc2fcf Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Thu, 20 Dec 2012 15:05:52 -0800 Subject: [PATCH 23/25] sendfile: allows bypassing of notifier events do_sendfile() in fs/read_write.c does not call the fsnotify functions, unlike its neighbors. This manifests as a lack of inotify ACCESS events when a file is sent using sendfile(2). Addresses https://bugzilla.kernel.org/show_bug.cgi?id=12812 [akpm@linux-foundation.org: use fsnotify_modify(out.file), not fsnotify_access(), per Dave] Signed-off-by: Alan Cox Cc: Dave Chinner Cc: Jens Axboe Cc: Scott Wolchok Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/read_write.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/read_write.c b/fs/read_write.c index 1edaf099ddd..bb34af31528 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -935,6 +935,8 @@ ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, if (retval > 0) { add_rchar(current, retval); add_wchar(current, retval); + fsnotify_access(in.file); + fsnotify_modify(out.file); } inc_syscr(current); From e67eab39bee26f509d38d00ca1a8f24b63f46a31 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 20 Dec 2012 15:05:54 -0800 Subject: [PATCH 24/25] keys: fix unreachable code We set ret to NULL then test it. Remove the bogus test Signed-off-by: Alan Cox Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- security/keys/process_keys.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 58dfe089094..20e4bf57aec 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -367,8 +367,6 @@ key_ref_t search_my_process_keyrings(struct key_type *type, switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ - if (ret) - break; case -ENOKEY: /* negative key */ ret = key_ref; break; From cfde819088422503b5c69e03ab7bb90f87121d4d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 15:05:56 -0800 Subject: [PATCH 25/25] keys: use keyring_alloc() to create module signing keyring Use keyring_alloc() to create special keyrings now that it has a permissions parameter rather than using key_alloc() + key_instantiate_and_link(). Signed-off-by: David Howells Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/modsign_pubkey.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c index 045504fffbb..2b6e69909c3 100644 --- a/kernel/modsign_pubkey.c +++ b/kernel/modsign_pubkey.c @@ -34,18 +34,15 @@ static __init int module_verify_init(void) { pr_notice("Initialise module verification\n"); - modsign_keyring = key_alloc(&key_type_keyring, ".module_sign", - KUIDT_INIT(0), KGIDT_INIT(0), - current_cred(), - (KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ, - KEY_ALLOC_NOT_IN_QUOTA); + modsign_keyring = keyring_alloc(".module_sign", + KUIDT_INIT(0), KGIDT_INIT(0), + current_cred(), + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(modsign_keyring)) panic("Can't allocate module signing keyring\n"); - if (key_instantiate_and_link(modsign_keyring, NULL, 0, NULL, NULL) < 0) - panic("Can't instantiate module signing keyring\n"); - return 0; }