From 010fc29a45a2e8dbc08bf45ef80b8622619aaae0 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 20 Dec 2012 15:05:06 -0800
Subject: [PATCH 01/25] compaction: fix build error in CMA && !COMPACTION

isolate_freepages_block() and isolate_migratepages_range() are used for
CMA as well as compaction so it breaks build for CONFIG_CMA &&
!CONFIG_COMPACTION.

This patch fixes it.

[akpm@linux-foundation.org: add "do { } while (0)", per Mel]
Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/compaction.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index 5ad7f4f4d6f..6b807e46649 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -17,6 +17,21 @@
 #include <linux/balloon_compaction.h>
 #include "internal.h"
 
+#ifdef CONFIG_COMPACTION
+static inline void count_compact_event(enum vm_event_item item)
+{
+	count_vm_event(item);
+}
+
+static inline void count_compact_events(enum vm_event_item item, long delta)
+{
+	count_vm_events(item, delta);
+}
+#else
+#define count_compact_event(item) do { } while (0)
+#define count_compact_events(item, delta) do { } while (0)
+#endif
+
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
 #define CREATE_TRACE_POINTS
@@ -303,10 +318,9 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 	if (blockpfn == end_pfn)
 		update_pageblock_skip(cc, valid_page, total_isolated, false);
 
-	count_vm_events(COMPACTFREE_SCANNED, nr_scanned);
+	count_compact_events(COMPACTFREE_SCANNED, nr_scanned);
 	if (total_isolated)
-		count_vm_events(COMPACTISOLATED, total_isolated);
-
+		count_compact_events(COMPACTISOLATED, total_isolated);
 	return total_isolated;
 }
 
@@ -613,9 +627,9 @@ next_pageblock:
 
 	trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
 
-	count_vm_events(COMPACTMIGRATE_SCANNED, nr_scanned);
+	count_compact_events(COMPACTMIGRATE_SCANNED, nr_scanned);
 	if (nr_isolated)
-		count_vm_events(COMPACTISOLATED, nr_isolated);
+		count_compact_events(COMPACTISOLATED, nr_isolated);
 
 	return low_pfn;
 }
@@ -1110,7 +1124,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	if (!order || !may_enter_fs || !may_perform_io)
 		return rc;
 
-	count_vm_event(COMPACTSTALL);
+	count_compact_event(COMPACTSTALL);
 
 #ifdef CONFIG_CMA
 	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)

From c8b74c2f6604923de91f8aa6539f8bb934736754 Mon Sep 17 00:00:00 2001
From: Sonny Rao <sonnyrao@chromium.org>
Date: Thu, 20 Dec 2012 15:05:07 -0800
Subject: [PATCH 02/25] mm: fix calculation of dirtyable memory

The system uses global_dirtyable_memory() to calculate number of
dirtyable pages/pages that can be allocated to the page cache.  A bug
causes an underflow thus making the page count look like a big unsigned
number.  This in turn confuses the dirty writeback throttling to
aggressively write back pages as they become dirty (usually 1 page at a
time).  This generally only affects systems with highmem because the
underflowed count gets subtracted from the global count of dirtyable
memory.

The problem was introduced with v3.2-4896-gab8fabd

Fix is to ensure we don't get an underflowed total of either highmem or
global dirtyable memory.

Signed-off-by: Sonny Rao <sonnyrao@chromium.org>
Signed-off-by: Puneet Kumar <puneetster@chromium.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Tested-by: Damien Wyart <damien.wyart@free.fr>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page-writeback.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 6f427122449..0713bfbf095 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -200,6 +200,18 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
 		x += zone_page_state(z, NR_FREE_PAGES) +
 		     zone_reclaimable_pages(z) - z->dirty_balance_reserve;
 	}
+	/*
+	 * Unreclaimable memory (kernel memory or anonymous memory
+	 * without swap) can bring down the dirtyable pages below
+	 * the zone's dirty balance reserve and the above calculation
+	 * will underflow.  However we still want to add in nodes
+	 * which are below threshold (negative values) to get a more
+	 * accurate calculation but make sure that the total never
+	 * underflows.
+	 */
+	if ((long)x < 0)
+		x = 0;
+
 	/*
 	 * Make sure that the number of highmem pages is never larger
 	 * than the number of the total dirtyable memory. This can only
@@ -222,8 +234,8 @@ static unsigned long global_dirtyable_memory(void)
 {
 	unsigned long x;
 
-	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages() -
-	    dirty_balance_reserve;
+	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
+	x -= min(x, dirty_balance_reserve);
 
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
@@ -290,9 +302,12 @@ static unsigned long zone_dirtyable_memory(struct zone *zone)
 	 * highmem zone can hold its share of dirty pages, so we don't
 	 * care about vm_highmem_is_dirtyable here.
 	 */
-	return zone_page_state(zone, NR_FREE_PAGES) +
-	       zone_reclaimable_pages(zone) -
-	       zone->dirty_balance_reserve;
+	unsigned long nr_pages = zone_page_state(zone, NR_FREE_PAGES) +
+		zone_reclaimable_pages(zone);
+
+	/* don't allow this to underflow */
+	nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
+	return nr_pages;
 }
 
 /**

From 038b358e5592eff9be175cf6601042ecc8b28179 Mon Sep 17 00:00:00 2001
From: Josh Boyer <jwboyer@redhat.com>
Date: Thu, 20 Dec 2012 15:05:10 -0800
Subject: [PATCH 03/25] Documentation: kernel-parameters.txt remove
 capability.disable

Remove the documentation for capability.disable.  The code supporting
this parameter was removed with commit 5915eb53861c ("security: remove
dummy module")

Signed-off-by: Josh Boyer <jwboyer@redhat.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Cc: Rob Landley <rob@landley.net>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ddd84d62718..363e348bff9 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -446,12 +446,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			possible to determine what the correct size should be.
 			This option provides an override for these situations.
 
-	capability.disable=
-			[SECURITY] Disable capabilities.  This would normally
-			be used only if an alternative security model is to be
-			configured.  Potentially dangerous and should only be
-			used if you are entirely sure of the consequences.
-
 	ccw_timeout_log [S390]
 			See Documentation/s390/CommonIO for details.
 

From f1d8e614d74b09531b9a85e812485340f3df7b1c Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Date: Thu, 20 Dec 2012 15:05:13 -0800
Subject: [PATCH 04/25] drivers/firmware/dmi_scan.c: check dmi version when get
 system uuid

As of version 2.6 of the SMBIOS specification, the first 3 fields of the
UUID are supposed to be little-endian encoded.

Also a minor fix to match variable meaning and mute checkpatch.pl

[akpm@linux-foundation.org: tweak code comment]
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Cc: Feng Jin <joe.jin@oracle.com>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/firmware/dmi_scan.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index b298158cb92..3714e3c03df 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -16,6 +16,7 @@
  */
 static char dmi_empty_string[] = "        ";
 
+static u16 __initdata dmi_ver;
 /*
  * Catch too early calls to dmi_check_system():
  */
@@ -161,8 +162,10 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int inde
 		return;
 
 	for (i = 0; i < 16 && (is_ff || is_00); i++) {
-		if(d[i] != 0x00) is_ff = 0;
-		if(d[i] != 0xFF) is_00 = 0;
+		if (d[i] != 0x00)
+			is_00 = 0;
+		if (d[i] != 0xFF)
+			is_ff = 0;
 	}
 
 	if (is_ff || is_00)
@@ -172,7 +175,15 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int inde
 	if (!s)
 		return;
 
-	sprintf(s, "%pUB", d);
+	/*
+	 * As of version 2.6 of the SMBIOS specification, the first 3 fields of
+	 * the UUID are supposed to be little-endian encoded.  The specification
+	 * says that this is the defacto standard.
+	 */
+	if (dmi_ver >= 0x0206)
+		sprintf(s, "%pUL", d);
+	else
+		sprintf(s, "%pUB", d);
 
         dmi_ident[slot] = s;
 }
@@ -414,6 +425,7 @@ static int __init dmi_present(const char __iomem *p)
 		 * DMI version 0.0 means that the real version is taken from
 		 * the SMBIOS version, which we don't know at this point.
 		 */
+		dmi_ver = (buf[14] & 0xf0) << 4 | (buf[14] & 0x0f);
 		if (buf[14] != 0)
 			printk(KERN_INFO "DMI %d.%d present.\n",
 			       buf[14] >> 4, buf[14] & 0xF);

From 9f9c9cbb60576a1518d0bf93fb8e499cffccf377 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Date: Thu, 20 Dec 2012 15:05:14 -0800
Subject: [PATCH 05/25] drivers/firmware/dmi_scan.c: fetch dmi version from
 SMBIOS if it exists

The right dmi version is in SMBIOS if it's zero in DMI region

This issue was originally found from an oracle bug.
One customer noticed system UUID doesn't match between dmidecode & uek2.

 - HP ProLiant BL460c G6 :
   # cat /sys/devices/virtual/dmi/id/product_uuid
   00000000-0000-4C48-3031-4D5030333531
   # dmidecode | grep -i uuid
   UUID: 00000000-0000-484C-3031-4D5030333531

From SMBIOS 2.6 on, spec use little-endian encoding for UUID other than
network byte order.

So we need to get dmi version to distinguish.  If version is 0.0, the
real version is taken from the SMBIOS version.  This is part of original
kernel comment in code.

[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Cc: Feng Jin <joe.jin@oracle.com>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/firmware/dmi_scan.c | 62 ++++++++++++++++++++++++++++---------
 1 file changed, 47 insertions(+), 15 deletions(-)

diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 3714e3c03df..fd3ae6290d7 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -119,12 +119,12 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *,
 	return 0;
 }
 
-static int __init dmi_checksum(const u8 *buf)
+static int __init dmi_checksum(const u8 *buf, u8 len)
 {
 	u8 sum = 0;
 	int a;
 
-	for (a = 0; a < 15; a++)
+	for (a = 0; a < len; a++)
 		sum += buf[a];
 
 	return sum == 0;
@@ -415,30 +415,57 @@ static int __init dmi_present(const char __iomem *p)
 	u8 buf[15];
 
 	memcpy_fromio(buf, p, 15);
-	if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) {
+	if (dmi_checksum(buf, 15)) {
 		dmi_num = (buf[13] << 8) | buf[12];
 		dmi_len = (buf[7] << 8) | buf[6];
 		dmi_base = (buf[11] << 24) | (buf[10] << 16) |
 			(buf[9] << 8) | buf[8];
 
-		/*
-		 * DMI version 0.0 means that the real version is taken from
-		 * the SMBIOS version, which we don't know at this point.
-		 */
-		dmi_ver = (buf[14] & 0xf0) << 4 | (buf[14] & 0x0f);
-		if (buf[14] != 0)
-			printk(KERN_INFO "DMI %d.%d present.\n",
-			       buf[14] >> 4, buf[14] & 0xF);
-		else
-			printk(KERN_INFO "DMI present.\n");
 		if (dmi_walk_early(dmi_decode) == 0) {
+			if (dmi_ver)
+				pr_info("SMBIOS %d.%d present.\n",
+				       dmi_ver >> 8, dmi_ver & 0xFF);
+			else {
+				dmi_ver = (buf[14] & 0xF0) << 4 |
+					   (buf[14] & 0x0F);
+				pr_info("Legacy DMI %d.%d present.\n",
+				       dmi_ver >> 8, dmi_ver & 0xFF);
+			}
 			dmi_dump_ids();
 			return 0;
 		}
 	}
+	dmi_ver = 0;
 	return 1;
 }
 
+static int __init smbios_present(const char __iomem *p)
+{
+	u8 buf[32];
+	int offset = 0;
+
+	memcpy_fromio(buf, p, 32);
+	if ((buf[5] < 32) && dmi_checksum(buf, buf[5])) {
+		dmi_ver = (buf[6] << 8) + buf[7];
+
+		/* Some BIOS report weird SMBIOS version, fix that up */
+		switch (dmi_ver) {
+		case 0x021F:
+		case 0x0221:
+			pr_debug("SMBIOS version fixup(2.%d->2.%d)\n",
+			       dmi_ver & 0xFF, 3);
+			dmi_ver = 0x0203;
+			break;
+		case 0x0233:
+			pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", 51, 6);
+			dmi_ver = 0x0206;
+			break;
+		}
+		offset = 16;
+	}
+	return dmi_present(buf + offset);
+}
+
 void __init dmi_scan_machine(void)
 {
 	char __iomem *p, *q;
@@ -456,7 +483,7 @@ void __init dmi_scan_machine(void)
 		if (p == NULL)
 			goto error;
 
-		rc = dmi_present(p + 0x10); /* offset of _DMI_ string */
+		rc = smbios_present(p);
 		dmi_iounmap(p, 32);
 		if (!rc) {
 			dmi_available = 1;
@@ -474,7 +501,12 @@ void __init dmi_scan_machine(void)
 			goto error;
 
 		for (q = p; q < p + 0x10000; q += 16) {
-			rc = dmi_present(q);
+			if (memcmp(q, "_SM_", 4) == 0 && q - p <= 0xFFE0)
+				rc = smbios_present(q);
+			else if (memcmp(q, "_DMI_", 5) == 0)
+				rc = dmi_present(q);
+			else
+				continue;
 			if (!rc) {
 				dmi_available = 1;
 				dmi_iounmap(p, 0x10000);

From b66c5984017533316fd1951770302649baf1aa33 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 20 Dec 2012 15:05:16 -0800
Subject: [PATCH 06/25] exec: do not leave bprm->interp on stack

If a series of scripts are executed, each triggering module loading via
unprintable bytes in the script header, kernel stack contents can leak
into the command line.

Normally execution of binfmt_script and binfmt_misc happens recursively.
However, when modules are enabled, and unprintable bytes exist in the
bprm->buf, execution will restart after attempting to load matching
binfmt modules.  Unfortunately, the logic in binfmt_script and
binfmt_misc does not expect to get restarted.  They leave bprm->interp
pointing to their local stack.  This means on restart bprm->interp is
left pointing into unused stack memory which can then be copied into the
userspace argv areas.

After additional study, it seems that both recursion and restart remains
the desirable way to handle exec with scripts, misc, and modules.  As
such, we need to protect the changes to interp.

This changes the logic to require allocation for any changes to the
bprm->interp.  To avoid adding a new kmalloc to every exec, the default
value is left as-is.  Only when passing through binfmt_script or
binfmt_misc does an allocation take place.

For a proof of concept, see DoTest.sh from:

   http://www.halfdog.net/Security/2012/LinuxKernelBinfmtScriptStackDataDisclosure/

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: halfdog <me@halfdog.net>
Cc: P J P <ppandit@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_misc.c        |  5 ++++-
 fs/binfmt_script.c      |  4 +++-
 fs/exec.c               | 15 +++++++++++++++
 include/linux/binfmts.h |  1 +
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 9be335fb8a7..0c8869fdd14 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -172,7 +172,10 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		goto _error;
 	bprm->argc ++;
 
-	bprm->interp = iname;	/* for binfmt_script */
+	/* Update interp in case binfmt_script needs it. */
+	retval = bprm_change_interp(iname, bprm);
+	if (retval < 0)
+		goto _error;
 
 	interp_file = open_exec (iname);
 	retval = PTR_ERR (interp_file);
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 1610a91637e..5027a3e1492 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -80,7 +80,9 @@ static int load_script(struct linux_binprm *bprm)
 	retval = copy_strings_kernel(1, &i_name, bprm);
 	if (retval) return retval; 
 	bprm->argc++;
-	bprm->interp = interp;
+	retval = bprm_change_interp(interp, bprm);
+	if (retval < 0)
+		return retval;
 
 	/*
 	 * OK, now restart the process with the interpreter's dentry.
diff --git a/fs/exec.c b/fs/exec.c
index d8e1191cb11..237d5342786 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1175,9 +1175,24 @@ void free_bprm(struct linux_binprm *bprm)
 		mutex_unlock(&current->signal->cred_guard_mutex);
 		abort_creds(bprm->cred);
 	}
+	/* If a binfmt changed the interp, free it. */
+	if (bprm->interp != bprm->filename)
+		kfree(bprm->interp);
 	kfree(bprm);
 }
 
+int bprm_change_interp(char *interp, struct linux_binprm *bprm)
+{
+	/* If a binfmt changed the interp, free it first. */
+	if (bprm->interp != bprm->filename)
+		kfree(bprm->interp);
+	bprm->interp = kstrdup(interp, GFP_KERNEL);
+	if (!bprm->interp)
+		return -ENOMEM;
+	return 0;
+}
+EXPORT_SYMBOL(bprm_change_interp);
+
 /*
  * install the new credentials for this executable
  */
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index a4c2b565c83..bdf3965f0a2 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -112,6 +112,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm,
 			   unsigned long stack_top,
 			   int executable_stack);
 extern int bprm_mm_init(struct linux_binprm *bprm);
+extern int bprm_change_interp(char *interp, struct linux_binprm *bprm);
 extern int copy_strings_kernel(int argc, const char *const *argv,
 			       struct linux_binprm *bprm);
 extern int prepare_bprm_creds(struct linux_binprm *bprm);

From bcc2b02f4c1b36bc67272df7119b75bac78525ab Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 20 Dec 2012 15:05:18 -0800
Subject: [PATCH 07/25] mm: cma: WARN if freed memory is still in use

Memory returned to free_contig_range() must have no other references.
Let kernel to complain loudly if page reference count is not equal to 1.

[rientjes@google.com: support sparsemem]
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Kyungmin Park <kyungmin.park@samsung.com>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2ad2ad168ef..4ba5e37127f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5978,8 +5978,15 @@ done:
 
 void free_contig_range(unsigned long pfn, unsigned nr_pages)
 {
-	for (; nr_pages--; ++pfn)
-		__free_page(pfn_to_page(pfn));
+	unsigned int count = 0;
+
+	for (; nr_pages--; pfn++) {
+		struct page *page = pfn_to_page(pfn);
+
+		count += page_count(page) != 1;
+		__free_page(page);
+	}
+	WARN(count != 0, "%d pages are still in use!\n", count);
 }
 #endif
 

From ba3f7a1765dc4095c757ac82eb14d6824c45e87d Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Thu, 20 Dec 2012 15:05:19 -0800
Subject: [PATCH 08/25] drivers/rtc/rtc-imxdi.c: must include
 <linux/spinlock.h>

Add the missing header include for spinlocks, to avoid potential build
failures on specific architectures or configurations.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-imxdi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c
index 18a4f0dd78a..8da7a5cf83c 100644
--- a/drivers/rtc/rtc-imxdi.c
+++ b/drivers/rtc/rtc-imxdi.c
@@ -36,6 +36,7 @@
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 #include <linux/sched.h>
+#include <linux/spinlock.h>
 #include <linux/workqueue.h>
 #include <linux/of.h>
 

From 44fd07e989a9a27476d8963296688127a4fd4df4 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 20 Dec 2012 15:05:21 -0800
Subject: [PATCH 09/25] kcmp: include linux/ptrace.h

This makes it compile on s390. After all the ptrace_may_access
(which we use this file) is declared exactly in linux/ptrace.h.

This is preparatory work to wire this syscall up on all archs.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Alexander Kartashov <alekskartashov@parallels.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kcmp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index 30b7b225306..e30ac0fe61c 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -4,6 +4,7 @@
 #include <linux/string.h>
 #include <linux/random.h>
 #include <linux/module.h>
+#include <linux/ptrace.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/cache.h>

From 5daa669c80c121ab75ecdf1c8e2df52f072fd25e Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 20 Dec 2012 15:05:24 -0800
Subject: [PATCH 10/25] hfsplus: avoid crash on failed block map free

If the read fails we kmap an error code.  This doesn't end well.  Instead
print a critical error and pray.  This mirrors the rest of the fs
behaviour with critical error cases.

Acked-by: Vyacheslav Dubeyko <slava@dubeyko.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Vyacheslav Dubeyko <slava@dubeyko.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jan Kara <jack@suse.cz>
Acked-by: Hin-Tak Leung <htl10@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfsplus/bitmap.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index 4cfbe2edd29..6feefc0cb48 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -176,12 +176,14 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count)
 	dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count);
 	/* are all of the bits in range? */
 	if ((offset + count) > sbi->total_blocks)
-		return -2;
+		return -ENOENT;
 
 	mutex_lock(&sbi->alloc_mutex);
 	mapping = sbi->alloc_file->i_mapping;
 	pnr = offset / PAGE_CACHE_BITS;
 	page = read_mapping_page(mapping, pnr, NULL);
+	if (IS_ERR(page))
+		goto kaboom;
 	pptr = kmap(page);
 	curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32;
 	end = pptr + PAGE_CACHE_BITS / 32;
@@ -214,6 +216,8 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count)
 		set_page_dirty(page);
 		kunmap(page);
 		page = read_mapping_page(mapping, ++pnr, NULL);
+		if (IS_ERR(page))
+			goto kaboom;
 		pptr = kmap(page);
 		curr = pptr;
 		end = pptr + PAGE_CACHE_BITS / 32;
@@ -232,4 +236,11 @@ out:
 	mutex_unlock(&sbi->alloc_mutex);
 
 	return 0;
+
+kaboom:
+	printk(KERN_CRIT "hfsplus: unable to mark blocks free: error %ld\n",
+			PTR_ERR(page));
+	mutex_unlock(&sbi->alloc_mutex);
+
+	return -EIO;
 }

From 1b243fd39bd605cdfc482bba4e56b0cb34b28f27 Mon Sep 17 00:00:00 2001
From: Vyacheslav Dubeyko <slava@dubeyko.com>
Date: Thu, 20 Dec 2012 15:05:25 -0800
Subject: [PATCH 11/25] hfsplus: rework processing errors in
 hfsplus_free_extents()

Currently, it doesn't process error codes from the hfsplus_block_free()
call in hfsplus_free_extents() method.  Add some error code processing.

Signed-off-by: Vyacheslav Dubeyko <slava@dubeyko.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Hin-Tak Leung <htl10@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfsplus/extents.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 5849e3ef35c..eba76eab6d6 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -329,6 +329,7 @@ static int hfsplus_free_extents(struct super_block *sb,
 {
 	u32 count, start;
 	int i;
+	int err = 0;
 
 	hfsplus_dump_extent(extent);
 	for (i = 0; i < 8; extent++, i++) {
@@ -345,18 +346,33 @@ found:
 	for (;;) {
 		start = be32_to_cpu(extent->start_block);
 		if (count <= block_nr) {
-			hfsplus_block_free(sb, start, count);
+			err = hfsplus_block_free(sb, start, count);
+			if (err) {
+				printk(KERN_ERR "hfs: can't free extent\n");
+				dprint(DBG_EXTENT, " start: %u count: %u\n",
+					start, count);
+			}
 			extent->block_count = 0;
 			extent->start_block = 0;
 			block_nr -= count;
 		} else {
 			count -= block_nr;
-			hfsplus_block_free(sb, start + count, block_nr);
+			err = hfsplus_block_free(sb, start + count, block_nr);
+			if (err) {
+				printk(KERN_ERR "hfs: can't free extent\n");
+				dprint(DBG_EXTENT, " start: %u count: %u\n",
+					start, count);
+			}
 			extent->block_count = cpu_to_be32(count);
 			block_nr = 0;
 		}
-		if (!block_nr || !i)
-			return 0;
+		if (!block_nr || !i) {
+			/*
+			 * Try to free all extents and
+			 * return only last error
+			 */
+			return err;
+		}
 		i--;
 		extent--;
 		count = be32_to_cpu(extent->block_count);

From 81cc7fad552bc9e4fa8c1f25becbecaaa1d41b67 Mon Sep 17 00:00:00 2001
From: Vyacheslav Dubeyko <slava@dubeyko.com>
Date: Thu, 20 Dec 2012 15:05:28 -0800
Subject: [PATCH 12/25] hfsplus: rework processing of hfs_btree_write()
 returned error

Add to hfs_btree_write() a return of -EIO on failure of b-tree node
searching.  Also add logic ofor processing errors from hfs_btree_write()
in hfsplus_system_write_inode() with a message about b-tree writing
failure.

[akpm@linux-foundation.org: reduce scope of `err', print errno on error]
Signed-off-by: Vyacheslav Dubeyko <slava@dubeyko.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Hin-Tak Leung <htl10@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfsplus/btree.c      |  5 +++--
 fs/hfsplus/hfsplus_fs.h |  2 +-
 fs/hfsplus/super.c      | 10 ++++++++--
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 21023d9f8ff..685d07d0ed1 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -159,7 +159,7 @@ void hfs_btree_close(struct hfs_btree *tree)
 	kfree(tree);
 }
 
-void hfs_btree_write(struct hfs_btree *tree)
+int hfs_btree_write(struct hfs_btree *tree)
 {
 	struct hfs_btree_header_rec *head;
 	struct hfs_bnode *node;
@@ -168,7 +168,7 @@ void hfs_btree_write(struct hfs_btree *tree)
 	node = hfs_bnode_find(tree, 0);
 	if (IS_ERR(node))
 		/* panic? */
-		return;
+		return -EIO;
 	/* Load the header */
 	page = node->page[0];
 	head = (struct hfs_btree_header_rec *)(kmap(page) +
@@ -186,6 +186,7 @@ void hfs_btree_write(struct hfs_btree *tree)
 	kunmap(page);
 	set_page_dirty(page);
 	hfs_bnode_put(node);
+	return 0;
 }
 
 static struct hfs_bnode *hfs_bmap_new_bmap(struct hfs_bnode *prev, u32 idx)
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index c571de224b1..a6da86b1b4c 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -335,7 +335,7 @@ int hfsplus_block_free(struct super_block *, u32, u32);
 /* btree.c */
 struct hfs_btree *hfs_btree_open(struct super_block *, u32);
 void hfs_btree_close(struct hfs_btree *);
-void hfs_btree_write(struct hfs_btree *);
+int hfs_btree_write(struct hfs_btree *);
 struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *);
 void hfs_bmap_free(struct hfs_bnode *);
 
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 811a84d2d96..2036f585b09 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -127,8 +127,14 @@ static int hfsplus_system_write_inode(struct inode *inode)
 		hfsplus_mark_mdb_dirty(inode->i_sb);
 	}
 	hfsplus_inode_write_fork(inode, fork);
-	if (tree)
-		hfs_btree_write(tree);
+	if (tree) {
+		int err = hfs_btree_write(tree);
+		if (err) {
+			printk(KERN_ERR "hfs: b-tree write err: %d, ino %lu\n",
+					err, inode->i_ino);
+			return err;
+		}
+	}
 	return 0;
 }
 

From bffdd661bd424ea4298639805bfcbcaf8ffb62f2 Mon Sep 17 00:00:00 2001
From: Vyacheslav Dubeyko <slava@dubeyko.com>
Date: Thu, 20 Dec 2012 15:05:29 -0800
Subject: [PATCH 13/25] hfsplus: add error message for the case of failure of
 sync fs in delayed_sync_fs() method

Add an error message for the case of failure of sync fs in
delayed_sync_fs() method.

Signed-off-by: Vyacheslav Dubeyko <slava@dubeyko.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Hin-Tak Leung <htl10@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfsplus/super.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 2036f585b09..796198d2655 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -232,6 +232,7 @@ out:
 
 static void delayed_sync_fs(struct work_struct *work)
 {
+	int err;
 	struct hfsplus_sb_info *sbi;
 
 	sbi = container_of(work, struct hfsplus_sb_info, sync_work.work);
@@ -240,7 +241,9 @@ static void delayed_sync_fs(struct work_struct *work)
 	sbi->work_queued = 0;
 	spin_unlock(&sbi->work_lock);
 
-	hfsplus_sync_fs(sbi->alloc_file->i_sb, 1);
+	err = hfsplus_sync_fs(sbi->alloc_file->i_sb, 1);
+	if (err)
+		printk(KERN_ERR "hfs: delayed sync fs err %d\n", err);
 }
 
 void hfsplus_mark_mdb_dirty(struct super_block *sb)

From 2c79737af83e0d586899f48c6010148ea2064369 Mon Sep 17 00:00:00 2001
From: Jeremy Eder <jeder@redhat.com>
Date: Thu, 20 Dec 2012 15:05:32 -0800
Subject: [PATCH 14/25] mm: clean up transparent hugepage sysfs error messages

Clarify error messages and correct a few typos in the transparent hugepage
sysfs init code.

Signed-off-by: Jeremy Eder <jeder@redhat.com>
Acked-by: Rafael Aquini <aquini@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/huge_memory.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 32754eece63..9e894edc781 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -574,19 +574,19 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
 
 	*hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
 	if (unlikely(!*hugepage_kobj)) {
-		printk(KERN_ERR "hugepage: failed kobject create\n");
+		printk(KERN_ERR "hugepage: failed to create transparent hugepage kobject\n");
 		return -ENOMEM;
 	}
 
 	err = sysfs_create_group(*hugepage_kobj, &hugepage_attr_group);
 	if (err) {
-		printk(KERN_ERR "hugepage: failed register hugeage group\n");
+		printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n");
 		goto delete_obj;
 	}
 
 	err = sysfs_create_group(*hugepage_kobj, &khugepaged_attr_group);
 	if (err) {
-		printk(KERN_ERR "hugepage: failed register hugeage group\n");
+		printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n");
 		goto remove_hp_group;
 	}
 

From 5abe257af8b95857b95fa0ba694530b446ae32d8 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 20 Dec 2012 15:05:34 -0800
Subject: [PATCH 15/25] revert "rtc: recycle id when unloading a rtc driver"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Revert commit 2830a6d20139df2198d63235df7957712adb28e5.

We already perform the ida_simple_remove() in rtc_device_release(),
which is an appropriate place.  Commit 2830a6d20 ("rtc: recycle id when
unloading a rtc driver") caused the kernel to emit

	ida_remove called for id=0 which is not allocated.

warnings when rtc_device_release() tries to release an alread-released
ID.

Let's restore things to their previous state and then work out why
Vincent's kernel wasn't calling rtc_device_release() - presumably a bug
in a specific sub-driver.

Reported-by: Lothar Waßmann <LW@KARO-electronics.de>
Acked-by: Alexander Holler <holler@ahsoftware.de>
Cc: Vincent Palatin <vpalatin@chromium.org>
Cc: <stable@vger.kernel.org>		[3.7.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/class.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index f8a0aab218c..5143629dedb 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -244,7 +244,6 @@ void rtc_device_unregister(struct rtc_device *rtc)
 		rtc_proc_del_device(rtc);
 		device_unregister(&rtc->dev);
 		rtc->ops = NULL;
-		ida_simple_remove(&rtc_ida, rtc->id);
 		mutex_unlock(&rtc->ops_lock);
 		put_device(&rtc->dev);
 	}

From 495e9d84607cda966ba6d223d5eb9df0070cd21a Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 20 Dec 2012 15:05:37 -0800
Subject: [PATCH 16/25] checkpatch: warn on uapi #includes that #include
 <uapi/...

Avoid specifying internal uapi #include paths with uapi/...  as
userspace should not use and never see that.

Neaten message line wrapping above.

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: David Howells <dhowells@redhat.com>
Acked-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 1d6e4c54137..4d2c7dfdaab 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2226,8 +2226,11 @@ sub process {
 			my $path = $1;
 			if ($path =~ m{//}) {
 				ERROR("MALFORMED_INCLUDE",
-				      "malformed #include filename\n" .
-					$herecurr);
+				      "malformed #include filename\n" . $herecurr);
+			}
+			if ($path =~ "^uapi/" && $realfile =~ m@\binclude/uapi/@) {
+				ERROR("UAPI_INCLUDE",
+				      "No #include in ...include/uapi/... should use a uapi/ path prefix\n" . $herecurr);
 			}
 		}
 

From 154b454edaf6d94a69016db6c342c57fa935bbe9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 20 Dec 2012 15:05:40 -0800
Subject: [PATCH 17/25] memcg: don't register hotcpu notifier from
 ->css_alloc()

Commit 648bb56d076b ("cgroup: lock cgroup_mutex in cgroup_init_subsys()")
made cgroup_init_subsys() grab cgroup_mutex before invoking
->css_alloc() for the root css.  Because memcg registers hotcpu notifier
from ->css_alloc() for the root css, this introduced circular locking
dependency between cgroup_mutex and cpu hotplug.

Fix it by moving hotcpu notifier registration to a subsys initcall.

  ======================================================
  [ INFO: possible circular locking dependency detected ]
  3.7.0-rc4-work+ #42 Not tainted
  -------------------------------------------------------
  bash/645 is trying to acquire lock:
   (cgroup_mutex){+.+.+.}, at: [<ffffffff8110c5b7>] cgroup_lock+0x17/0x20

  but task is already holding lock:
   (cpu_hotplug.lock){+.+.+.}, at: [<ffffffff8109300f>] cpu_hotplug_begin+0x2f/0x60

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

 -> #1 (cpu_hotplug.lock){+.+.+.}:
         lock_acquire+0x97/0x1e0
         mutex_lock_nested+0x61/0x3b0
         get_online_cpus+0x3c/0x60
         rebuild_sched_domains_locked+0x1b/0x70
         cpuset_write_resmask+0x298/0x2c0
         cgroup_file_write+0x1ef/0x300
         vfs_write+0xa8/0x160
         sys_write+0x52/0xa0
         system_call_fastpath+0x16/0x1b

 -> #0 (cgroup_mutex){+.+.+.}:
         __lock_acquire+0x14ce/0x1d20
         lock_acquire+0x97/0x1e0
         mutex_lock_nested+0x61/0x3b0
         cgroup_lock+0x17/0x20
         cpuset_handle_hotplug+0x1b/0x560
         cpuset_update_active_cpus+0xe/0x10
         cpuset_cpu_inactive+0x47/0x50
         notifier_call_chain+0x66/0x150
         __raw_notifier_call_chain+0xe/0x10
         __cpu_notify+0x20/0x40
         _cpu_down+0x7e/0x2f0
         cpu_down+0x36/0x50
         store_online+0x5d/0xe0
         dev_attr_store+0x18/0x30
         sysfs_write_file+0xe0/0x150
         vfs_write+0xa8/0x160
         sys_write+0x52/0xa0
         system_call_fastpath+0x16/0x1b
  other info that might help us debug this:

   Possible unsafe locking scenario:

         CPU0                    CPU1
         ----                    ----
    lock(cpu_hotplug.lock);
                                 lock(cgroup_mutex);
                                 lock(cpu_hotplug.lock);
    lock(cgroup_mutex);

   *** DEADLOCK ***

  5 locks held by bash/645:
   #0:  (&buffer->mutex){+.+.+.}, at: [<ffffffff8123bab8>] sysfs_write_file+0x48/0x150
   #1:  (s_active#42){.+.+.+}, at: [<ffffffff8123bb38>] sysfs_write_file+0xc8/0x150
   #2:  (x86_cpu_hotplug_driver_mutex){+.+...}, at: [<ffffffff81079277>] cpu_hotplug_driver_lock+0x1
+7/0x20
   #3:  (cpu_add_remove_lock){+.+.+.}, at: [<ffffffff81093157>] cpu_maps_update_begin+0x17/0x20
   #4:  (cpu_hotplug.lock){+.+.+.}, at: [<ffffffff8109300f>] cpu_hotplug_begin+0x2f/0x60

  stack backtrace:
  Pid: 645, comm: bash Not tainted 3.7.0-rc4-work+ #42
  Call Trace:
   print_circular_bug+0x28e/0x29f
   __lock_acquire+0x14ce/0x1d20
   lock_acquire+0x97/0x1e0
   mutex_lock_nested+0x61/0x3b0
   cgroup_lock+0x17/0x20
   cpuset_handle_hotplug+0x1b/0x560
   cpuset_update_active_cpus+0xe/0x10
   cpuset_cpu_inactive+0x47/0x50
   notifier_call_chain+0x66/0x150
   __raw_notifier_call_chain+0xe/0x10
   __cpu_notify+0x20/0x40
   _cpu_down+0x7e/0x2f0
   cpu_down+0x36/0x50
   store_online+0x5d/0xe0
   dev_attr_store+0x18/0x30
   sysfs_write_file+0xe0/0x150
   vfs_write+0xa8/0x160
   sys_write+0x52/0xa0
   system_call_fastpath+0x16/0x1b

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f3009b4bae5..09255ec8159 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6090,7 +6090,6 @@ mem_cgroup_css_alloc(struct cgroup *cont)
 						&per_cpu(memcg_stock, cpu);
 			INIT_WORK(&stock->work, drain_local_stock);
 		}
-		hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
 	} else {
 		parent = mem_cgroup_from_cont(cont->parent);
 		memcg->use_hierarchy = parent->use_hierarchy;
@@ -6756,6 +6755,19 @@ struct cgroup_subsys mem_cgroup_subsys = {
 	.use_id = 1,
 };
 
+/*
+ * The rest of init is performed during ->css_alloc() for root css which
+ * happens before initcalls.  hotcpu_notifier() can't be done together as
+ * it would introduce circular locking by adding cgroup_lock -> cpu hotplug
+ * dependency.  Do it from a subsys_initcall().
+ */
+static int __init mem_cgroup_init(void)
+{
+	hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
+	return 0;
+}
+subsys_initcall(mem_cgroup_init);
+
 #ifdef CONFIG_MEMCG_SWAP
 static int __init enable_swap_account(char *s)
 {

From c4e18497d8fd92eef2c6e7eadcc1a107ccd115ea Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 20 Dec 2012 15:05:42 -0800
Subject: [PATCH 18/25] linux/kernel.h: fix DIV_ROUND_CLOSEST with unsigned
 divisors

Commit 263a523d18bc ("linux/kernel.h: Fix warning seen with W=1 due to
change in DIV_ROUND_CLOSEST") fixes a warning seen with W=1 due to
change in DIV_ROUND_CLOSEST.

Unfortunately, the C compiler converts divide operations with unsigned
divisors to unsigned, even if the dividend is signed and negative (for
example, -10 / 5U = 858993457).  The C standard says "If one operand has
unsigned int type, the other operand is converted to unsigned int", so
the compiler is not to blame.  As a result, DIV_ROUND_CLOSEST(0, 2U) and
similar operations now return bad values, since the automatic conversion
of expressions such as "0 - 2U/2" to unsigned was not taken into
account.

Fix by checking for the divisor variable type when deciding which
operation to perform.  This fixes DIV_ROUND_CLOSEST(0, 2U), but still
returns bad values for negative dividends divided by unsigned divisors.
Mark the latter case as unsupported.

One observed effect of this problem is that the s2c_hwmon driver reports
a value of 4198403 instead of 0 if the ADC reads 0.

Other impact is unpredictable.  Problem is seen if the divisor is an
unsigned variable or constant and the dividend is less than (divisor/2).

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reported-by: Juergen Beisert <jbe@pengutronix.de>
Tested-by: Juergen Beisert <jbe@pengutronix.de>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: <stable@vger.kernel.org>	[3.7.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d140e8fb075..c566927efcb 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -77,13 +77,15 @@
 
 /*
  * Divide positive or negative dividend by positive divisor and round
- * to closest integer. Result is undefined for negative divisors.
+ * to closest integer. Result is undefined for negative divisors and
+ * for negative dividends if the divisor variable type is unsigned.
  */
 #define DIV_ROUND_CLOSEST(x, divisor)(			\
 {							\
 	typeof(x) __x = x;				\
 	typeof(divisor) __d = divisor;			\
-	(((typeof(x))-1) > 0 || (__x) > 0) ?		\
+	(((typeof(x))-1) > 0 ||				\
+	 ((typeof(divisor))-1) > 0 || (__x) > 0) ?	\
 		(((__x) + ((__d) / 2)) / (__d)) :	\
 		(((__x) - ((__d) / 2)) / (__d));	\
 }							\

From ee297209bf0a25c6717b7c063e76795142d32f37 Mon Sep 17 00:00:00 2001
From: Xiaotian Feng <xtfeng@gmail.com>
Date: Thu, 20 Dec 2012 15:05:44 -0800
Subject: [PATCH 19/25] proc: fix inconsistent lock state

Lockdep found an inconsistent lock state when rcu is processing delayed
work in softirq.  Currently, kernel is using spin_lock/spin_unlock to
protect proc_inum_ida, but proc_free_inum is called by rcu in softirq
context.

Use spin_lock_bh/spin_unlock_bh fix following lockdep warning.

  =================================
  [ INFO: inconsistent lock state ]
  3.7.0 #36 Not tainted
  ---------------------------------
  inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
  swapper/1/0 [HC0[0]:SC1[1]:HE1:SE0] takes:
   (proc_inum_lock){+.?...}, at: proc_free_inum+0x1c/0x50
  {SOFTIRQ-ON-W} state was registered at:
     __lock_acquire+0x8ae/0xca0
     lock_acquire+0x199/0x200
     _raw_spin_lock+0x41/0x50
     proc_alloc_inum+0x4c/0xd0
     alloc_mnt_ns+0x49/0xc0
     create_mnt_ns+0x25/0x70
     mnt_init+0x161/0x1c7
     vfs_caches_init+0x107/0x11a
     start_kernel+0x348/0x38c
     x86_64_start_reservations+0x131/0x136
     x86_64_start_kernel+0x103/0x112
  irq event stamp: 2993422
  hardirqs last  enabled at (2993422):  _raw_spin_unlock_irqrestore+0x55/0x80
  hardirqs last disabled at (2993421):  _raw_spin_lock_irqsave+0x29/0x70
  softirqs last  enabled at (2993394):  _local_bh_enable+0x13/0x20
  softirqs last disabled at (2993395):  call_softirq+0x1c/0x30

  other info that might help us debug this:
   Possible unsafe locking scenario:

         CPU0
         ----
    lock(proc_inum_lock);
    <Interrupt>
      lock(proc_inum_lock);

   *** DEADLOCK ***

  no locks held by swapper/1/0.

  stack backtrace:
  Pid: 0, comm: swapper/1 Not tainted 3.7.0 #36
  Call Trace:
   <IRQ>  [<ffffffff810a40f1>] ? vprintk_emit+0x471/0x510
    print_usage_bug+0x2a5/0x2c0
    mark_lock+0x33b/0x5e0
    __lock_acquire+0x813/0xca0
    lock_acquire+0x199/0x200
    _raw_spin_lock+0x41/0x50
    proc_free_inum+0x1c/0x50
    free_pid_ns+0x1c/0x50
    put_pid_ns+0x2e/0x50
    put_pid+0x4a/0x60
    delayed_put_pid+0x12/0x20
    rcu_process_callbacks+0x462/0x790
    __do_softirq+0x1b4/0x3b0
    call_softirq+0x1c/0x30
    do_softirq+0x59/0xd0
    irq_exit+0x54/0xd0
    smp_apic_timer_interrupt+0x95/0xa3
    apic_timer_interrupt+0x72/0x80
    cpuidle_enter_tk+0x10/0x20
    cpuidle_enter_state+0x17/0x50
    cpuidle_idle_call+0x287/0x520
    cpu_idle+0xba/0x130
    start_secondary+0x2b3/0x2bc

Signed-off-by: Xiaotian Feng <dannyfeng@tencent.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/generic.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 7b3ae3cc0ef..e659a0ff1da 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -359,18 +359,18 @@ retry:
 	if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
 		return -ENOMEM;
 
-	spin_lock(&proc_inum_lock);
+	spin_lock_bh(&proc_inum_lock);
 	error = ida_get_new(&proc_inum_ida, &i);
-	spin_unlock(&proc_inum_lock);
+	spin_unlock_bh(&proc_inum_lock);
 	if (error == -EAGAIN)
 		goto retry;
 	else if (error)
 		return error;
 
 	if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
-		spin_lock(&proc_inum_lock);
+		spin_lock_bh(&proc_inum_lock);
 		ida_remove(&proc_inum_ida, i);
-		spin_unlock(&proc_inum_lock);
+		spin_unlock_bh(&proc_inum_lock);
 		return -ENOSPC;
 	}
 	*inum = PROC_DYNAMIC_FIRST + i;
@@ -379,9 +379,9 @@ retry:
 
 void proc_free_inum(unsigned int inum)
 {
-	spin_lock(&proc_inum_lock);
+	spin_lock_bh(&proc_inum_lock);
 	ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
-	spin_unlock(&proc_inum_lock);
+	spin_unlock_bh(&proc_inum_lock);
 }
 
 static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)

From 99bbb2b0d4431e67d11a98f66a6ef8fcd8622649 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <davidlohr.bueso@hp.com>
Date: Thu, 20 Dec 2012 15:05:45 -0800
Subject: [PATCH 20/25] Documentation: ABI: remove testing/sysfs-devices-node

This file is already documented in the stable ABI (see commit
5bbe1ec11fcf).

Signed-off-by: Davidlohr Bueso <davidlohr.bueso@hp.com>
Cc: Greg KH <greg@kroah.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/ABI/testing/sysfs-devices-node | 7 -------
 1 file changed, 7 deletions(-)
 delete mode 100644 Documentation/ABI/testing/sysfs-devices-node

diff --git a/Documentation/ABI/testing/sysfs-devices-node b/Documentation/ABI/testing/sysfs-devices-node
deleted file mode 100644
index 453a210c3ce..00000000000
--- a/Documentation/ABI/testing/sysfs-devices-node
+++ /dev/null
@@ -1,7 +0,0 @@
-What:		/sys/devices/system/node/nodeX/compact
-Date:		February 2010
-Contact:	Mel Gorman <mel@csn.ul.ie>
-Description:
-		When this file is written to, all memory within that node
-		will be compacted. When it completes, memory will be freed
-		into blocks which have as many contiguous pages as possible

From c39540c6d1add1d0ad843b3d2437311924193359 Mon Sep 17 00:00:00 2001
From: Ravishankar N <cyberax82@gmail.com>
Date: Thu, 20 Dec 2012 15:05:46 -0800
Subject: [PATCH 21/25] fat: fix incorrect function comment

fat_search_long() returns 0 on success, -ENOENT/ENOMEM on failure.
Change the function comment accordingly.

While at it, fix some trivial typos.

Signed-off-by: Ravishankar N <cyberax82@gmail.com>
Signed-off-by: Namjae Jeon <linkinjeon@gmail.com>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fat/dir.c   | 5 ++---
 fs/fat/inode.c | 2 +-
 fs/fat/misc.c  | 4 ++++
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 2a182342442..58bf744dbf3 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -461,8 +461,7 @@ static int fat_parse_short(struct super_block *sb,
 }
 
 /*
- * Return values: negative -> error, 0 -> not found, positive -> found,
- * value is the total amount of slots, including the shortname entry.
+ * Return values: negative -> error/not found, 0 -> found.
  */
 int fat_search_long(struct inode *inode, const unsigned char *name,
 		    int name_len, struct fat_slot_info *sinfo)
@@ -1255,7 +1254,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
 
 	sinfo->nr_slots = nr_slots;
 
-	/* First stage: search free direcotry entries */
+	/* First stage: search free directory entries */
 	free_slots = nr_bhs = 0;
 	bh = prev = NULL;
 	pos = 0;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 35806813ea4..f8f491677a4 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1344,7 +1344,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
 	sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
 	if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
 		if (!silent)
-			fat_msg(sb, KERN_ERR, "bogus directroy-entries per block"
+			fat_msg(sb, KERN_ERR, "bogus directory-entries per block"
 			       " (%u)", sbi->dir_entries);
 		brelse(bh);
 		goto out_invalid;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 5eb600dc43a..359d307b550 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -135,6 +135,10 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
 		}
 		if (ret < 0)
 			return ret;
+		/*
+		 * FIXME:Although we can add this cache, fat_cache_add() is
+		 * assuming to be called after linear search with fat_cache_id.
+		 */
 //		fat_cache_add(inode, new_fclus, new_dclus);
 	} else {
 		MSDOS_I(inode)->i_start = new_dclus;

From 891348ca0f66206f1dc0e30d63757e3df1ae2d15 Mon Sep 17 00:00:00 2001
From: Robin Holt <holt@sgi.com>
Date: Thu, 20 Dec 2012 15:05:50 -0800
Subject: [PATCH 22/25] SGI-XP: handle non-fatal traps

We found a user code which was raising a divide-by-zero trap.  That trap
would lead to XPC connections between system-partitions being torn down
due to the die_chain notifier callouts it received.

This also revealed a different issue where multiple callers into
xpc_die_deactivate() would all attempt to do the disconnect in parallel
which would sometimes lock up but often overwhelm the console on very
large machines as each would print at least one line of output at the
end of the deactivate.

I reviewed all the users of the die_chain notifier and changed the code
to ignore the notifier callouts for reasons which will not actually lead
to a system to continue on to call die().

[akpm@linux-foundation.org: fix ia64]
Signed-off-by: Robin Holt <holt@sgi.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-xp/xpc_main.c | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 8d082b46426..d971817182f 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -53,6 +53,10 @@
 #include <linux/kthread.h>
 #include "xpc.h"
 
+#ifdef CONFIG_X86_64
+#include <asm/traps.h>
+#endif
+
 /* define two XPC debug device structures to be used with dev_dbg() et al */
 
 struct device_driver xpc_dbg_name = {
@@ -1079,6 +1083,9 @@ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
 	return NOTIFY_DONE;
 }
 
+/* Used to only allow one cpu to complete disconnect */
+static unsigned int xpc_die_disconnecting;
+
 /*
  * Notify other partitions to deactivate from us by first disengaging from all
  * references to our memory.
@@ -1092,6 +1099,9 @@ xpc_die_deactivate(void)
 	long keep_waiting;
 	long wait_to_print;
 
+	if (cmpxchg(&xpc_die_disconnecting, 0, 1))
+		return;
+
 	/* keep xpc_hb_checker thread from doing anything (just in case) */
 	xpc_exiting = 1;
 
@@ -1159,7 +1169,7 @@ xpc_die_deactivate(void)
  * about the lack of a heartbeat.
  */
 static int
-xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
+xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args)
 {
 #ifdef CONFIG_IA64		/* !!! temporary kludge */
 	switch (event) {
@@ -1191,7 +1201,27 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
 		break;
 	}
 #else
-	xpc_die_deactivate();
+	struct die_args *die_args = _die_args;
+
+	switch (event) {
+	case DIE_TRAP:
+		if (die_args->trapnr == X86_TRAP_DF)
+			xpc_die_deactivate();
+
+		if (((die_args->trapnr == X86_TRAP_MF) ||
+		     (die_args->trapnr == X86_TRAP_XF)) &&
+		    !user_mode_vm(die_args->regs))
+			xpc_die_deactivate();
+
+		break;
+	case DIE_INT3:
+	case DIE_DEBUG:
+		break;
+	case DIE_OOPS:
+	case DIE_GPF:
+	default:
+		xpc_die_deactivate();
+	}
 #endif
 
 	return NOTIFY_DONE;

From a68c2f12b4b28994aaf622bbe5724b7258cc2fcf Mon Sep 17 00:00:00 2001
From: Scott Wolchok <swolchok@umich.edu>
Date: Thu, 20 Dec 2012 15:05:52 -0800
Subject: [PATCH 23/25] sendfile: allows bypassing of notifier events

do_sendfile() in fs/read_write.c does not call the fsnotify functions,
unlike its neighbors.  This manifests as a lack of inotify ACCESS events
when a file is sent using sendfile(2).

Addresses
  https://bugzilla.kernel.org/show_bug.cgi?id=12812

[akpm@linux-foundation.org: use fsnotify_modify(out.file), not fsnotify_access(), per Dave]
Signed-off-by: Alan Cox <alan@linux.intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Scott Wolchok <swolchok@umich.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/read_write.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/read_write.c b/fs/read_write.c
index 1edaf099ddd..bb34af31528 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -935,6 +935,8 @@ ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
 	if (retval > 0) {
 		add_rchar(current, retval);
 		add_wchar(current, retval);
+		fsnotify_access(in.file);
+		fsnotify_modify(out.file);
 	}
 
 	inc_syscr(current);

From e67eab39bee26f509d38d00ca1a8f24b63f46a31 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 20 Dec 2012 15:05:54 -0800
Subject: [PATCH 24/25] keys: fix unreachable code

We set ret to NULL then test it. Remove the bogus test

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/keys/process_keys.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 58dfe089094..20e4bf57aec 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -367,8 +367,6 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
 
 		switch (PTR_ERR(key_ref)) {
 		case -EAGAIN: /* no key */
-			if (ret)
-				break;
 		case -ENOKEY: /* negative key */
 			ret = key_ref;
 			break;

From cfde819088422503b5c69e03ab7bb90f87121d4d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 20 Dec 2012 15:05:56 -0800
Subject: [PATCH 25/25] keys: use keyring_alloc() to create module signing
 keyring

Use keyring_alloc() to create special keyrings now that it has
a permissions parameter rather than using key_alloc() +
key_instantiate_and_link().

Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/modsign_pubkey.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c
index 045504fffbb..2b6e69909c3 100644
--- a/kernel/modsign_pubkey.c
+++ b/kernel/modsign_pubkey.c
@@ -34,18 +34,15 @@ static __init int module_verify_init(void)
 {
 	pr_notice("Initialise module verification\n");
 
-	modsign_keyring = key_alloc(&key_type_keyring, ".module_sign",
-				    KUIDT_INIT(0), KGIDT_INIT(0),
-				    current_cred(),
-				    (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-				    KEY_USR_VIEW | KEY_USR_READ,
-				    KEY_ALLOC_NOT_IN_QUOTA);
+	modsign_keyring = keyring_alloc(".module_sign",
+					KUIDT_INIT(0), KGIDT_INIT(0),
+					current_cred(),
+					((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+					 KEY_USR_VIEW | KEY_USR_READ),
+					KEY_ALLOC_NOT_IN_QUOTA, NULL);
 	if (IS_ERR(modsign_keyring))
 		panic("Can't allocate module signing keyring\n");
 
-	if (key_instantiate_and_link(modsign_keyring, NULL, 0, NULL, NULL) < 0)
-		panic("Can't instantiate module signing keyring\n");
-
 	return 0;
 }