From e2690695ce5085677b84fbf2e38d2ed57cad39cd Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Sun, 15 Apr 2012 02:20:27 +0200 Subject: [PATCH 001/475] fuse: Convert to kstrtoul_from_user This patch replaces the code for getting an number from a userspace buffer by a simple call to kstroul_from_user. This makes it easier to read and less error prone. Signed-off-by: Peter Huewe Signed-off-by: Miklos Szeredi --- fs/fuse/control.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 42593c587d4..03ff5b1eba9 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -75,19 +75,13 @@ static ssize_t fuse_conn_limit_write(struct file *file, const char __user *buf, unsigned global_limit) { unsigned long t; - char tmp[32]; unsigned limit = (1 << 16) - 1; int err; - if (*ppos || count >= sizeof(tmp) - 1) + if (*ppos) return -EINVAL; - if (copy_from_user(tmp, buf, count)) - return -EINVAL; - - tmp[count] = '\0'; - - err = strict_strtoul(tmp, 0, &t); + err = kstrtoul_from_user(buf, count, 0, &t); if (err) return err; From 05ba1f0823004e947748523782e9c2f07f3bff0d Mon Sep 17 00:00:00 2001 From: Anatol Pomozov Date: Sun, 22 Apr 2012 18:45:24 -0700 Subject: [PATCH 002/475] fuse: add FALLOCATE operation fallocate filesystem operation preallocates media space for the given file. If fallocate returns success then any subsequent write to the given range never fails with 'not enough space' error. Signed-off-by: Anatol Pomozov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 33 +++++++++++++++++++++++++++++++++ include/linux/fuse.h | 14 +++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 504e61b7fd7..e3fee88831d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2171,6 +2171,37 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, return ret; } +long fuse_file_fallocate(struct file *file, int mode, loff_t offset, + loff_t length) +{ + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; + struct fuse_req *req; + struct fuse_fallocate_in inarg = { + .fh = ff->fh, + .offset = offset, + .length = length, + .mode = mode + }; + int err; + + req = fuse_get_req(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + + req->in.h.opcode = FUSE_FALLOCATE; + req->in.h.nodeid = ff->nodeid; + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + fuse_request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + + return err; +} +EXPORT_SYMBOL_GPL(fuse_file_fallocate); + static const struct file_operations fuse_file_operations = { .llseek = fuse_file_llseek, .read = do_sync_read, @@ -2188,6 +2219,7 @@ static const struct file_operations fuse_file_operations = { .unlocked_ioctl = fuse_file_ioctl, .compat_ioctl = fuse_file_compat_ioctl, .poll = fuse_file_poll, + .fallocate = fuse_file_fallocate, }; static const struct file_operations fuse_direct_io_file_operations = { @@ -2204,6 +2236,7 @@ static const struct file_operations fuse_direct_io_file_operations = { .unlocked_ioctl = fuse_file_ioctl, .compat_ioctl = fuse_file_compat_ioctl, .poll = fuse_file_poll, + .fallocate = fuse_file_fallocate, /* no splice_read */ }; diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 8f2ab8fef92..9303348965f 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -54,6 +54,9 @@ * 7.18 * - add FUSE_IOCTL_DIR flag * - add FUSE_NOTIFY_DELETE + * + * 7.19 + * - add FUSE_FALLOCATE */ #ifndef _LINUX_FUSE_H @@ -85,7 +88,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 18 +#define FUSE_KERNEL_MINOR_VERSION 19 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -278,6 +281,7 @@ enum fuse_opcode { FUSE_POLL = 40, FUSE_NOTIFY_REPLY = 41, FUSE_BATCH_FORGET = 42, + FUSE_FALLOCATE = 43, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -571,6 +575,14 @@ struct fuse_notify_poll_wakeup_out { __u64 kh; }; +struct fuse_fallocate_in { + __u64 fh; + __u64 offset; + __u64 length; + __u32 mode; + __u32 padding; +}; + struct fuse_in_header { __u32 len; __u32 opcode; From 519c6040ce04474bc893774f866fd8d907b20429 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 26 Apr 2012 10:56:36 +0200 Subject: [PATCH 003/475] fuse: optimize fallocate on permanent failure If userspace filesystem doesn't support fallocate, remember this and don't send request next time. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 7 +++++++ fs/fuse/fuse_i.h | 3 +++ 2 files changed, 10 insertions(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e3fee88831d..bbfd571b37e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2185,6 +2185,9 @@ long fuse_file_fallocate(struct file *file, int mode, loff_t offset, }; int err; + if (fc->no_fallocate) + return -EOPNOTSUPP; + req = fuse_get_req(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -2196,6 +2199,10 @@ long fuse_file_fallocate(struct file *file, int mode, loff_t offset, req->in.args[0].value = &inarg; fuse_request_send(fc, req); err = req->out.h.error; + if (err == -ENOSYS) { + fc->no_fallocate = 1; + err = -EOPNOTSUPP; + } fuse_put_request(fc, req); return err; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 572cefc7801..f38fb795f03 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -478,6 +478,9 @@ struct fuse_conn { /** Are BSD file locking primitives not implemented by fs? */ unsigned no_flock:1; + /** Is fallocate not implemented by fs? */ + unsigned no_fallocate:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; From 2a01bb3885c9145dbb7583d5aa5f5d5504f6f46f Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Wed, 11 Apr 2012 08:15:29 -0400 Subject: [PATCH 004/475] panic: Make panic_on_oops configurable Several distros set this by default by patching panic_on_oops. It seems to fit with the BOOTPARAM_{HARD,SOFT}_PANIC options though, so let's add a Kconfig entry and reduce some more upstream delta. Signed-off-by: Kyle McMartin Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120411121529.GH26688@redacted.bos.redhat.com Signed-off-by: Ingo Molnar --- kernel/panic.c | 2 +- lib/Kconfig.debug | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/kernel/panic.c b/kernel/panic.c index 8ed89a175d7..b6215b7ce99 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -27,7 +27,7 @@ #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 -int panic_on_oops; +int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE; static unsigned long tainted_mask; static int pause_on_oops; static int pause_on_oops_flag; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6777153f18f..91858cd8394 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -228,6 +228,26 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC default 1 if BOOTPARAM_SOFTLOCKUP_PANIC +config PANIC_ON_OOPS + bool "Panic on Oops" if EXPERT + default n + help + Say Y here to enable the kernel to panic when it oopses. This + has the same effect as setting oops=panic on the kernel command + line. + + This feature is useful to ensure that the kernel does not do + anything erroneous after an oops which could result in data + corruption or other issues. + + Say N if unsure. + +config PANIC_ON_OOPS_VALUE + int + range 0 1 + default 0 if !PANIC_ON_OOPS + default 1 if PANIC_ON_OOPS + config DETECT_HUNG_TASK bool "Detect Hung Tasks" depends on DEBUG_KERNEL From c5971456964290da7e98222892797b71ef793e62 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 3 May 2012 14:48:26 +0100 Subject: [PATCH 005/475] ACPI battery: only refresh the sysfs files when pertinent information changes We only need to regenerate the sysfs files when the capacity units change, avoid the update otherwise. The origin of this issue is dates way back to 2.6.38: da8aeb92d4853f37e281f11fddf61f9c7d84c3cd (ACPI / Battery: Update information on info notification and resume) cc: Signed-off-by: Andy Whitcroft Tested-by: Ralf Jung Signed-off-by: Len Brown --- drivers/acpi/battery.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 86933ca8b47..7dd3f9fb9f3 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -643,11 +643,19 @@ static int acpi_battery_update(struct acpi_battery *battery) static void acpi_battery_refresh(struct acpi_battery *battery) { + int power_unit; + if (!battery->bat.dev) return; + power_unit = battery->power_unit; + acpi_battery_get_info(battery); - /* The battery may have changed its reporting units. */ + + if (power_unit == battery->power_unit) + return; + + /* The battery has changed its reporting units. */ sysfs_remove_battery(battery); sysfs_add_battery(battery); } From d8e725f356fd5f225ad97f21213fc007e409c9f5 Mon Sep 17 00:00:00 2001 From: Marco Aurelio da Costa Date: Fri, 4 May 2012 18:53:44 +0200 Subject: [PATCH 006/475] ACPI: Ignore invalid _PSS entries, but use valid ones The EliteBook 8560W has non-initialized entries in its _PSS ACPI table. Instead of bailing out when the first non-initialized entry is found, ignore it and use only the valid entries. Only bail out if there is no valid entry at all. [v3: Fixes suggested by Konrad] Signed-off-by: Marco Aurelio da Costa Signed-off-by: Len Brown --- drivers/acpi/processor_perflib.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 0af48a8554c..a093dc163a4 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -333,6 +333,7 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr) struct acpi_buffer state = { 0, NULL }; union acpi_object *pss = NULL; int i; + int last_invalid = -1; status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer); @@ -394,14 +395,33 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr) ((u32)(px->core_frequency * 1000) != (px->core_frequency * 1000))) { printk(KERN_ERR FW_BUG PREFIX - "Invalid BIOS _PSS frequency: 0x%llx MHz\n", - px->core_frequency); - result = -EFAULT; - kfree(pr->performance->states); - goto end; + "Invalid BIOS _PSS frequency found for processor %d: 0x%llx MHz\n", + pr->id, px->core_frequency); + if (last_invalid == -1) + last_invalid = i; + } else { + if (last_invalid != -1) { + /* + * Copy this valid entry over last_invalid entry + */ + memcpy(&(pr->performance->states[last_invalid]), + px, sizeof(struct acpi_processor_px)); + ++last_invalid; + } } } + if (last_invalid == 0) { + printk(KERN_ERR FW_BUG PREFIX + "No valid BIOS _PSS frequency found for processor %d\n", pr->id); + result = -EFAULT; + kfree(pr->performance->states); + pr->performance->states = NULL; + } + + if (last_invalid > 0) + pr->performance->state_count = last_invalid; + end: kfree(buffer.pointer); From 45c72cd73c788dd18c8113d4a404d6b4a01decf1 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 10 May 2012 19:49:38 +0400 Subject: [PATCH 007/475] fuse: fix stat call on 32 bit platforms Now we store attr->ino at inode->i_ino, return attr->ino at the first time and then return inode->i_ino if the attribute timeout isn't expired. That's wrong on 32 bit platforms because attr->ino is 64 bit and inode->i_ino is 32 bit in this case. Fix this by saving 64 bit ino in fuse_inode structure and returning it every time we call getattr. Also squash attr->ino into inode->i_ino explicitly. Signed-off-by: Pavel Shilovsky Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 1 + fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 17 ++++++++++++++++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index df5ac048dc7..bc438320cac 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -863,6 +863,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, if (stat) { generic_fillattr(inode, stat); stat->mode = fi->orig_i_mode; + stat->ino = fi->orig_ino; } } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index f38fb795f03..771fb6322c0 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -82,6 +82,9 @@ struct fuse_inode { preserve the original mode */ umode_t orig_i_mode; + /** 64 bit inode number */ + u64 orig_ino; + /** Version of last attribute change */ u64 attr_version; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 26783eb2b1f..a59cf5e673d 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -91,6 +91,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->nlookup = 0; fi->attr_version = 0; fi->writectr = 0; + fi->orig_ino = 0; INIT_LIST_HEAD(&fi->write_files); INIT_LIST_HEAD(&fi->queued_writes); INIT_LIST_HEAD(&fi->writepages); @@ -139,6 +140,18 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) return 0; } +/* + * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down + * so that it will fit. + */ +static ino_t fuse_squash_ino(u64 ino64) +{ + ino_t ino = (ino_t) ino64; + if (sizeof(ino_t) < sizeof(u64)) + ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8; + return ino; +} + void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, u64 attr_valid) { @@ -148,7 +161,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, fi->attr_version = ++fc->attr_version; fi->i_time = attr_valid; - inode->i_ino = attr->ino; + inode->i_ino = fuse_squash_ino(attr->ino); inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); set_nlink(inode, attr->nlink); inode->i_uid = attr->uid; @@ -174,6 +187,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, fi->orig_i_mode = inode->i_mode; if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) inode->i_mode &= ~S_ISVTX; + + fi->orig_ino = attr->ino; } void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, From 203627bbc90377c509e32450c67c5d957ba2d989 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 10 May 2012 19:49:38 +0400 Subject: [PATCH 008/475] fuse: fix blksize calculation Don't use inode->i_blkbits which might be stale, instead calculate the blksize information from the freshly obtained attributes. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index bc438320cac..334e0b18a01 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -775,6 +775,8 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, struct kstat *stat) { + unsigned int blkbits; + stat->dev = inode->i_sb->s_dev; stat->ino = attr->ino; stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); @@ -790,7 +792,13 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, stat->ctime.tv_nsec = attr->ctimensec; stat->size = attr->size; stat->blocks = attr->blocks; - stat->blksize = (1 << inode->i_blkbits); + + if (attr->blksize != 0) + blkbits = ilog2(attr->blksize); + else + blkbits = inode->i_sb->s_blocksize_bits; + + stat->blksize = 1 << blkbits; } static int fuse_do_getattr(struct inode *inode, struct kstat *stat, From c3ba9698152b17fdc2c7cd0f7cbeb571e3367e9d Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Mon, 9 Apr 2012 17:06:54 -0600 Subject: [PATCH 009/475] mm: frontswap: add frontswap header file Frontswap is the alter ego of cleancache, the "yang" to cleancache's "yin"... and more precisely frontswap is the provider of anonymous pages to transcendent memory to nicely complement cleancache's providing of clean pagecache pages to transcendent memory. For optimal use of transcendent memory, both are necessary... because a kernel under memory pressure first reclaims clean pagecache pages and, when under more memory pressure, starts swapping anonymous pages. Frontswap and cleancache (which was merged at 3.0) are the "frontends" and the only necessary changes to the core kernel for transcendent memory; all other supporting code -- the "backends" -- is implemented as drivers. See the LWN.net article "Transcendent memory in a nutshell" for a detailed overview of frontswap and related kernel parts: https://lwn.net/Articles/454795/ Frontswap code was first posted publicly in January 2009 and on LKML in May 2009, and has remained functionally stable for nearly three years now. It is barely invasive, touching only the swap subsystem and adds less than 100 lines of code to existing swap subsystem code files. It has improved syntactically substantially between V1 and this posting of V14, thanks to the review of a few kernel developers, and has adapted easily to at least one major swap subsystem change. As of 3.4, there are three in-tree users of frontswap patiently waiting for this patchset and for CONFIG_FRONTSWAP to be enabled: zcache (staging driver merged at 2.6.39), Xen tmem (merged at 3.0 and 3.1) and RAMster (staging driver merged at 3.4). In addition, a RFC has been posted for a KVM backend. The frontswap patchset has been in linux-next since next-110603. Earlier versions of frontswap already ship in the Oracle Unbreakable Enterprise Kernel and SuSE SLES. This patch, 1of4, provides the header file for the core code for frontswap that interfaces between the hooks in the swap subsystem and a frontswap backend via frontswap_ops. --- New file added: include/linux/frontswap.h [v14: add support for writethrough, per suggestion by aarcange@redhat.com] [v14: rebase to 3.4-rc2] [v11: konrad.wilk@oracle.com: squashed s/flush/invalidate/ in] [v10: no change] [v9: akpm@linux-foundation.org: change "flush" to "invalidate", part 1] [v8: rebase to 3.0-rc4] [v7: rebase to 3.0-rc3] [v7: JBeulich@novell.com: new static inlines resolve to no-ops if not config'd] [v7: JBeulich@novell.com: avoid redundant shifts/divides for *_bit lib calls] [v6: rebase to 3.1-rc1] [v5: no change from v4] [v4: rebase to 2.6.39] Signed-off-by: Dan Magenheimer Reviewed-by: Andrew Morton Acked-by: Jan Beulich Acked-by: Seth Jennings Cc: Jeremy Fitzhardinge Cc: Hugh Dickins Cc: Johannes Weiner Cc: Nitin Gupta Cc: Matthew Wilcox Cc: Chris Mason Cc: Rik Riel [v15: int/bool on some functions] Signed-off-by: Konrad Wilk --- include/linux/frontswap.h | 127 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 include/linux/frontswap.h diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h new file mode 100644 index 00000000000..68ff7af5c5f --- /dev/null +++ b/include/linux/frontswap.h @@ -0,0 +1,127 @@ +#ifndef _LINUX_FRONTSWAP_H +#define _LINUX_FRONTSWAP_H + +#include +#include +#include + +struct frontswap_ops { + void (*init)(unsigned); + int (*put_page)(unsigned, pgoff_t, struct page *); + int (*get_page)(unsigned, pgoff_t, struct page *); + void (*invalidate_page)(unsigned, pgoff_t); + void (*invalidate_area)(unsigned); +}; + +extern bool frontswap_enabled; +extern struct frontswap_ops + frontswap_register_ops(struct frontswap_ops *ops); +extern void frontswap_shrink(unsigned long); +extern unsigned long frontswap_curr_pages(void); +extern void frontswap_writethrough(bool); + +extern void __frontswap_init(unsigned type); +extern int __frontswap_put_page(struct page *page); +extern int __frontswap_get_page(struct page *page); +extern void __frontswap_invalidate_page(unsigned, pgoff_t); +extern void __frontswap_invalidate_area(unsigned); + +#ifdef CONFIG_FRONTSWAP + +static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) +{ + bool ret = false; + + if (frontswap_enabled && sis->frontswap_map) + ret = test_bit(offset, sis->frontswap_map); + return ret; +} + +static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) +{ + if (frontswap_enabled && sis->frontswap_map) + set_bit(offset, sis->frontswap_map); +} + +static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) +{ + if (frontswap_enabled && sis->frontswap_map) + clear_bit(offset, sis->frontswap_map); +} + +static inline void frontswap_map_set(struct swap_info_struct *p, + unsigned long *map) +{ + p->frontswap_map = map; +} + +static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) +{ + return p->frontswap_map; +} +#else +/* all inline routines become no-ops and all externs are ignored */ + +#define frontswap_enabled (0) + +static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) +{ + return false; +} + +static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) +{ +} + +static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) +{ +} + +static inline void frontswap_map_set(struct swap_info_struct *p, + unsigned long *map) +{ +} + +static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) +{ + return NULL; +} +#endif + +static inline int frontswap_put_page(struct page *page) +{ + int ret = -1; + + if (frontswap_enabled) + ret = __frontswap_put_page(page); + return ret; +} + +static inline int frontswap_get_page(struct page *page) +{ + int ret = -1; + + if (frontswap_enabled) + ret = __frontswap_get_page(page); + return ret; +} + +static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset) +{ + if (frontswap_enabled) + __frontswap_invalidate_page(type, offset); +} + +static inline void frontswap_invalidate_area(unsigned type) +{ + if (frontswap_enabled) + __frontswap_invalidate_area(type); +} + +static inline void frontswap_init(unsigned type) +{ + if (frontswap_enabled) + __frontswap_init(type); +} + +#endif /* _LINUX_FRONTSWAP_H */ From 38b5faf4b178d5279b1fca5d7dadc68881342660 Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Mon, 9 Apr 2012 17:08:06 -0600 Subject: [PATCH 010/475] mm: frontswap: core swap subsystem hooks and headers This patch, 2of4, contains the changes to the core swap subsystem. This includes: (1) makes available core swap data structures (swap_lock, swap_list and swap_info) that are needed by frontswap.c but we don't need to expose them to the dozens of files that include swap.h so we create a new swapfile.h just to extern-ify these and modify their declarations to non-static (2) adds frontswap-related elements to swap_info_struct. Frontswap_map points to vzalloc'ed one-bit-per-swap-page metadata that indicates whether the swap page is in frontswap or in the device and frontswap_pages counts how many pages are in frontswap. (3) adds hooks in the swap subsystem and extends try_to_unuse so that frontswap_shrink can do a "partial swapoff". Note that a failed frontswap_map allocation is safe... failure is noted by lack of "FS" in the subsequent printk. --- [v14: rebase to 3.4-rc2] [v10: no change] [v9: akpm@linux-foundation.org: mark some statics __read_mostly] [v9: akpm@linux-foundation.org: add clarifying comments] [v9: akpm@linux-foundation.org: no need to loop repeating try_to_unuse] [v9: error27@gmail.com: remove superfluous check for NULL] [v8: rebase to 3.0-rc4] [v8: kamezawa.hiroyu@jp.fujitsu.com: change counter to atomic_t to avoid races] [v8: kamezawa.hiroyu@jp.fujitsu.com: comment to clarify informational counters] [v7: rebase to 3.0-rc3] [v7: JBeulich@novell.com: add new swap struct elements only if config'd] [v6: rebase to 3.0-rc1] [v6: lliubbo@gmail.com: fix null pointer deref if vzalloc fails] [v6: konrad.wilk@oracl.com: various checks and code clarifications/comments] [v5: no change from v4] [v4: rebase to 2.6.39] Signed-off-by: Dan Magenheimer Reviewed-by: Kamezawa Hiroyuki Acked-by: Jan Beulich Acked-by: Seth Jennings Cc: Jeremy Fitzhardinge Cc: Hugh Dickins Cc: Johannes Weiner Cc: Nitin Gupta Cc: Matthew Wilcox Cc: Chris Mason Cc: Rik Riel Cc: Andrew Morton [v11: Rebased, fixed mm/swapfile.c context change] Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swap.h | 4 +++ include/linux/swapfile.h | 13 ++++++++++ mm/page_io.c | 12 +++++++++ mm/swapfile.c | 54 ++++++++++++++++++++++++++++++---------- 4 files changed, 70 insertions(+), 13 deletions(-) create mode 100644 include/linux/swapfile.h diff --git a/include/linux/swap.h b/include/linux/swap.h index b1fd5c7925f..50a55e2d58e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -197,6 +197,10 @@ struct swap_info_struct { struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ +#ifdef CONFIG_FRONTSWAP + unsigned long *frontswap_map; /* frontswap in-use, one bit per page */ + atomic_t frontswap_pages; /* frontswap pages in-use counter */ +#endif }; struct swap_list_t { diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h new file mode 100644 index 00000000000..e282624e8c1 --- /dev/null +++ b/include/linux/swapfile.h @@ -0,0 +1,13 @@ +#ifndef _LINUX_SWAPFILE_H +#define _LINUX_SWAPFILE_H + +/* + * these were static in swapfile.c but frontswap.c needs them and we don't + * want to expose them to the dozens of source files that include swap.h + */ +extern spinlock_t swap_lock; +extern struct swap_list_t swap_list; +extern struct swap_info_struct *swap_info[]; +extern int try_to_unuse(unsigned int, bool, unsigned long); + +#endif /* _LINUX_SWAPFILE_H */ diff --git a/mm/page_io.c b/mm/page_io.c index dc76b4d0611..651a9125931 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -18,6 +18,7 @@ #include #include #include +#include #include static struct bio *get_swap_bio(gfp_t gfp_flags, @@ -98,6 +99,12 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) unlock_page(page); goto out; } + if (frontswap_put_page(page) == 0) { + set_page_writeback(page); + unlock_page(page); + end_page_writeback(page); + goto out; + } bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write); if (bio == NULL) { set_page_dirty(page); @@ -122,6 +129,11 @@ int swap_readpage(struct page *page) VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(PageUptodate(page)); + if (frontswap_get_page(page) == 0) { + SetPageUptodate(page); + unlock_page(page); + goto out; + } bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read); if (bio == NULL) { unlock_page(page); diff --git a/mm/swapfile.c b/mm/swapfile.c index fafc26d1b1d..9c7be87175c 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include #include @@ -42,7 +44,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t, static void free_swap_count_continuations(struct swap_info_struct *); static sector_t map_swap_entry(swp_entry_t, struct block_device**); -static DEFINE_SPINLOCK(swap_lock); +DEFINE_SPINLOCK(swap_lock); static unsigned int nr_swapfiles; long nr_swap_pages; long total_swap_pages; @@ -53,9 +55,9 @@ static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; static const char Unused_offset[] = "Unused swap offset entry "; -static struct swap_list_t swap_list = {-1, -1}; +struct swap_list_t swap_list = {-1, -1}; -static struct swap_info_struct *swap_info[MAX_SWAPFILES]; +struct swap_info_struct *swap_info[MAX_SWAPFILES]; static DEFINE_MUTEX(swapon_mutex); @@ -556,6 +558,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p, swap_list.next = p->type; nr_swap_pages++; p->inuse_pages--; + frontswap_invalidate_page(p->type, offset); if ((p->flags & SWP_BLKDEV) && disk->fops->swap_slot_free_notify) disk->fops->swap_slot_free_notify(p->bdev, offset); @@ -1016,11 +1019,12 @@ static int unuse_mm(struct mm_struct *mm, } /* - * Scan swap_map from current position to next entry still in use. + * Scan swap_map (or frontswap_map if frontswap parameter is true) + * from current position to next entry still in use. * Recycle to start on reaching the end, returning 0 when empty. */ static unsigned int find_next_to_unuse(struct swap_info_struct *si, - unsigned int prev) + unsigned int prev, bool frontswap) { unsigned int max = si->max; unsigned int i = prev; @@ -1046,6 +1050,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, prev = 0; i = 1; } + if (frontswap) { + if (frontswap_test(si, i)) + break; + else + continue; + } count = si->swap_map[i]; if (count && swap_count(count) != SWAP_MAP_BAD) break; @@ -1057,8 +1067,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, * We completely avoid races by reading each swap page in advance, * and then search for the process using it. All the necessary * page table adjustments can then be made atomically. + * + * if the boolean frontswap is true, only unuse pages_to_unuse pages; + * pages_to_unuse==0 means all pages; ignored if frontswap is false */ -static int try_to_unuse(unsigned int type) +int try_to_unuse(unsigned int type, bool frontswap, + unsigned long pages_to_unuse) { struct swap_info_struct *si = swap_info[type]; struct mm_struct *start_mm; @@ -1091,7 +1105,7 @@ static int try_to_unuse(unsigned int type) * one pass through swap_map is enough, but not necessarily: * there are races when an instance of an entry might be missed. */ - while ((i = find_next_to_unuse(si, i)) != 0) { + while ((i = find_next_to_unuse(si, i, frontswap)) != 0) { if (signal_pending(current)) { retval = -EINTR; break; @@ -1258,6 +1272,10 @@ static int try_to_unuse(unsigned int type) * interactive performance. */ cond_resched(); + if (frontswap && pages_to_unuse > 0) { + if (!--pages_to_unuse) + break; + } } mmput(start_mm); @@ -1517,7 +1535,8 @@ bad_bmap: } static void enable_swap_info(struct swap_info_struct *p, int prio, - unsigned char *swap_map) + unsigned char *swap_map, + unsigned long *frontswap_map) { int i, prev; @@ -1527,6 +1546,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, else p->prio = --least_priority; p->swap_map = swap_map; + frontswap_map_set(p, frontswap_map); p->flags |= SWP_WRITEOK; nr_swap_pages += p->pages; total_swap_pages += p->pages; @@ -1543,6 +1563,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, swap_list.head = swap_list.next = p->type; else swap_info[prev]->next = p->type; + frontswap_init(p->type); spin_unlock(&swap_lock); } @@ -1616,7 +1637,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) spin_unlock(&swap_lock); oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); - err = try_to_unuse(type); + err = try_to_unuse(type, false, 0); /* force all pages to be unused */ compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); if (err) { @@ -1627,7 +1648,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) * sys_swapoff for this swap_info_struct at this point. */ /* re-insert swap space back into swap_list */ - enable_swap_info(p, p->prio, p->swap_map); + enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p)); goto out_dput; } @@ -1653,9 +1674,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) swap_map = p->swap_map; p->swap_map = NULL; p->flags = 0; + frontswap_invalidate_area(type); spin_unlock(&swap_lock); mutex_unlock(&swapon_mutex); vfree(swap_map); + vfree(frontswap_map_get(p)); /* Destroy swap account informatin */ swap_cgroup_swapoff(type); @@ -2019,6 +2042,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) sector_t span; unsigned long maxpages; unsigned char *swap_map = NULL; + unsigned long *frontswap_map = NULL; struct page *page = NULL; struct inode *inode = NULL; @@ -2102,6 +2126,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = nr_extents; goto bad_swap; } + /* frontswap enabled? set up bit-per-page map for frontswap */ + if (frontswap_enabled) + frontswap_map = vzalloc(maxpages / sizeof(long)); if (p->bdev) { if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { @@ -2117,14 +2144,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (swap_flags & SWAP_FLAG_PREFER) prio = (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; - enable_swap_info(p, prio, swap_map); + enable_swap_info(p, prio, swap_map, frontswap_map); printk(KERN_INFO "Adding %uk swap on %s. " - "Priority:%d extents:%d across:%lluk %s%s\n", + "Priority:%d extents:%d across:%lluk %s%s%s\n", p->pages<<(PAGE_SHIFT-10), name, p->prio, nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), (p->flags & SWP_SOLIDSTATE) ? "SS" : "", - (p->flags & SWP_DISCARDABLE) ? "D" : ""); + (p->flags & SWP_DISCARDABLE) ? "D" : "", + (frontswap_map) ? "FS" : ""); mutex_unlock(&swapon_mutex); atomic_inc(&proc_poll_event); From 29f233cfffe7fbc6672938117ce7e4154a2f515f Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Mon, 9 Apr 2012 17:09:27 -0600 Subject: [PATCH 011/475] mm: frontswap: core frontswap functionality This patch, 3of4, provides the core frontswap code that interfaces between the hooks in the swap subsystem and a frontswap backend via frontswap_ops. --- New file added: mm/frontswap.c [v14: add support for writethrough, per suggestion by aarcange@redhat.com] [v11: sjenning@linux.vnet.ibm.com: s/puts/failed_puts/] [v10: sjenning@linux.vnet.ibm.com: fix debugfs calls on 32-bit] [v9: akpm@linux-foundation.org: change "flush" to "invalidate", part 1] [v9: akpm@linux-foundation.org: mark some statics __read_mostly] [v9: akpm@linux-foundation.org: add clarifying comments] [v9: akpm@linux-foundation.org: no need to loop repeating try_to_unuse] [v9: error27@gmail.com: remove superfluous check for NULL] [v8: rebase to 3.0-rc4] [v8: kamezawa.hiroyu@jp.fujitsu.com: add comment to clarify find_next_to_unuse] [v7: rebase to 3.0-rc3] [v7: JBeulich@novell.com: use new static inlines, no-ops if not config'd] [v6: rebase to 3.1-rc1] [v6: lliubbo@gmail.com: use vzalloc] [v6: lliubbo@gmail.com: fix null pointer deref if vzalloc fails] [v6: konrad.wilk@oracl.com: various checks and code clarifications/comments] [v4: rebase to 2.6.39] Signed-off-by: Dan Magenheimer Acked-by: Jan Beulich Acked-by: Seth Jennings Cc: Jeremy Fitzhardinge Cc: Hugh Dickins Cc: Johannes Weiner Cc: Nitin Gupta Cc: Matthew Wilcox Cc: Chris Mason Cc: Rik Riel Cc: Andrew Morton [v12: Squashed s/flush/invalidate/ in] [v15: A bit of cleanup and seperate DEBUGFS] Signed-off-by: Konrad Wilk --- mm/frontswap.c | 314 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 314 insertions(+) create mode 100644 mm/frontswap.c diff --git a/mm/frontswap.c b/mm/frontswap.c new file mode 100644 index 00000000000..8c0a5f8683f --- /dev/null +++ b/mm/frontswap.c @@ -0,0 +1,314 @@ +/* + * Frontswap frontend + * + * This code provides the generic "frontend" layer to call a matching + * "backend" driver implementation of frontswap. See + * Documentation/vm/frontswap.txt for more information. + * + * Copyright (C) 2009-2012 Oracle Corp. All rights reserved. + * Author: Dan Magenheimer + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * frontswap_ops is set by frontswap_register_ops to contain the pointers + * to the frontswap "backend" implementation functions. + */ +static struct frontswap_ops frontswap_ops __read_mostly; + +/* + * This global enablement flag reduces overhead on systems where frontswap_ops + * has not been registered, so is preferred to the slower alternative: a + * function call that checks a non-global. + */ +bool frontswap_enabled __read_mostly; +EXPORT_SYMBOL(frontswap_enabled); + +/* + * If enabled, frontswap_put will return failure even on success. As + * a result, the swap subsystem will always write the page to swap, in + * effect converting frontswap into a writethrough cache. In this mode, + * there is no direct reduction in swap writes, but a frontswap backend + * can unilaterally "reclaim" any pages in use with no data loss, thus + * providing increases control over maximum memory usage due to frontswap. + */ +static bool frontswap_writethrough_enabled __read_mostly; + +#ifdef CONFIG_DEBUG_FS +/* + * Counters available via /sys/kernel/debug/frontswap (if debugfs is + * properly configured). These are for information only so are not protected + * against increment races. + */ +static u64 frontswap_gets; +static u64 frontswap_succ_puts; +static u64 frontswap_failed_puts; +static u64 frontswap_invalidates; + +static inline void inc_frontswap_gets(void) { + frontswap_gets++; +} +static inline void inc_frontswap_succ_puts(void) { + frontswap_succ_puts++; +} +static inline void inc_frontswap_failed_puts(void) { + frontswap_failed_puts++; +} +static inline void inc_frontswap_invalidates(void) { + frontswap_invalidates++; +} +#else +static inline void inc_frontswap_gets(void) { } +static inline void inc_frontswap_succ_puts(void) { } +static inline void inc_frontswap_failed_puts(void) { } +static inline void inc_frontswap_invalidates(void) { } +#endif +/* + * Register operations for frontswap, returning previous thus allowing + * detection of multiple backends and possible nesting. + */ +struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops) +{ + struct frontswap_ops old = frontswap_ops; + + frontswap_ops = *ops; + frontswap_enabled = true; + return old; +} +EXPORT_SYMBOL(frontswap_register_ops); + +/* + * Enable/disable frontswap writethrough (see above). + */ +void frontswap_writethrough(bool enable) +{ + frontswap_writethrough_enabled = enable; +} +EXPORT_SYMBOL(frontswap_writethrough); + +/* + * Called when a swap device is swapon'd. + */ +void __frontswap_init(unsigned type) +{ + struct swap_info_struct *sis = swap_info[type]; + + BUG_ON(sis == NULL); + if (sis->frontswap_map == NULL) + return; + if (frontswap_enabled) + (*frontswap_ops.init)(type); +} +EXPORT_SYMBOL(__frontswap_init); + +/* + * "Put" data from a page to frontswap and associate it with the page's + * swaptype and offset. Page must be locked and in the swap cache. + * If frontswap already contains a page with matching swaptype and + * offset, the frontswap implmentation may either overwrite the data and + * return success or invalidate the page from frontswap and return failure. + */ +int __frontswap_put_page(struct page *page) +{ + int ret = -1, dup = 0; + swp_entry_t entry = { .val = page_private(page), }; + int type = swp_type(entry); + struct swap_info_struct *sis = swap_info[type]; + pgoff_t offset = swp_offset(entry); + + BUG_ON(!PageLocked(page)); + BUG_ON(sis == NULL); + if (frontswap_test(sis, offset)) + dup = 1; + ret = (*frontswap_ops.put_page)(type, offset, page); + if (ret == 0) { + frontswap_set(sis, offset); + inc_frontswap_succ_puts(); + if (!dup) + atomic_inc(&sis->frontswap_pages); + } else if (dup) { + /* + failed dup always results in automatic invalidate of + the (older) page from frontswap + */ + frontswap_clear(sis, offset); + atomic_dec(&sis->frontswap_pages); + inc_frontswap_failed_puts(); + } else + inc_frontswap_failed_puts(); + if (frontswap_writethrough_enabled) + /* report failure so swap also writes to swap device */ + ret = -1; + return ret; +} +EXPORT_SYMBOL(__frontswap_put_page); + +/* + * "Get" data from frontswap associated with swaptype and offset that were + * specified when the data was put to frontswap and use it to fill the + * specified page with data. Page must be locked and in the swap cache. + */ +int __frontswap_get_page(struct page *page) +{ + int ret = -1; + swp_entry_t entry = { .val = page_private(page), }; + int type = swp_type(entry); + struct swap_info_struct *sis = swap_info[type]; + pgoff_t offset = swp_offset(entry); + + BUG_ON(!PageLocked(page)); + BUG_ON(sis == NULL); + if (frontswap_test(sis, offset)) + ret = (*frontswap_ops.get_page)(type, offset, page); + if (ret == 0) + inc_frontswap_gets(); + return ret; +} +EXPORT_SYMBOL(__frontswap_get_page); + +/* + * Invalidate any data from frontswap associated with the specified swaptype + * and offset so that a subsequent "get" will fail. + */ +void __frontswap_invalidate_page(unsigned type, pgoff_t offset) +{ + struct swap_info_struct *sis = swap_info[type]; + + BUG_ON(sis == NULL); + if (frontswap_test(sis, offset)) { + (*frontswap_ops.invalidate_page)(type, offset); + atomic_dec(&sis->frontswap_pages); + frontswap_clear(sis, offset); + inc_frontswap_invalidates(); + } +} +EXPORT_SYMBOL(__frontswap_invalidate_page); + +/* + * Invalidate all data from frontswap associated with all offsets for the + * specified swaptype. + */ +void __frontswap_invalidate_area(unsigned type) +{ + struct swap_info_struct *sis = swap_info[type]; + + BUG_ON(sis == NULL); + if (sis->frontswap_map == NULL) + return; + (*frontswap_ops.invalidate_area)(type); + atomic_set(&sis->frontswap_pages, 0); + memset(sis->frontswap_map, 0, sis->max / sizeof(long)); +} +EXPORT_SYMBOL(__frontswap_invalidate_area); + +/* + * Frontswap, like a true swap device, may unnecessarily retain pages + * under certain circumstances; "shrink" frontswap is essentially a + * "partial swapoff" and works by calling try_to_unuse to attempt to + * unuse enough frontswap pages to attempt to -- subject to memory + * constraints -- reduce the number of pages in frontswap to the + * number given in the parameter target_pages. + */ +void frontswap_shrink(unsigned long target_pages) +{ + struct swap_info_struct *si = NULL; + int si_frontswap_pages; + unsigned long total_pages = 0, total_pages_to_unuse; + unsigned long pages = 0, pages_to_unuse = 0; + int type; + bool locked = false; + + /* + * we don't want to hold swap_lock while doing a very + * lengthy try_to_unuse, but swap_list may change + * so restart scan from swap_list.head each time + */ + spin_lock(&swap_lock); + locked = true; + total_pages = 0; + for (type = swap_list.head; type >= 0; type = si->next) { + si = swap_info[type]; + total_pages += atomic_read(&si->frontswap_pages); + } + if (total_pages <= target_pages) + goto out; + total_pages_to_unuse = total_pages - target_pages; + for (type = swap_list.head; type >= 0; type = si->next) { + si = swap_info[type]; + si_frontswap_pages = atomic_read(&si->frontswap_pages); + if (total_pages_to_unuse < si_frontswap_pages) + pages = pages_to_unuse = total_pages_to_unuse; + else { + pages = si_frontswap_pages; + pages_to_unuse = 0; /* unuse all */ + } + /* ensure there is enough RAM to fetch pages from frontswap */ + if (security_vm_enough_memory_mm(current->mm, pages)) + continue; + vm_unacct_memory(pages); + break; + } + if (type < 0) + goto out; + locked = false; + spin_unlock(&swap_lock); + try_to_unuse(type, true, pages_to_unuse); +out: + if (locked) + spin_unlock(&swap_lock); + return; +} +EXPORT_SYMBOL(frontswap_shrink); + +/* + * Count and return the number of frontswap pages across all + * swap devices. This is exported so that backend drivers can + * determine current usage without reading debugfs. + */ +unsigned long frontswap_curr_pages(void) +{ + int type; + unsigned long totalpages = 0; + struct swap_info_struct *si = NULL; + + spin_lock(&swap_lock); + for (type = swap_list.head; type >= 0; type = si->next) { + si = swap_info[type]; + totalpages += atomic_read(&si->frontswap_pages); + } + spin_unlock(&swap_lock); + return totalpages; +} +EXPORT_SYMBOL(frontswap_curr_pages); + +static int __init init_frontswap(void) +{ +#ifdef CONFIG_DEBUG_FS + struct dentry *root = debugfs_create_dir("frontswap", NULL); + if (root == NULL) + return -ENXIO; + debugfs_create_u64("gets", S_IRUGO, root, &frontswap_gets); + debugfs_create_u64("succ_puts", S_IRUGO, root, &frontswap_succ_puts); + debugfs_create_u64("failed_puts", S_IRUGO, root, + &frontswap_failed_puts); + debugfs_create_u64("invalidates", S_IRUGO, + root, &frontswap_invalidates); +#endif + return 0; +} + +module_init(init_frontswap); From 27c6aec214264992603526d47da9dabddf3521b3 Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Mon, 9 Apr 2012 17:10:34 -0600 Subject: [PATCH 012/475] mm: frontswap: config and doc files This patch 4of4 adds configuration and documentation files including a FAQ. [v14: updated docs/FAQ to use zcache and RAMster as examples] [v10: no change] [v9: akpm@linux-foundation.org: sysfs->debugfs; no longer need Doc/ABI file] [v8: rebase to 3.0-rc4] [v7: rebase to 3.0-rc3] [v6: rebase to 3.0-rc1] [v5: change config default to n] [v4: rebase to 2.6.39] Signed-off-by: Dan Magenheimer Acked-by: Jan Beulich Acked-by: Seth Jennings Cc: Jeremy Fitzhardinge Cc: Hugh Dickins Cc: Johannes Weiner Cc: Nitin Gupta Cc: Matthew Wilcox Cc: Chris Mason Cc: Rik Riel Cc: Andrew Morton Signed-off-by: Konrad Rzeszutek Wilk --- Documentation/vm/frontswap.txt | 278 +++++++++++++++++++++++++++++++++ mm/Kconfig | 17 ++ mm/Makefile | 1 + 3 files changed, 296 insertions(+) create mode 100644 Documentation/vm/frontswap.txt diff --git a/Documentation/vm/frontswap.txt b/Documentation/vm/frontswap.txt new file mode 100644 index 00000000000..a9f731af0fa --- /dev/null +++ b/Documentation/vm/frontswap.txt @@ -0,0 +1,278 @@ +Frontswap provides a "transcendent memory" interface for swap pages. +In some environments, dramatic performance savings may be obtained because +swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk. + +(Note, frontswap -- and cleancache (merged at 3.0) -- are the "frontends" +and the only necessary changes to the core kernel for transcendent memory; +all other supporting code -- the "backends" -- is implemented as drivers. +See the LWN.net article "Transcendent memory in a nutshell" for a detailed +overview of frontswap and related kernel parts: +https://lwn.net/Articles/454795/ ) + +Frontswap is so named because it can be thought of as the opposite of +a "backing" store for a swap device. The storage is assumed to be +a synchronous concurrency-safe page-oriented "pseudo-RAM device" conforming +to the requirements of transcendent memory (such as Xen's "tmem", or +in-kernel compressed memory, aka "zcache", or future RAM-like devices); +this pseudo-RAM device is not directly accessible or addressable by the +kernel and is of unknown and possibly time-varying size. The driver +links itself to frontswap by calling frontswap_register_ops to set the +frontswap_ops funcs appropriately and the functions it provides must +conform to certain policies as follows: + +An "init" prepares the device to receive frontswap pages associated +with the specified swap device number (aka "type"). A "put_page" will +copy the page to transcendent memory and associate it with the type and +offset associated with the page. A "get_page" will copy the page, if found, +from transcendent memory into kernel memory, but will NOT remove the page +from from transcendent memory. An "invalidate_page" will remove the page +from transcendent memory and an "invalidate_area" will remove ALL pages +associated with the swap type (e.g., like swapoff) and notify the "device" +to refuse further puts with that swap type. + +Once a page is successfully put, a matching get on the page will normally +succeed. So when the kernel finds itself in a situation where it needs +to swap out a page, it first attempts to use frontswap. If the put returns +success, the data has been successfully saved to transcendent memory and +a disk write and, if the data is later read back, a disk read are avoided. +If a put returns failure, transcendent memory has rejected the data, and the +page can be written to swap as usual. + +If a backend chooses, frontswap can be configured as a "writethrough +cache" by calling frontswap_writethrough(). In this mode, the reduction +in swap device writes is lost (and also a non-trivial performance advantage) +in order to allow the backend to arbitrarily "reclaim" space used to +store frontswap pages to more completely manage its memory usage. + +Note that if a page is put and the page already exists in transcendent memory +(a "duplicate" put), either the put succeeds and the data is overwritten, +or the put fails AND the page is invalidated. This ensures stale data may +never be obtained from frontswap. + +If properly configured, monitoring of frontswap is done via debugfs in +the /sys/kernel/debug/frontswap directory. The effectiveness of +frontswap can be measured (across all swap devices) with: + +failed_puts - how many put attempts have failed +gets - how many gets were attempted (all should succeed) +succ_puts - how many put attempts have succeeded +invalidates - how many invalidates were attempted + +A backend implementation may provide additional metrics. + +FAQ + +1) Where's the value? + +When a workload starts swapping, performance falls through the floor. +Frontswap significantly increases performance in many such workloads by +providing a clean, dynamic interface to read and write swap pages to +"transcendent memory" that is otherwise not directly addressable to the kernel. +This interface is ideal when data is transformed to a different form +and size (such as with compression) or secretly moved (as might be +useful for write-balancing for some RAM-like devices). Swap pages (and +evicted page-cache pages) are a great use for this kind of slower-than-RAM- +but-much-faster-than-disk "pseudo-RAM device" and the frontswap (and +cleancache) interface to transcendent memory provides a nice way to read +and write -- and indirectly "name" -- the pages. + +Frontswap -- and cleancache -- with a fairly small impact on the kernel, +provides a huge amount of flexibility for more dynamic, flexible RAM +utilization in various system configurations: + +In the single kernel case, aka "zcache", pages are compressed and +stored in local memory, thus increasing the total anonymous pages +that can be safely kept in RAM. Zcache essentially trades off CPU +cycles used in compression/decompression for better memory utilization. +Benchmarks have shown little or no impact when memory pressure is +low while providing a significant performance improvement (25%+) +on some workloads under high memory pressure. + +"RAMster" builds on zcache by adding "peer-to-peer" transcendent memory +support for clustered systems. Frontswap pages are locally compressed +as in zcache, but then "remotified" to another system's RAM. This +allows RAM to be dynamically load-balanced back-and-forth as needed, +i.e. when system A is overcommitted, it can swap to system B, and +vice versa. RAMster can also be configured as a memory server so +many servers in a cluster can swap, dynamically as needed, to a single +server configured with a large amount of RAM... without pre-configuring +how much of the RAM is available for each of the clients! + +In the virtual case, the whole point of virtualization is to statistically +multiplex physical resources acrosst the varying demands of multiple +virtual machines. This is really hard to do with RAM and efforts to do +it well with no kernel changes have essentially failed (except in some +well-publicized special-case workloads). +Specifically, the Xen Transcendent Memory backend allows otherwise +"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple +virtual machines, but the pages can be compressed and deduplicated to +optimize RAM utilization. And when guest OS's are induced to surrender +underutilized RAM (e.g. with "selfballooning"), sudden unexpected +memory pressure may result in swapping; frontswap allows those pages +to be swapped to and from hypervisor RAM (if overall host system memory +conditions allow), thus mitigating the potentially awful performance impact +of unplanned swapping. + +A KVM implementation is underway and has been RFC'ed to lkml. And, +using frontswap, investigation is also underway on the use of NVM as +a memory extension technology. + +2) Sure there may be performance advantages in some situations, but + what's the space/time overhead of frontswap? + +If CONFIG_FRONTSWAP is disabled, every frontswap hook compiles into +nothingness and the only overhead is a few extra bytes per swapon'ed +swap device. If CONFIG_FRONTSWAP is enabled but no frontswap "backend" +registers, there is one extra global variable compared to zero for +every swap page read or written. If CONFIG_FRONTSWAP is enabled +AND a frontswap backend registers AND the backend fails every "put" +request (i.e. provides no memory despite claiming it might), +CPU overhead is still negligible -- and since every frontswap fail +precedes a swap page write-to-disk, the system is highly likely +to be I/O bound and using a small fraction of a percent of a CPU +will be irrelevant anyway. + +As for space, if CONFIG_FRONTSWAP is enabled AND a frontswap backend +registers, one bit is allocated for every swap page for every swap +device that is swapon'd. This is added to the EIGHT bits (which +was sixteen until about 2.6.34) that the kernel already allocates +for every swap page for every swap device that is swapon'd. (Hugh +Dickins has observed that frontswap could probably steal one of +the existing eight bits, but let's worry about that minor optimization +later.) For very large swap disks (which are rare) on a standard +4K pagesize, this is 1MB per 32GB swap. + +When swap pages are stored in transcendent memory instead of written +out to disk, there is a side effect that this may create more memory +pressure that can potentially outweigh the other advantages. A +backend, such as zcache, must implement policies to carefully (but +dynamically) manage memory limits to ensure this doesn't happen. + +3) OK, how about a quick overview of what this frontswap patch does + in terms that a kernel hacker can grok? + +Let's assume that a frontswap "backend" has registered during +kernel initialization; this registration indicates that this +frontswap backend has access to some "memory" that is not directly +accessible by the kernel. Exactly how much memory it provides is +entirely dynamic and random. + +Whenever a swap-device is swapon'd frontswap_init() is called, +passing the swap device number (aka "type") as a parameter. +This notifies frontswap to expect attempts to "put" swap pages +associated with that number. + +Whenever the swap subsystem is readying a page to write to a swap +device (c.f swap_writepage()), frontswap_put_page is called. Frontswap +consults with the frontswap backend and if the backend says it does NOT +have room, frontswap_put_page returns -1 and the kernel swaps the page +to the swap device as normal. Note that the response from the frontswap +backend is unpredictable to the kernel; it may choose to never accept a +page, it could accept every ninth page, or it might accept every +page. But if the backend does accept a page, the data from the page +has already been copied and associated with the type and offset, +and the backend guarantees the persistence of the data. In this case, +frontswap sets a bit in the "frontswap_map" for the swap device +corresponding to the page offset on the swap device to which it would +otherwise have written the data. + +When the swap subsystem needs to swap-in a page (swap_readpage()), +it first calls frontswap_get_page() which checks the frontswap_map to +see if the page was earlier accepted by the frontswap backend. If +it was, the page of data is filled from the frontswap backend and +the swap-in is complete. If not, the normal swap-in code is +executed to obtain the page of data from the real swap device. + +So every time the frontswap backend accepts a page, a swap device read +and (potentially) a swap device write are replaced by a "frontswap backend +put" and (possibly) a "frontswap backend get", which are presumably much +faster. + +4) Can't frontswap be configured as a "special" swap device that is + just higher priority than any real swap device (e.g. like zswap, + or maybe swap-over-nbd/NFS)? + +No. First, the existing swap subsystem doesn't allow for any kind of +swap hierarchy. Perhaps it could be rewritten to accomodate a hierarchy, +but this would require fairly drastic changes. Even if it were +rewritten, the existing swap subsystem uses the block I/O layer which +assumes a swap device is fixed size and any page in it is linearly +addressable. Frontswap barely touches the existing swap subsystem, +and works around the constraints of the block I/O subsystem to provide +a great deal of flexibility and dynamicity. + +For example, the acceptance of any swap page by the frontswap backend is +entirely unpredictable. This is critical to the definition of frontswap +backends because it grants completely dynamic discretion to the +backend. In zcache, one cannot know a priori how compressible a page is. +"Poorly" compressible pages can be rejected, and "poorly" can itself be +defined dynamically depending on current memory constraints. + +Further, frontswap is entirely synchronous whereas a real swap +device is, by definition, asynchronous and uses block I/O. The +block I/O layer is not only unnecessary, but may perform "optimizations" +that are inappropriate for a RAM-oriented device including delaying +the write of some pages for a significant amount of time. Synchrony is +required to ensure the dynamicity of the backend and to avoid thorny race +conditions that would unnecessarily and greatly complicate frontswap +and/or the block I/O subsystem. That said, only the initial "put" +and "get" operations need be synchronous. A separate asynchronous thread +is free to manipulate the pages stored by frontswap. For example, +the "remotification" thread in RAMster uses standard asynchronous +kernel sockets to move compressed frontswap pages to a remote machine. +Similarly, a KVM guest-side implementation could do in-guest compression +and use "batched" hypercalls. + +In a virtualized environment, the dynamicity allows the hypervisor +(or host OS) to do "intelligent overcommit". For example, it can +choose to accept pages only until host-swapping might be imminent, +then force guests to do their own swapping. + +There is a downside to the transcendent memory specifications for +frontswap: Since any "put" might fail, there must always be a real +slot on a real swap device to swap the page. Thus frontswap must be +implemented as a "shadow" to every swapon'd device with the potential +capability of holding every page that the swap device might have held +and the possibility that it might hold no pages at all. This means +that frontswap cannot contain more pages than the total of swapon'd +swap devices. For example, if NO swap device is configured on some +installation, frontswap is useless. Swapless portable devices +can still use frontswap but a backend for such devices must configure +some kind of "ghost" swap device and ensure that it is never used. + +5) Why this weird definition about "duplicate puts"? If a page + has been previously successfully put, can't it always be + successfully overwritten? + +Nearly always it can, but no, sometimes it cannot. Consider an example +where data is compressed and the original 4K page has been compressed +to 1K. Now an attempt is made to overwrite the page with data that +is non-compressible and so would take the entire 4K. But the backend +has no more space. In this case, the put must be rejected. Whenever +frontswap rejects a put that would overwrite, it also must invalidate +the old data and ensure that it is no longer accessible. Since the +swap subsystem then writes the new data to the read swap device, +this is the correct course of action to ensure coherency. + +6) What is frontswap_shrink for? + +When the (non-frontswap) swap subsystem swaps out a page to a real +swap device, that page is only taking up low-value pre-allocated disk +space. But if frontswap has placed a page in transcendent memory, that +page may be taking up valuable real estate. The frontswap_shrink +routine allows code outside of the swap subsystem to force pages out +of the memory managed by frontswap and back into kernel-addressable memory. +For example, in RAMster, a "suction driver" thread will attempt +to "repatriate" pages sent to a remote machine back to the local machine; +this is driven using the frontswap_shrink mechanism when memory pressure +subsides. + +7) Why does the frontswap patch create the new include file swapfile.h? + +The frontswap code depends on some swap-subsystem-internal data +structures that have, over the years, moved back and forth between +static and global. This seemed a reasonable compromise: Define +them as global but declare them in a new include file that isn't +included by the large number of source files that include swap.h. + +Dan Magenheimer, last updated April 9, 2012 diff --git a/mm/Kconfig b/mm/Kconfig index e338407f122..2613c910935 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -379,3 +379,20 @@ config CLEANCACHE in a negligible performance hit. If unsure, say Y to enable cleancache + +config FRONTSWAP + bool "Enable frontswap to cache swap pages if tmem is present" + depends on SWAP + default n + help + Frontswap is so named because it can be thought of as the opposite + of a "backing" store for a swap device. The data is stored into + "transcendent memory", memory that is not directly accessible or + addressable by the kernel and is of unknown and possibly + time-varying size. When space in transcendent memory is available, + a significant swap I/O reduction may be achieved. When none is + available, all frontswap calls are reduced to a single pointer- + compare-against-NULL resulting in a negligible performance hit + and swap data is stored as normal on the matching swap device. + + If unsure, say Y to enable frontswap. diff --git a/mm/Makefile b/mm/Makefile index 50ec00ef2a0..306742a2826 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o obj-$(CONFIG_BOUNCE) += bounce.o obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o +obj-$(CONFIG_FRONTSWAP) += frontswap.o obj-$(CONFIG_HAS_DMA) += dmapool.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o obj-$(CONFIG_NUMA) += mempolicy.o From 839a1f79ed0ce026e6d1106f202eaaae929b4200 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 16 Apr 2012 17:06:35 -0400 Subject: [PATCH 013/475] MAINTAINER: Add myself for the frontswap API Signed-off-by: Konrad Rzeszutek Wilk --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2dcfca85063..bc8905dc225 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2876,6 +2876,13 @@ F: Documentation/power/freezing-of-tasks.txt F: include/linux/freezer.h F: kernel/freezer.c +FRONTSWAP API +M: Konrad Rzeszutek Wilk +L: linux-kernel@vger.kernel.org +S: Maintained +F: mm/frontswap.c +F: include/linux/frontswap.h + FS-CACHE: LOCAL CACHING FOR NETWORK FILESYSTEMS M: David Howells L: linux-cachefs@redhat.com From 165c8aed5bbc6bdddbccae0ba9db451732558ff9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 15 May 2012 11:32:15 -0400 Subject: [PATCH 014/475] frontswap: s/put_page/store/g s/get_page/load Sounds so much more natural. Suggested-by: Andrea Arcangeli Signed-off-by: Konrad Rzeszutek Wilk --- Documentation/vm/frontswap.txt | 50 ++++++++++++------------ drivers/staging/ramster/zcache-main.c | 8 ++-- drivers/staging/zcache/zcache-main.c | 10 ++--- drivers/xen/tmem.c | 8 ++-- include/linux/frontswap.h | 16 ++++---- mm/frontswap.c | 56 +++++++++++++-------------- mm/page_io.c | 4 +- 7 files changed, 76 insertions(+), 76 deletions(-) diff --git a/Documentation/vm/frontswap.txt b/Documentation/vm/frontswap.txt index a9f731af0fa..37067cf455f 100644 --- a/Documentation/vm/frontswap.txt +++ b/Documentation/vm/frontswap.txt @@ -21,21 +21,21 @@ frontswap_ops funcs appropriately and the functions it provides must conform to certain policies as follows: An "init" prepares the device to receive frontswap pages associated -with the specified swap device number (aka "type"). A "put_page" will +with the specified swap device number (aka "type"). A "store" will copy the page to transcendent memory and associate it with the type and -offset associated with the page. A "get_page" will copy the page, if found, +offset associated with the page. A "load" will copy the page, if found, from transcendent memory into kernel memory, but will NOT remove the page from from transcendent memory. An "invalidate_page" will remove the page from transcendent memory and an "invalidate_area" will remove ALL pages associated with the swap type (e.g., like swapoff) and notify the "device" -to refuse further puts with that swap type. +to refuse further stores with that swap type. -Once a page is successfully put, a matching get on the page will normally +Once a page is successfully stored, a matching load on the page will normally succeed. So when the kernel finds itself in a situation where it needs -to swap out a page, it first attempts to use frontswap. If the put returns +to swap out a page, it first attempts to use frontswap. If the store returns success, the data has been successfully saved to transcendent memory and a disk write and, if the data is later read back, a disk read are avoided. -If a put returns failure, transcendent memory has rejected the data, and the +If a store returns failure, transcendent memory has rejected the data, and the page can be written to swap as usual. If a backend chooses, frontswap can be configured as a "writethrough @@ -44,18 +44,18 @@ in swap device writes is lost (and also a non-trivial performance advantage) in order to allow the backend to arbitrarily "reclaim" space used to store frontswap pages to more completely manage its memory usage. -Note that if a page is put and the page already exists in transcendent memory -(a "duplicate" put), either the put succeeds and the data is overwritten, -or the put fails AND the page is invalidated. This ensures stale data may +Note that if a page is stored and the page already exists in transcendent memory +(a "duplicate" store), either the store succeeds and the data is overwritten, +or the store fails AND the page is invalidated. This ensures stale data may never be obtained from frontswap. If properly configured, monitoring of frontswap is done via debugfs in the /sys/kernel/debug/frontswap directory. The effectiveness of frontswap can be measured (across all swap devices) with: -failed_puts - how many put attempts have failed -gets - how many gets were attempted (all should succeed) -succ_puts - how many put attempts have succeeded +failed_stores - how many store attempts have failed +loads - how many loads were attempted (all should succeed) +succ_stores - how many store attempts have succeeded invalidates - how many invalidates were attempted A backend implementation may provide additional metrics. @@ -125,7 +125,7 @@ nothingness and the only overhead is a few extra bytes per swapon'ed swap device. If CONFIG_FRONTSWAP is enabled but no frontswap "backend" registers, there is one extra global variable compared to zero for every swap page read or written. If CONFIG_FRONTSWAP is enabled -AND a frontswap backend registers AND the backend fails every "put" +AND a frontswap backend registers AND the backend fails every "store" request (i.e. provides no memory despite claiming it might), CPU overhead is still negligible -- and since every frontswap fail precedes a swap page write-to-disk, the system is highly likely @@ -159,13 +159,13 @@ entirely dynamic and random. Whenever a swap-device is swapon'd frontswap_init() is called, passing the swap device number (aka "type") as a parameter. -This notifies frontswap to expect attempts to "put" swap pages +This notifies frontswap to expect attempts to "store" swap pages associated with that number. Whenever the swap subsystem is readying a page to write to a swap -device (c.f swap_writepage()), frontswap_put_page is called. Frontswap +device (c.f swap_writepage()), frontswap_store is called. Frontswap consults with the frontswap backend and if the backend says it does NOT -have room, frontswap_put_page returns -1 and the kernel swaps the page +have room, frontswap_store returns -1 and the kernel swaps the page to the swap device as normal. Note that the response from the frontswap backend is unpredictable to the kernel; it may choose to never accept a page, it could accept every ninth page, or it might accept every @@ -177,7 +177,7 @@ corresponding to the page offset on the swap device to which it would otherwise have written the data. When the swap subsystem needs to swap-in a page (swap_readpage()), -it first calls frontswap_get_page() which checks the frontswap_map to +it first calls frontswap_load() which checks the frontswap_map to see if the page was earlier accepted by the frontswap backend. If it was, the page of data is filled from the frontswap backend and the swap-in is complete. If not, the normal swap-in code is @@ -185,7 +185,7 @@ executed to obtain the page of data from the real swap device. So every time the frontswap backend accepts a page, a swap device read and (potentially) a swap device write are replaced by a "frontswap backend -put" and (possibly) a "frontswap backend get", which are presumably much +store" and (possibly) a "frontswap backend loads", which are presumably much faster. 4) Can't frontswap be configured as a "special" swap device that is @@ -215,8 +215,8 @@ that are inappropriate for a RAM-oriented device including delaying the write of some pages for a significant amount of time. Synchrony is required to ensure the dynamicity of the backend and to avoid thorny race conditions that would unnecessarily and greatly complicate frontswap -and/or the block I/O subsystem. That said, only the initial "put" -and "get" operations need be synchronous. A separate asynchronous thread +and/or the block I/O subsystem. That said, only the initial "store" +and "load" operations need be synchronous. A separate asynchronous thread is free to manipulate the pages stored by frontswap. For example, the "remotification" thread in RAMster uses standard asynchronous kernel sockets to move compressed frontswap pages to a remote machine. @@ -229,7 +229,7 @@ choose to accept pages only until host-swapping might be imminent, then force guests to do their own swapping. There is a downside to the transcendent memory specifications for -frontswap: Since any "put" might fail, there must always be a real +frontswap: Since any "store" might fail, there must always be a real slot on a real swap device to swap the page. Thus frontswap must be implemented as a "shadow" to every swapon'd device with the potential capability of holding every page that the swap device might have held @@ -240,16 +240,16 @@ installation, frontswap is useless. Swapless portable devices can still use frontswap but a backend for such devices must configure some kind of "ghost" swap device and ensure that it is never used. -5) Why this weird definition about "duplicate puts"? If a page - has been previously successfully put, can't it always be +5) Why this weird definition about "duplicate stores"? If a page + has been previously successfully stored, can't it always be successfully overwritten? Nearly always it can, but no, sometimes it cannot. Consider an example where data is compressed and the original 4K page has been compressed to 1K. Now an attempt is made to overwrite the page with data that is non-compressible and so would take the entire 4K. But the backend -has no more space. In this case, the put must be rejected. Whenever -frontswap rejects a put that would overwrite, it also must invalidate +has no more space. In this case, the store must be rejected. Whenever +frontswap rejects a store that would overwrite, it also must invalidate the old data and ensure that it is no longer accessible. Since the swap subsystem then writes the new data to the read swap device, this is the correct course of action to ensure coherency. diff --git a/drivers/staging/ramster/zcache-main.c b/drivers/staging/ramster/zcache-main.c index 68b2e053a0e..2627b3de0d2 100644 --- a/drivers/staging/ramster/zcache-main.c +++ b/drivers/staging/ramster/zcache-main.c @@ -3002,7 +3002,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind) return oid; } -static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, +static int zcache_frontswap_store(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -3025,7 +3025,7 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, /* returns 0 if the page was successfully gotten from frontswap, -1 if * was not present (should never happen!) */ -static int zcache_frontswap_get_page(unsigned type, pgoff_t offset, +static int zcache_frontswap_load(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -3080,8 +3080,8 @@ static void zcache_frontswap_init(unsigned ignored) } static struct frontswap_ops zcache_frontswap_ops = { - .put_page = zcache_frontswap_put_page, - .get_page = zcache_frontswap_get_page, + .store = zcache_frontswap_store, + .load = zcache_frontswap_load, .invalidate_page = zcache_frontswap_flush_page, .invalidate_area = zcache_frontswap_flush_area, .init = zcache_frontswap_init diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index 2734dacacba..784c796b984 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -1835,7 +1835,7 @@ static int zcache_frontswap_poolid = -1; * Swizzling increases objects per swaptype, increasing tmem concurrency * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from - * frontswap_get_page(), but has side-effects. Hence using 8. + * frontswap_load(), but has side-effects. Hence using 8. */ #define SWIZ_BITS 8 #define SWIZ_MASK ((1 << SWIZ_BITS) - 1) @@ -1849,7 +1849,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind) return oid; } -static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, +static int zcache_frontswap_store(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -1870,7 +1870,7 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, /* returns 0 if the page was successfully gotten from frontswap, -1 if * was not present (should never happen!) */ -static int zcache_frontswap_get_page(unsigned type, pgoff_t offset, +static int zcache_frontswap_load(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -1919,8 +1919,8 @@ static void zcache_frontswap_init(unsigned ignored) } static struct frontswap_ops zcache_frontswap_ops = { - .put_page = zcache_frontswap_put_page, - .get_page = zcache_frontswap_get_page, + .store = zcache_frontswap_store, + .load = zcache_frontswap_load, .invalidate_page = zcache_frontswap_flush_page, .invalidate_area = zcache_frontswap_flush_area, .init = zcache_frontswap_init diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index dcb79521e6c..89f264c6742 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -269,7 +269,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind) } /* returns 0 if the page was successfully put into frontswap, -1 if not */ -static int tmem_frontswap_put_page(unsigned type, pgoff_t offset, +static int tmem_frontswap_store(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -295,7 +295,7 @@ static int tmem_frontswap_put_page(unsigned type, pgoff_t offset, * returns 0 if the page was successfully gotten from frontswap, -1 if * was not present (should never happen!) */ -static int tmem_frontswap_get_page(unsigned type, pgoff_t offset, +static int tmem_frontswap_load(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -362,8 +362,8 @@ static int __init no_frontswap(char *s) __setup("nofrontswap", no_frontswap); static struct frontswap_ops __initdata tmem_frontswap_ops = { - .put_page = tmem_frontswap_put_page, - .get_page = tmem_frontswap_get_page, + .store = tmem_frontswap_store, + .load = tmem_frontswap_load, .invalidate_page = tmem_frontswap_flush_page, .invalidate_area = tmem_frontswap_flush_area, .init = tmem_frontswap_init diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 68ff7af5c5f..0e4e2eec5c1 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -7,8 +7,8 @@ struct frontswap_ops { void (*init)(unsigned); - int (*put_page)(unsigned, pgoff_t, struct page *); - int (*get_page)(unsigned, pgoff_t, struct page *); + int (*store)(unsigned, pgoff_t, struct page *); + int (*load)(unsigned, pgoff_t, struct page *); void (*invalidate_page)(unsigned, pgoff_t); void (*invalidate_area)(unsigned); }; @@ -21,8 +21,8 @@ extern unsigned long frontswap_curr_pages(void); extern void frontswap_writethrough(bool); extern void __frontswap_init(unsigned type); -extern int __frontswap_put_page(struct page *page); -extern int __frontswap_get_page(struct page *page); +extern int __frontswap_store(struct page *page); +extern int __frontswap_load(struct page *page); extern void __frontswap_invalidate_page(unsigned, pgoff_t); extern void __frontswap_invalidate_area(unsigned); @@ -88,21 +88,21 @@ static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) } #endif -static inline int frontswap_put_page(struct page *page) +static inline int frontswap_store(struct page *page) { int ret = -1; if (frontswap_enabled) - ret = __frontswap_put_page(page); + ret = __frontswap_store(page); return ret; } -static inline int frontswap_get_page(struct page *page) +static inline int frontswap_load(struct page *page) { int ret = -1; if (frontswap_enabled) - ret = __frontswap_get_page(page); + ret = __frontswap_load(page); return ret; } diff --git a/mm/frontswap.c b/mm/frontswap.c index 8c0a5f8683f..e25025574a0 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -39,7 +39,7 @@ bool frontswap_enabled __read_mostly; EXPORT_SYMBOL(frontswap_enabled); /* - * If enabled, frontswap_put will return failure even on success. As + * If enabled, frontswap_store will return failure even on success. As * a result, the swap subsystem will always write the page to swap, in * effect converting frontswap into a writethrough cache. In this mode, * there is no direct reduction in swap writes, but a frontswap backend @@ -54,27 +54,27 @@ static bool frontswap_writethrough_enabled __read_mostly; * properly configured). These are for information only so are not protected * against increment races. */ -static u64 frontswap_gets; -static u64 frontswap_succ_puts; -static u64 frontswap_failed_puts; +static u64 frontswap_loads; +static u64 frontswap_succ_stores; +static u64 frontswap_failed_stores; static u64 frontswap_invalidates; -static inline void inc_frontswap_gets(void) { - frontswap_gets++; +static inline void inc_frontswap_loads(void) { + frontswap_loads++; } -static inline void inc_frontswap_succ_puts(void) { - frontswap_succ_puts++; +static inline void inc_frontswap_succ_stores(void) { + frontswap_succ_stores++; } -static inline void inc_frontswap_failed_puts(void) { - frontswap_failed_puts++; +static inline void inc_frontswap_failed_stores(void) { + frontswap_failed_stores++; } static inline void inc_frontswap_invalidates(void) { frontswap_invalidates++; } #else -static inline void inc_frontswap_gets(void) { } -static inline void inc_frontswap_succ_puts(void) { } -static inline void inc_frontswap_failed_puts(void) { } +static inline void inc_frontswap_loads(void) { } +static inline void inc_frontswap_succ_stores(void) { } +static inline void inc_frontswap_failed_stores(void) { } static inline void inc_frontswap_invalidates(void) { } #endif /* @@ -116,13 +116,13 @@ void __frontswap_init(unsigned type) EXPORT_SYMBOL(__frontswap_init); /* - * "Put" data from a page to frontswap and associate it with the page's + * "Store" data from a page to frontswap and associate it with the page's * swaptype and offset. Page must be locked and in the swap cache. * If frontswap already contains a page with matching swaptype and * offset, the frontswap implmentation may either overwrite the data and * return success or invalidate the page from frontswap and return failure. */ -int __frontswap_put_page(struct page *page) +int __frontswap_store(struct page *page) { int ret = -1, dup = 0; swp_entry_t entry = { .val = page_private(page), }; @@ -134,10 +134,10 @@ int __frontswap_put_page(struct page *page) BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) dup = 1; - ret = (*frontswap_ops.put_page)(type, offset, page); + ret = (*frontswap_ops.store)(type, offset, page); if (ret == 0) { frontswap_set(sis, offset); - inc_frontswap_succ_puts(); + inc_frontswap_succ_stores(); if (!dup) atomic_inc(&sis->frontswap_pages); } else if (dup) { @@ -147,22 +147,22 @@ int __frontswap_put_page(struct page *page) */ frontswap_clear(sis, offset); atomic_dec(&sis->frontswap_pages); - inc_frontswap_failed_puts(); + inc_frontswap_failed_stores(); } else - inc_frontswap_failed_puts(); + inc_frontswap_failed_stores(); if (frontswap_writethrough_enabled) /* report failure so swap also writes to swap device */ ret = -1; return ret; } -EXPORT_SYMBOL(__frontswap_put_page); +EXPORT_SYMBOL(__frontswap_store); /* * "Get" data from frontswap associated with swaptype and offset that were * specified when the data was put to frontswap and use it to fill the * specified page with data. Page must be locked and in the swap cache. */ -int __frontswap_get_page(struct page *page) +int __frontswap_load(struct page *page) { int ret = -1; swp_entry_t entry = { .val = page_private(page), }; @@ -173,12 +173,12 @@ int __frontswap_get_page(struct page *page) BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) - ret = (*frontswap_ops.get_page)(type, offset, page); + ret = (*frontswap_ops.load)(type, offset, page); if (ret == 0) - inc_frontswap_gets(); + inc_frontswap_loads(); return ret; } -EXPORT_SYMBOL(__frontswap_get_page); +EXPORT_SYMBOL(__frontswap_load); /* * Invalidate any data from frontswap associated with the specified swaptype @@ -301,10 +301,10 @@ static int __init init_frontswap(void) struct dentry *root = debugfs_create_dir("frontswap", NULL); if (root == NULL) return -ENXIO; - debugfs_create_u64("gets", S_IRUGO, root, &frontswap_gets); - debugfs_create_u64("succ_puts", S_IRUGO, root, &frontswap_succ_puts); - debugfs_create_u64("failed_puts", S_IRUGO, root, - &frontswap_failed_puts); + debugfs_create_u64("loads", S_IRUGO, root, &frontswap_loads); + debugfs_create_u64("succ_stores", S_IRUGO, root, &frontswap_succ_stores); + debugfs_create_u64("failed_stores", S_IRUGO, root, + &frontswap_failed_stores); debugfs_create_u64("invalidates", S_IRUGO, root, &frontswap_invalidates); #endif diff --git a/mm/page_io.c b/mm/page_io.c index 651a9125931..34f02923744 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -99,7 +99,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) unlock_page(page); goto out; } - if (frontswap_put_page(page) == 0) { + if (frontswap_store(page) == 0) { set_page_writeback(page); unlock_page(page); end_page_writeback(page); @@ -129,7 +129,7 @@ int swap_readpage(struct page *page) VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(PageUptodate(page)); - if (frontswap_get_page(page) == 0) { + if (frontswap_load(page) == 0) { SetPageUptodate(page); unlock_page(page); goto out; From 62be73eafaa045d3233337303fb140f7f8a61135 Mon Sep 17 00:00:00 2001 From: Seiji Aguchi Date: Tue, 15 May 2012 17:35:09 -0400 Subject: [PATCH 015/475] kdump: Execute kmsg_dump(KMSG_DUMP_PANIC) after smp_send_stop() This patch moves kmsg_dump(KMSG_DUMP_PANIC) below smp_send_stop(), to serialize the crash-logging process via smp_send_stop() and to thus retrieve a more stable crash image of all CPUs stopped. Signed-off-by: Seiji Aguchi Acked-by: Don Zickus Cc: dle-develop@lists.sourceforge.net Cc: Satoru Moriya Cc: Tony Luck Cc: a.p.zijlstra@chello.nl Link: http://lkml.kernel.org/r/5C4C569E8A4B9B42A84A977CF070A35B2E4D7A5CE2@USINDEVS01.corp.hds.com Signed-off-by: Ingo Molnar --- kernel/panic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index b6215b7ce99..d2a5f4ecc6d 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -108,8 +108,6 @@ void panic(const char *fmt, ...) */ crash_kexec(NULL); - kmsg_dump(KMSG_DUMP_PANIC); - /* * Note smp_send_stop is the usual smp shutdown function, which * unfortunately means it may not be hardened to work in a panic @@ -117,6 +115,8 @@ void panic(const char *fmt, ...) */ smp_send_stop(); + kmsg_dump(KMSG_DUMP_PANIC); + atomic_notifier_call_chain(&panic_notifier_list, 0, buf); bust_spinlocks(0); From 936c163ab260b83b10d434715bf6bcd43b99d620 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 24 May 2012 13:03:20 +0900 Subject: [PATCH 016/475] sh: arch/sh/kernel/process.c needs asm/fpu.h for unlazy_fpu(). Linus tried to fix up sh fallout from the x86 fpu state cleanup merge and failed. Add the missing include to get it building again. CC arch/sh/kernel/process.o arch/sh/kernel/process.c: In function 'arch_dup_task_struct': arch/sh/kernel/process.c:23:2: error: implicit declaration of function 'unlazy_fpu' Signed-off-by: Paul Mundt --- arch/sh/kernel/process.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 9b7a459a461..055d91b7030 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -4,6 +4,7 @@ #include #include #include +#include struct kmem_cache *task_xstate_cachep = NULL; unsigned int xstate_size; From 3a9485da969c1c23400430470db17c819ef51d7b Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 24 May 2012 15:07:47 +0900 Subject: [PATCH 017/475] sh64: Fix up fallout from generic init_task conversion. In the generic init_task migration sh64 silently lost its fake_swapper_regs definition, resulting in link errors. Signed-off-by: Paul Mundt --- arch/sh/kernel/process_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 4264583eaba..602545b12a8 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -33,6 +33,7 @@ #include struct task_struct *last_task_used_math = NULL; +struct pt_regs fake_swapper_regs = { 0, }; void show_regs(struct pt_regs *regs) { From 2e5ad6b9c45d43cc4e7b8ac5ded1c55a7c4a3893 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 23 May 2012 12:53:11 -0400 Subject: [PATCH 018/475] xen/hvc: Collapse error logic. All of the error paths are doing the same logic. In which case we might as well collapse them in one path. CC: stable@kernel.org Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- drivers/tty/hvc/hvc_xen.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index d3d91dae065..afc7fc27aa5 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -216,22 +216,16 @@ static int xen_hvm_console_init(void) return 0; r = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v); - if (r < 0) { - kfree(info); - return -ENODEV; - } + if (r < 0) + goto err; info->evtchn = v; hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v); - if (r < 0) { - kfree(info); - return -ENODEV; - } + if (r < 0) + goto err; mfn = v; info->intf = ioremap(mfn << PAGE_SHIFT, PAGE_SIZE); - if (info->intf == NULL) { - kfree(info); - return -ENODEV; - } + if (info->intf == NULL) + goto err; info->vtermno = HVC_COOKIE; spin_lock(&xencons_lock); @@ -239,6 +233,9 @@ static int xen_hvm_console_init(void) spin_unlock(&xencons_lock); return 0; +err: + kfree(info); + return -ENODEV; } static int xen_pv_console_init(void) From a32c88b9386ce3df87f28dd46bdc3776cd6edf75 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 23 May 2012 12:55:38 -0400 Subject: [PATCH 019/475] xen/hvc: Fix error cases around HVM_PARAM_CONSOLE_PFN We weren't resetting the parameter to be passed in to a known default. Nor were we checking the return value of hvm_get_parameter. CC: stable@kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/tty/hvc/hvc_xen.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index afc7fc27aa5..3277f0eec4a 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -219,7 +219,8 @@ static int xen_hvm_console_init(void) if (r < 0) goto err; info->evtchn = v; - hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v); + v = 0; + r = hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v); if (r < 0) goto err; mfn = v; From 5842f5768599094758931b74190cdf93641a8e35 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 23 May 2012 12:56:59 -0400 Subject: [PATCH 020/475] xen/hvc: Check HVM_PARAM_CONSOLE_[EVTCHN|PFN] for correctness. We need to make sure that those parameters are setup to be correct. As such the value of 0 is deemed invalid and we find that we bail out. The hypervisor sets by default all of them to be zero and when the hypercall is done does a simple: a.value = d->arch.hvm_domain.params[a.index]; Which means that if the Xen toolstack forgot to setup the proper HVM_PARAM_CONSOLE_EVTCHN (or the PFN one), we would get the default value of 0 and use that. CC: stable@kernel.org Fixes-Oracle-Bug: 14091238 Signed-off-by: Konrad Rzeszutek Wilk --- drivers/tty/hvc/hvc_xen.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 3277f0eec4a..944eaeb8e0c 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -214,14 +214,19 @@ static int xen_hvm_console_init(void) /* already configured */ if (info->intf != NULL) return 0; - + /* + * If the toolstack (or the hypervisor) hasn't set these values, the + * default value is 0. Even though mfn = 0 and evtchn = 0 are + * theoretically correct values, in practice they never are and they + * mean that a legacy toolstack hasn't initialized the pv console correctly. + */ r = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v); - if (r < 0) + if (r < 0 || v == 0) goto err; info->evtchn = v; v = 0; r = hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v); - if (r < 0) + if (r < 0 || v == 0) goto err; mfn = v; info->intf = ioremap(mfn << PAGE_SHIFT, PAGE_SIZE); From 23812b9d9e497580d38c62ebdc6f308733b0a32a Mon Sep 17 00:00:00 2001 From: Ning Jiang Date: Tue, 22 May 2012 00:19:20 +0800 Subject: [PATCH 021/475] genirq: Add IRQS_PENDING for nested and simple irq Every interrupt which is an active wakeup source needs the ability to abort suspend if there is a pending irq. Right now only edge and level irqs can do that. | +---------+ | INTC | +---------+ | GPIO_IRQ +------------+ | gpio-exp | +------------+ | | GPIO0_IRQ GPIO1_IRQ In the above diagram, gpio expander has irq number GPIO_IRQ, it is connected with two sub GPIO pins, GPIO0 and GPIO1. During suspend, we set IRQF_NO_SUSPEND for GPIO_IRQ so that gpio expander driver can handle the sub irq GPIO0_IRQ and GPIO1_IRQ, and these two irqs themselves can further be handled by simple or nested irq in some drivers(typically gpio and mfd driver). If they are used as wakeup sources during suspend, we want them to be able to abort suspend too. Setting IRQS_PENDING flag in handle_nested_irq() and handle_simple_irq() when the irq is disabled allows check_wakeup_irqs() to identify such irqs as source for aborting suspend. Signed-off-by: Ning Jiang Cc: rjw@sisk.pl Link: http://lkml.kernel.org/r/CAH3Oq6T905%2B3fkF43NAMMFvJvq7dsk_so6T2vQ8ZJrA5xiU3YA@mail.gmail.com Signed-off-by: Thomas Gleixner --- kernel/irq/chip.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index fc275e4f629..eebd6d5cfb4 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -275,8 +275,10 @@ void handle_nested_irq(unsigned int irq) kstat_incr_irqs_this_cpu(irq, desc); action = desc->action; - if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) + if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) { + desc->istate |= IRQS_PENDING; goto out_unlock; + } irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); raw_spin_unlock_irq(&desc->lock); @@ -324,8 +326,10 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc) desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); kstat_incr_irqs_this_cpu(irq, desc); - if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) + if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { + desc->istate |= IRQS_PENDING; goto out_unlock; + } handle_irq_event(desc); From 818b0f3bfb236ae66cac3ff38e86b9e47f24b7aa Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 30 Mar 2012 23:11:34 +0800 Subject: [PATCH 022/475] genirq: Introduce irq_do_set_affinity() to reduce duplicated code All invocations of chip->irq_set_affinity() are doing the same return value checks. Let them all use a common function. [ tglx: removed the silly likely while at it ] Signed-off-by: Jiang Liu Cc: Jiang Liu Cc: Keping Chen Link: http://lkml.kernel.org/r/1333120296-13563-3-git-send-email-jiang.liu@huawei.com Signed-off-by: Thomas Gleixner --- kernel/irq/internals.h | 3 +++ kernel/irq/manage.c | 39 ++++++++++++++++++++++----------------- kernel/irq/migration.c | 13 ++----------- 3 files changed, 27 insertions(+), 28 deletions(-) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 8e5c56b3b7d..001fa5bab49 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -101,6 +101,9 @@ extern int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask); extern void irq_set_thread_affinity(struct irq_desc *desc); +extern int irq_do_set_affinity(struct irq_data *data, + const struct cpumask *dest, bool force); + /* Inline functions for support of irq chips on slow busses */ static inline void chip_bus_lock(struct irq_desc *desc) { diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index bb32326afe8..a1b903380bc 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -139,6 +139,25 @@ static inline void irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { } #endif +int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_desc *desc = irq_data_to_desc(data); + struct irq_chip *chip = irq_data_get_irq_chip(data); + int ret; + + ret = chip->irq_set_affinity(data, mask, false); + switch (ret) { + case IRQ_SET_MASK_OK: + cpumask_copy(data->affinity, mask); + case IRQ_SET_MASK_OK_NOCOPY: + irq_set_thread_affinity(desc); + ret = 0; + } + + return ret; +} + int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) { struct irq_chip *chip = irq_data_get_irq_chip(data); @@ -149,14 +168,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) return -EINVAL; if (irq_can_move_pcntxt(data)) { - ret = chip->irq_set_affinity(data, mask, false); - switch (ret) { - case IRQ_SET_MASK_OK: - cpumask_copy(data->affinity, mask); - case IRQ_SET_MASK_OK_NOCOPY: - irq_set_thread_affinity(desc); - ret = 0; - } + ret = irq_do_set_affinity(data, mask, false); } else { irqd_set_move_pending(data); irq_copy_pending(desc, mask); @@ -280,9 +292,8 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier); static int setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) { - struct irq_chip *chip = irq_desc_get_chip(desc); struct cpumask *set = irq_default_affinity; - int ret, node = desc->irq_data.node; + int node = desc->irq_data.node; /* Excludes PER_CPU and NO_BALANCE interrupts */ if (!irq_can_set_affinity(irq)) @@ -308,13 +319,7 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) if (cpumask_intersects(mask, nodemask)) cpumask_and(mask, mask, nodemask); } - ret = chip->irq_set_affinity(&desc->irq_data, mask, false); - switch (ret) { - case IRQ_SET_MASK_OK: - cpumask_copy(desc->irq_data.affinity, mask); - case IRQ_SET_MASK_OK_NOCOPY: - irq_set_thread_affinity(desc); - } + irq_do_set_affinity(&desc->irq_data, mask, false); return 0; } #else diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index c3c89751b32..ca3f4aaff70 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -42,17 +42,8 @@ void irq_move_masked_irq(struct irq_data *idata) * For correct operation this depends on the caller * masking the irqs. */ - if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) - < nr_cpu_ids)) { - int ret = chip->irq_set_affinity(&desc->irq_data, - desc->pending_mask, false); - switch (ret) { - case IRQ_SET_MASK_OK: - cpumask_copy(desc->irq_data.affinity, desc->pending_mask); - case IRQ_SET_MASK_OK_NOCOPY: - irq_set_thread_affinity(desc); - } - } + if (cpumask_any_and(desc->pending_mask, cpu_online_mask) < nr_cpu_ids) + irq_do_set_affinity(&desc->irq_data, desc->pending_mask, false); cpumask_clear(desc->pending_mask); } From ee74d13229fb606353ff56f4927fa93b37e95bbe Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 24 May 2012 20:40:55 +0530 Subject: [PATCH 023/475] smpboot, idle: Optimize calls to smp_processor_id() in idle_threads_init() While trying to initialize idle threads for all cpus, idle_threads_init() calls smp_processor_id() in a loop, which is unnecessary. The intent is to initialize idle threads for all non-boot cpus. So just use a variable to note the boot cpu and use it in the loop. Signed-off-by: Srivatsa S. Bhat Cc: suresh.b.siddha@intel.com Cc: venki@google.com Cc: nikunj@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/20120524151055.2549.64309.stgit@srivatsabhat.in.ibm.com Signed-off-by: Thomas Gleixner --- kernel/smpboot.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/smpboot.c b/kernel/smpboot.c index e1a797e028a..0f2162f808a 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -52,10 +52,12 @@ static inline void idle_init(unsigned int cpu) */ void __init idle_threads_init(void) { - unsigned int cpu; + unsigned int cpu, boot_cpu; + + boot_cpu = smp_processor_id(); for_each_possible_cpu(cpu) { - if (cpu != smp_processor_id()) + if (cpu != boot_cpu) idle_init(cpu); } } From 4a70d2d9909b43ed88043b98cabe2c7fbd563021 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 24 May 2012 20:41:00 +0530 Subject: [PATCH 024/475] smpboot, idle: Fix comment mismatch over idle_threads_init() The comment over idle_threads_init() really talks about the functionality of idle_init(). Move that comment to idle_init(), and add a suitable comment over idle_threads_init(). Signed-off-by: Srivatsa S. Bhat Cc: suresh.b.siddha@intel.com Cc: venki@google.com Cc: nikunj@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/20120524151100.2549.66501.stgit@srivatsabhat.in.ibm.com Signed-off-by: Thomas Gleixner --- kernel/smpboot.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 0f2162f808a..98f60c5caa1 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -31,6 +31,12 @@ void __init idle_thread_set_boot_cpu(void) per_cpu(idle_threads, smp_processor_id()) = current; } +/** + * idle_init - Initialize the idle thread for a cpu + * @cpu: The cpu for which the idle thread should be initialized + * + * Creates the thread if it does not exist. + */ static inline void idle_init(unsigned int cpu) { struct task_struct *tsk = per_cpu(idle_threads, cpu); @@ -45,10 +51,7 @@ static inline void idle_init(unsigned int cpu) } /** - * idle_thread_init - Initialize the idle thread for a cpu - * @cpu: The cpu for which the idle thread should be initialized - * - * Creates the thread if it does not exist. + * idle_threads_init - Initialize idle threads for all cpus */ void __init idle_threads_init(void) { From 5307c9556bc17e3cd26d4e94fc3b2565921834de Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 8 May 2012 12:20:58 +0200 Subject: [PATCH 025/475] tick: Add tick skew boot option Let the user decide whether power consumption or jitter is the more important consideration for their machines. Quoting removal commit af5ab277ded04bd9bc6b048c5a2f0e7d70ef0867: "Historically, Linux has tried to make the regular timer tick on the various CPUs not happen at the same time, to avoid contention on xtime_lock. Nowadays, with the tickless kernel, this contention no longer happens since time keeping and updating are done differently. In addition, this skew is actually hurting power consumption in a measurable way on many-core systems." Problems: - Contrary to the above, systems do encounter contention on both xtime_lock and RCU structure locks when the tick is synchronized. - Moderate sized RT systems suffer intolerable jitter due to the tick being synchronized. - SGI reports the same for their large systems. - Fully utilized systems reap no power saving benefit from skew removal, but do suffer from resulting induced lock contention. - 0209f649 rcu: limit rcu_node leaf-level fanout This patch was born to combat lock contention which testing showed to have been _induced by_ skew removal. Skew the tick, contention disappeared virtually completely. Signed-off-by: Mike Galbraith Link: http://lkml.kernel.org/r/1336472458.21924.78.camel@marge.simpson.net Signed-off-by: Thomas Gleixner --- Documentation/kernel-parameters.txt | 9 +++++++++ kernel/time/tick-sched.c | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index b69cfdc1211..ea38cd1f0ab 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2532,6 +2532,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. sched_debug [KNL] Enables verbose scheduler debug messages. + skew_tick= [KNL] Offset the periodic timer tick per cpu to mitigate + xtime_lock contention on larger systems, and/or RCU lock + contention on all systems with CONFIG_MAXSMP set. + Format: { "0" | "1" } + 0 -- disable. (may be 1 via CONFIG_CMDLINE="skew_tick=1" + 1 -- enable. + Note: increases power consumption, thus should only be + enabled if running jitter sensitive (HPC/RT) workloads. + security= [SECURITY] Choose a security module to enable at boot. If this boot parameter is not specified, only the first security module asking for security registration will be diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6a3a5b9ff56..4eddbb5ea9c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -814,6 +814,8 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) return HRTIMER_RESTART; } +static int sched_skew_tick; + /** * tick_setup_sched_timer - setup the tick emulation timer */ @@ -831,6 +833,14 @@ void tick_setup_sched_timer(void) /* Get the next period (per cpu) */ hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); + /* Offset the tick to avert xtime_lock contention. */ + if (sched_skew_tick) { + u64 offset = ktime_to_ns(tick_period) >> 1; + do_div(offset, num_possible_cpus()); + offset *= smp_processor_id(); + hrtimer_add_expires_ns(&ts->sched_timer, offset); + } + for (;;) { hrtimer_forward(&ts->sched_timer, now, tick_period); hrtimer_start_expires(&ts->sched_timer, @@ -910,3 +920,11 @@ int tick_check_oneshot_change(int allow_nohz) tick_nohz_switch_to_nohz(); return 0; } + +static int __init skew_tick(char *str) +{ + get_option(&str, &sched_skew_tick); + + return 0; +} +early_param("skew_tick", skew_tick); From e5400321a6f15ce0fe77c8455954f213ef7dcc54 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 9 May 2012 23:39:34 +0900 Subject: [PATCH 026/475] clockevents: Make clockevents_config() a global symbol Make clockevents_config() into a global symbol to allow it to be used by compiled-in clockevent drivers. This is needed by drivers that want to update the timer frequency after registration time. Signed-off-by: Magnus Damm Tested-by: Simon Horman Cc: arnd@arndb.de Cc: johnstul@us.ibm.com Cc: rjw@sisk.pl Cc: lethal@linux-sh.org Cc: gregkh@linuxfoundation.org Cc: olof@lixom.net Cc: Magnus Damm Link: http://lkml.kernel.org/r/20120509143934.27521.46553.sendpatchset@w520 Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 1 + kernel/time/clockevents.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 81e803e90aa..acba894374a 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -132,6 +132,7 @@ extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); +extern void clockevents_config(struct clock_event_device *dev, u32 freq); extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 9cd928f7a7c..7e1ce012a85 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -297,8 +297,7 @@ void clockevents_register_device(struct clock_event_device *dev) } EXPORT_SYMBOL_GPL(clockevents_register_device); -static void clockevents_config(struct clock_event_device *dev, - u32 freq) +void clockevents_config(struct clock_event_device *dev, u32 freq) { u64 sec; From b9dbf9517784084ee9496f9f17f9754c1d021a9e Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 25 May 2012 16:03:44 +0900 Subject: [PATCH 027/475] clocksource: em_sti: Emma Mobile STI driver The STI hardware is based on a single 48-bit 32kHz counter that together with two individual compare registers can generate interrupts. There are no timer operating modes selectable which means that the timer can not clear on match. This driver is providing clocksource support for the 48-bit counter. Clockevents are also supported using the same timer in oneshot mode. Signed-off-by: Magnus Damm Cc: horms@verge.net.au Cc: arnd@arndb.de Cc: johnstul@us.ibm.com Cc: rjw@sisk.pl Cc: lethal@linux-sh.org Cc: gregkh@linuxfoundation.org Cc: olof@lixom.net Link: http://lkml.kernel.org/r/20120525070344.23443.69756.sendpatchset@w520 Signed-off-by: Thomas Gleixner --- arch/arm/mach-shmobile/Kconfig | 6 + drivers/clocksource/Makefile | 1 + drivers/clocksource/em_sti.c | 399 +++++++++++++++++++++++++++++++++ 3 files changed, 406 insertions(+) create mode 100644 drivers/clocksource/em_sti.c diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index f31383c32f9..df33909205e 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -186,6 +186,12 @@ config SH_TIMER_TMU help This enables build of the TMU timer driver. +config EM_TIMER_STI + bool "STI timer driver" + default y + help + This enables build of the STI timer driver. + endmenu config SH_CLK_CPG diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 8d81a1d3265..dd3e661a124 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC) += cs5535-clockevt.o obj-$(CONFIG_SH_TIMER_CMT) += sh_cmt.o obj-$(CONFIG_SH_TIMER_MTU2) += sh_mtu2.o obj-$(CONFIG_SH_TIMER_TMU) += sh_tmu.o +obj-$(CONFIG_EM_TIMER_STI) += em_sti.o obj-$(CONFIG_CLKBLD_I8253) += i8253.o obj-$(CONFIG_CLKSRC_MMIO) += mmio.o obj-$(CONFIG_DW_APB_TIMER) += dw_apb_timer.o diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c new file mode 100644 index 00000000000..719584a5952 --- /dev/null +++ b/drivers/clocksource/em_sti.c @@ -0,0 +1,399 @@ +/* + * Emma Mobile Timer Support - STI + * + * Copyright (C) 2012 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum { USER_CLOCKSOURCE, USER_CLOCKEVENT, USER_NR }; + +struct em_sti_priv { + void __iomem *base; + struct clk *clk; + struct platform_device *pdev; + unsigned int active[USER_NR]; + unsigned long rate; + raw_spinlock_t lock; + struct clock_event_device ced; + struct clocksource cs; +}; + +#define STI_CONTROL 0x00 +#define STI_COMPA_H 0x10 +#define STI_COMPA_L 0x14 +#define STI_COMPB_H 0x18 +#define STI_COMPB_L 0x1c +#define STI_COUNT_H 0x20 +#define STI_COUNT_L 0x24 +#define STI_COUNT_RAW_H 0x28 +#define STI_COUNT_RAW_L 0x2c +#define STI_SET_H 0x30 +#define STI_SET_L 0x34 +#define STI_INTSTATUS 0x40 +#define STI_INTRAWSTATUS 0x44 +#define STI_INTENSET 0x48 +#define STI_INTENCLR 0x4c +#define STI_INTFFCLR 0x50 + +static inline unsigned long em_sti_read(struct em_sti_priv *p, int offs) +{ + return ioread32(p->base + offs); +} + +static inline void em_sti_write(struct em_sti_priv *p, int offs, + unsigned long value) +{ + iowrite32(value, p->base + offs); +} + +static int em_sti_enable(struct em_sti_priv *p) +{ + int ret; + + /* enable clock */ + ret = clk_enable(p->clk); + if (ret) { + dev_err(&p->pdev->dev, "cannot enable clock\n"); + return ret; + } + + /* configure channel, periodic mode and maximum timeout */ + p->rate = clk_get_rate(p->clk); + + /* reset the counter */ + em_sti_write(p, STI_SET_H, 0x40000000); + em_sti_write(p, STI_SET_L, 0x00000000); + + /* mask and clear pending interrupts */ + em_sti_write(p, STI_INTENCLR, 3); + em_sti_write(p, STI_INTFFCLR, 3); + + /* enable updates of counter registers */ + em_sti_write(p, STI_CONTROL, 1); + + return 0; +} + +static void em_sti_disable(struct em_sti_priv *p) +{ + /* mask interrupts */ + em_sti_write(p, STI_INTENCLR, 3); + + /* stop clock */ + clk_disable(p->clk); +} + +static cycle_t em_sti_count(struct em_sti_priv *p) +{ + cycle_t ticks; + unsigned long flags; + + /* the STI hardware buffers the 48-bit count, but to + * break it out into two 32-bit access the registers + * must be accessed in a certain order. + * Always read STI_COUNT_H before STI_COUNT_L. + */ + raw_spin_lock_irqsave(&p->lock, flags); + ticks = (cycle_t)(em_sti_read(p, STI_COUNT_H) & 0xffff) << 32; + ticks |= em_sti_read(p, STI_COUNT_L); + raw_spin_unlock_irqrestore(&p->lock, flags); + + return ticks; +} + +static cycle_t em_sti_set_next(struct em_sti_priv *p, cycle_t next) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&p->lock, flags); + + /* mask compare A interrupt */ + em_sti_write(p, STI_INTENCLR, 1); + + /* update compare A value */ + em_sti_write(p, STI_COMPA_H, next >> 32); + em_sti_write(p, STI_COMPA_L, next & 0xffffffff); + + /* clear compare A interrupt source */ + em_sti_write(p, STI_INTFFCLR, 1); + + /* unmask compare A interrupt */ + em_sti_write(p, STI_INTENSET, 1); + + raw_spin_unlock_irqrestore(&p->lock, flags); + + return next; +} + +static irqreturn_t em_sti_interrupt(int irq, void *dev_id) +{ + struct em_sti_priv *p = dev_id; + + p->ced.event_handler(&p->ced); + return IRQ_HANDLED; +} + +static int em_sti_start(struct em_sti_priv *p, unsigned int user) +{ + unsigned long flags; + int used_before; + int ret = 0; + + raw_spin_lock_irqsave(&p->lock, flags); + used_before = p->active[USER_CLOCKSOURCE] | p->active[USER_CLOCKEVENT]; + if (!used_before) + ret = em_sti_enable(p); + + if (!ret) + p->active[user] = 1; + raw_spin_unlock_irqrestore(&p->lock, flags); + + return ret; +} + +static void em_sti_stop(struct em_sti_priv *p, unsigned int user) +{ + unsigned long flags; + int used_before, used_after; + + raw_spin_lock_irqsave(&p->lock, flags); + used_before = p->active[USER_CLOCKSOURCE] | p->active[USER_CLOCKEVENT]; + p->active[user] = 0; + used_after = p->active[USER_CLOCKSOURCE] | p->active[USER_CLOCKEVENT]; + + if (used_before && !used_after) + em_sti_disable(p); + raw_spin_unlock_irqrestore(&p->lock, flags); +} + +static struct em_sti_priv *cs_to_em_sti(struct clocksource *cs) +{ + return container_of(cs, struct em_sti_priv, cs); +} + +static cycle_t em_sti_clocksource_read(struct clocksource *cs) +{ + return em_sti_count(cs_to_em_sti(cs)); +} + +static int em_sti_clocksource_enable(struct clocksource *cs) +{ + int ret; + struct em_sti_priv *p = cs_to_em_sti(cs); + + ret = em_sti_start(p, USER_CLOCKSOURCE); + if (!ret) + __clocksource_updatefreq_hz(cs, p->rate); + return ret; +} + +static void em_sti_clocksource_disable(struct clocksource *cs) +{ + em_sti_stop(cs_to_em_sti(cs), USER_CLOCKSOURCE); +} + +static void em_sti_clocksource_resume(struct clocksource *cs) +{ + em_sti_clocksource_enable(cs); +} + +static int em_sti_register_clocksource(struct em_sti_priv *p) +{ + struct clocksource *cs = &p->cs; + + memset(cs, 0, sizeof(*cs)); + cs->name = dev_name(&p->pdev->dev); + cs->rating = 200; + cs->read = em_sti_clocksource_read; + cs->enable = em_sti_clocksource_enable; + cs->disable = em_sti_clocksource_disable; + cs->suspend = em_sti_clocksource_disable; + cs->resume = em_sti_clocksource_resume; + cs->mask = CLOCKSOURCE_MASK(48); + cs->flags = CLOCK_SOURCE_IS_CONTINUOUS; + + dev_info(&p->pdev->dev, "used as clock source\n"); + + /* Register with dummy 1 Hz value, gets updated in ->enable() */ + clocksource_register_hz(cs, 1); + return 0; +} + +static struct em_sti_priv *ced_to_em_sti(struct clock_event_device *ced) +{ + return container_of(ced, struct em_sti_priv, ced); +} + +static void em_sti_clock_event_mode(enum clock_event_mode mode, + struct clock_event_device *ced) +{ + struct em_sti_priv *p = ced_to_em_sti(ced); + + /* deal with old setting first */ + switch (ced->mode) { + case CLOCK_EVT_MODE_ONESHOT: + em_sti_stop(p, USER_CLOCKEVENT); + break; + default: + break; + } + + switch (mode) { + case CLOCK_EVT_MODE_ONESHOT: + dev_info(&p->pdev->dev, "used for oneshot clock events\n"); + em_sti_start(p, USER_CLOCKEVENT); + clockevents_config(&p->ced, p->rate); + break; + case CLOCK_EVT_MODE_SHUTDOWN: + case CLOCK_EVT_MODE_UNUSED: + em_sti_stop(p, USER_CLOCKEVENT); + break; + default: + break; + } +} + +static int em_sti_clock_event_next(unsigned long delta, + struct clock_event_device *ced) +{ + struct em_sti_priv *p = ced_to_em_sti(ced); + cycle_t next; + int safe; + + next = em_sti_set_next(p, em_sti_count(p) + delta); + safe = em_sti_count(p) < (next - 1); + + return !safe; +} + +static void em_sti_register_clockevent(struct em_sti_priv *p) +{ + struct clock_event_device *ced = &p->ced; + + memset(ced, 0, sizeof(*ced)); + ced->name = dev_name(&p->pdev->dev); + ced->features = CLOCK_EVT_FEAT_ONESHOT; + ced->rating = 200; + ced->cpumask = cpumask_of(0); + ced->set_next_event = em_sti_clock_event_next; + ced->set_mode = em_sti_clock_event_mode; + + dev_info(&p->pdev->dev, "used for clock events\n"); + + /* Register with dummy 1 Hz value, gets updated in ->set_mode() */ + clockevents_config_and_register(ced, 1, 2, 0xffffffff); +} + +static int __devinit em_sti_probe(struct platform_device *pdev) +{ + struct em_sti_priv *p; + struct resource *res; + int irq, ret; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) { + dev_err(&pdev->dev, "failed to allocate driver data\n"); + ret = -ENOMEM; + goto err0; + } + + p->pdev = pdev; + platform_set_drvdata(pdev, p); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "failed to get I/O memory\n"); + ret = -EINVAL; + goto err0; + } + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "failed to get irq\n"); + ret = -EINVAL; + goto err0; + } + + /* map memory, let base point to the STI instance */ + p->base = ioremap_nocache(res->start, resource_size(res)); + if (p->base == NULL) { + dev_err(&pdev->dev, "failed to remap I/O memory\n"); + ret = -ENXIO; + goto err0; + } + + /* get hold of clock */ + p->clk = clk_get(&pdev->dev, "sclk"); + if (IS_ERR(p->clk)) { + dev_err(&pdev->dev, "cannot get clock\n"); + ret = PTR_ERR(p->clk); + goto err1; + } + + if (request_irq(irq, em_sti_interrupt, + IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING, + dev_name(&pdev->dev), p)) { + dev_err(&pdev->dev, "failed to request low IRQ\n"); + ret = -ENOENT; + goto err2; + } + + raw_spin_lock_init(&p->lock); + em_sti_register_clockevent(p); + em_sti_register_clocksource(p); + return 0; + +err2: + clk_put(p->clk); +err1: + iounmap(p->base); +err0: + kfree(p); + return ret; +} + +static int __devexit em_sti_remove(struct platform_device *pdev) +{ + return -EBUSY; /* cannot unregister clockevent and clocksource */ +} + +static struct platform_driver em_sti_device_driver = { + .probe = em_sti_probe, + .remove = __devexit_p(em_sti_remove), + .driver = { + .name = "em_sti", + } +}; + +module_platform_driver(em_sti_device_driver); + +MODULE_AUTHOR("Magnus Damm"); +MODULE_DESCRIPTION("Renesas Emma Mobile STI Timer Driver"); +MODULE_LICENSE("GPL v2"); From fc0830fe017d02b7b4995b5c402b484b65d9dfc6 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 9 May 2012 23:39:50 +0900 Subject: [PATCH 028/475] clocksource: em_sti: Add DT support Update the em-sti driver to support DT. Signed-off-by: Magnus Damm Cc: arnd@arndb.de Cc: horms@verge.net.au Cc: johnstul@us.ibm.com Cc: rjw@sisk.pl Cc: lethal@linux-sh.org Cc: gregkh@linuxfoundation.org Cc: olof@lixom.net Link: http://lkml.kernel.org/r/20120509143950.27521.7949.sendpatchset@w520 Signed-off-by: Thomas Gleixner --- drivers/clocksource/em_sti.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c index 719584a5952..372051d1bba 100644 --- a/drivers/clocksource/em_sti.c +++ b/drivers/clocksource/em_sti.c @@ -384,11 +384,18 @@ static int __devexit em_sti_remove(struct platform_device *pdev) return -EBUSY; /* cannot unregister clockevent and clocksource */ } +static const struct of_device_id em_sti_dt_ids[] __devinitconst = { + { .compatible = "renesas,em-sti", }, + {}, +}; +MODULE_DEVICE_TABLE(of, em_sti_dt_ids); + static struct platform_driver em_sti_device_driver = { .probe = em_sti_probe, .remove = __devexit_p(em_sti_remove), .driver = { .name = "em_sti", + .of_match_table = em_sti_dt_ids, } }; From 62cf20b32aee4ae889a2eb40fd41c0eab73de970 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 May 2012 14:08:57 +0200 Subject: [PATCH 029/475] tick: Move skew_tick option into the HIGH_RES_TIMER section commit 5307c95 (tick: Add tick skew boot option) broke the !CONFIG_HIGH_RES_TIMERS build. Move the boot option parsing into the CONFIG_HIGH_RES_TIMERS section. Reported-by: Ingo Molnar Signed-off-by: Thomas Gleixner Cc: Mike Galbraith --- kernel/time/tick-sched.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 4eddbb5ea9c..efd38666753 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -816,6 +816,14 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) static int sched_skew_tick; +static int __init skew_tick(char *str) +{ + get_option(&str, &sched_skew_tick); + + return 0; +} +early_param("skew_tick", skew_tick); + /** * tick_setup_sched_timer - setup the tick emulation timer */ @@ -920,11 +928,3 @@ int tick_check_oneshot_change(int allow_nohz) tick_nohz_switch_to_nohz(); return 0; } - -static int __init skew_tick(char *str) -{ - get_option(&str, &sched_skew_tick); - - return 0; -} -early_param("skew_tick", skew_tick); From fa980ca87d15bb8a1317853f257a505990f3ffde Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 May 2012 08:24:39 -0700 Subject: [PATCH 030/475] cgroup: superblock can't be released with active dentries 48ddbe1946 "cgroup: make css->refcnt clearing on cgroup removal optional" allowed a css to linger after the associated cgroup is removed. As a css holds a reference on the cgroup's dentry, it means that cgroup dentries may linger for a while. cgroup_create() does grab an active reference on the superblock to prevent it from going away while there are !root cgroups; however, the reference is put from cgroup_diput() which is invoked on cgroup removal, so cgroup dentries which are removed but persisting due to lingering csses already have released their superblock active refs allowing superblock to be killed while those dentries are around. Given the right condition, this makes cgroup_kill_sb() call kill_litter_super() with dentries with non-zero d_count leading to BUG() in shrink_dcache_for_umount_subtree(). Fix it by adding cgroup_dops->d_release() operation and moving deactivate_super() to it. cgroup_diput() now marks dentry->d_fsdata with itself if superblock should be deactivated and cgroup_d_release() deactivates the superblock on dentry release. Signed-off-by: Tejun Heo Reported-by: Sasha Levin Tested-by: Sasha Levin LKML-Reference: Acked-by: Li Zefan --- kernel/cgroup.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ad8eae5bb80..e887b55f1f2 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -896,10 +896,13 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) mutex_unlock(&cgroup_mutex); /* - * Drop the active superblock reference that we took when we - * created the cgroup + * We want to drop the active superblock reference from the + * cgroup creation after all the dentry refs are gone - + * kill_sb gets mighty unhappy otherwise. Mark + * dentry->d_fsdata with cgroup_diput() to tell + * cgroup_d_release() to call deactivate_super(). */ - deactivate_super(cgrp->root->sb); + dentry->d_fsdata = cgroup_diput; /* * if we're getting rid of the cgroup, refcount should ensure @@ -925,6 +928,13 @@ static int cgroup_delete(const struct dentry *d) return 1; } +static void cgroup_d_release(struct dentry *dentry) +{ + /* did cgroup_diput() tell me to deactivate super? */ + if (dentry->d_fsdata == cgroup_diput) + deactivate_super(dentry->d_sb); +} + static void remove_dir(struct dentry *d) { struct dentry *parent = dget(d->d_parent); @@ -1532,6 +1542,7 @@ static int cgroup_get_rootdir(struct super_block *sb) static const struct dentry_operations cgroup_dops = { .d_iput = cgroup_diput, .d_delete = cgroup_delete, + .d_release = cgroup_d_release, }; struct inode *inode = From ec3d753c5f77ecce0f5d3b8ab603b54b4fd2a106 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 28 May 2012 11:01:44 +0900 Subject: [PATCH 031/475] sh: convert to kbuild asm-generic support. Straightforward change to get rid of all of the header wrapper stubs. Signed-off-by: Paul Mundt --- arch/sh/include/asm/Kbuild | 29 +++++++++++++++++++++++++ arch/sh/include/asm/bitsperlong.h | 1 - arch/sh/include/asm/cputime.h | 6 ----- arch/sh/include/asm/current.h | 1 - arch/sh/include/asm/div64.h | 1 - arch/sh/include/asm/emergency-restart.h | 6 ----- arch/sh/include/asm/errno.h | 6 ----- arch/sh/include/asm/fcntl.h | 1 - arch/sh/include/asm/ioctl.h | 1 - arch/sh/include/asm/ipcbuf.h | 1 - arch/sh/include/asm/irq_regs.h | 1 - arch/sh/include/asm/local.h | 7 ------ arch/sh/include/asm/mman.h | 1 - arch/sh/include/asm/msgbuf.h | 1 - arch/sh/include/asm/param.h | 1 - arch/sh/include/asm/parport.h | 1 - arch/sh/include/asm/percpu.h | 6 ----- arch/sh/include/asm/poll.h | 1 - arch/sh/include/asm/resource.h | 6 ----- arch/sh/include/asm/sembuf.h | 1 - arch/sh/include/asm/serial.h | 1 - arch/sh/include/asm/shmbuf.h | 1 - arch/sh/include/asm/siginfo.h | 6 ----- arch/sh/include/asm/socket.h | 1 - arch/sh/include/asm/statfs.h | 6 ----- arch/sh/include/asm/termbits.h | 1 - arch/sh/include/asm/termios.h | 1 - arch/sh/include/asm/ucontext.h | 1 - arch/sh/include/asm/xor.h | 1 - 29 files changed, 29 insertions(+), 69 deletions(-) delete mode 100644 arch/sh/include/asm/bitsperlong.h delete mode 100644 arch/sh/include/asm/cputime.h delete mode 100644 arch/sh/include/asm/current.h delete mode 100644 arch/sh/include/asm/div64.h delete mode 100644 arch/sh/include/asm/emergency-restart.h delete mode 100644 arch/sh/include/asm/errno.h delete mode 100644 arch/sh/include/asm/fcntl.h delete mode 100644 arch/sh/include/asm/ioctl.h delete mode 100644 arch/sh/include/asm/ipcbuf.h delete mode 100644 arch/sh/include/asm/irq_regs.h delete mode 100644 arch/sh/include/asm/local.h delete mode 100644 arch/sh/include/asm/mman.h delete mode 100644 arch/sh/include/asm/msgbuf.h delete mode 100644 arch/sh/include/asm/param.h delete mode 100644 arch/sh/include/asm/parport.h delete mode 100644 arch/sh/include/asm/percpu.h delete mode 100644 arch/sh/include/asm/poll.h delete mode 100644 arch/sh/include/asm/resource.h delete mode 100644 arch/sh/include/asm/sembuf.h delete mode 100644 arch/sh/include/asm/serial.h delete mode 100644 arch/sh/include/asm/shmbuf.h delete mode 100644 arch/sh/include/asm/siginfo.h delete mode 100644 arch/sh/include/asm/socket.h delete mode 100644 arch/sh/include/asm/statfs.h delete mode 100644 arch/sh/include/asm/termbits.h delete mode 100644 arch/sh/include/asm/termios.h delete mode 100644 arch/sh/include/asm/ucontext.h delete mode 100644 arch/sh/include/asm/xor.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 7beb42322f6..09d1e60ef47 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -1,5 +1,34 @@ include include/asm-generic/Kbuild.asm +generic-y += bitsperlong.h +generic-y += cputime.h +generic-y += current.h +generic-y += div64.h +generic-y += emergency-restart.h +generic-y += errno.h +generic-y += fcntl.h +generic-y += ioctl.h +generic-y += ipcbuf.h +generic-y += irq_regs.h +generic-y += local.h +generic-y += param.h +generic-y += parport.h +generic-y += percpu.h +generic-y += poll.h +generic-y += mman.h +generic-y += msgbuf.h +generic-y += resource.h +generic-y += sembuf.h +generic-y += serial.h +generic-y += shmbuf.h +generic-y += siginfo.h +generic-y += socket.h +generic-y += statfs.h +generic-y += termbits.h +generic-y += termios.h +generic-y += ucontext.h +generic-y += xor.h + header-y += cachectl.h header-y += cpu-features.h header-y += hw_breakpoint.h diff --git a/arch/sh/include/asm/bitsperlong.h b/arch/sh/include/asm/bitsperlong.h deleted file mode 100644 index 6dc0bb0c13b..00000000000 --- a/arch/sh/include/asm/bitsperlong.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/cputime.h b/arch/sh/include/asm/cputime.h deleted file mode 100644 index 6ca395d1393..00000000000 --- a/arch/sh/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __SH_CPUTIME_H -#define __SH_CPUTIME_H - -#include - -#endif /* __SH_CPUTIME_H */ diff --git a/arch/sh/include/asm/current.h b/arch/sh/include/asm/current.h deleted file mode 100644 index 4c51401b553..00000000000 --- a/arch/sh/include/asm/current.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/div64.h b/arch/sh/include/asm/div64.h deleted file mode 100644 index 6cd978cefb2..00000000000 --- a/arch/sh/include/asm/div64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/emergency-restart.h b/arch/sh/include/asm/emergency-restart.h deleted file mode 100644 index 108d8c48e42..00000000000 --- a/arch/sh/include/asm/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H - -#include - -#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/sh/include/asm/errno.h b/arch/sh/include/asm/errno.h deleted file mode 100644 index 51cf6f9cebb..00000000000 --- a/arch/sh/include/asm/errno.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_SH_ERRNO_H -#define __ASM_SH_ERRNO_H - -#include - -#endif /* __ASM_SH_ERRNO_H */ diff --git a/arch/sh/include/asm/fcntl.h b/arch/sh/include/asm/fcntl.h deleted file mode 100644 index 46ab12db573..00000000000 --- a/arch/sh/include/asm/fcntl.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/ioctl.h b/arch/sh/include/asm/ioctl.h deleted file mode 100644 index b279fe06dfe..00000000000 --- a/arch/sh/include/asm/ioctl.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/ipcbuf.h b/arch/sh/include/asm/ipcbuf.h deleted file mode 100644 index 84c7e51cb6d..00000000000 --- a/arch/sh/include/asm/ipcbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/irq_regs.h b/arch/sh/include/asm/irq_regs.h deleted file mode 100644 index 3dd9c0b7027..00000000000 --- a/arch/sh/include/asm/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/local.h b/arch/sh/include/asm/local.h deleted file mode 100644 index 9ed9b9cb459..00000000000 --- a/arch/sh/include/asm/local.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __ASM_SH_LOCAL_H -#define __ASM_SH_LOCAL_H - -#include - -#endif /* __ASM_SH_LOCAL_H */ - diff --git a/arch/sh/include/asm/mman.h b/arch/sh/include/asm/mman.h deleted file mode 100644 index 8eebf89f5ab..00000000000 --- a/arch/sh/include/asm/mman.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/msgbuf.h b/arch/sh/include/asm/msgbuf.h deleted file mode 100644 index 809134c644a..00000000000 --- a/arch/sh/include/asm/msgbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/param.h b/arch/sh/include/asm/param.h deleted file mode 100644 index 965d4542797..00000000000 --- a/arch/sh/include/asm/param.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/parport.h b/arch/sh/include/asm/parport.h deleted file mode 100644 index cf252af6459..00000000000 --- a/arch/sh/include/asm/parport.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/percpu.h b/arch/sh/include/asm/percpu.h deleted file mode 100644 index 4db4b39a439..00000000000 --- a/arch/sh/include/asm/percpu.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ARCH_SH_PERCPU -#define __ARCH_SH_PERCPU - -#include - -#endif /* __ARCH_SH_PERCPU */ diff --git a/arch/sh/include/asm/poll.h b/arch/sh/include/asm/poll.h deleted file mode 100644 index c98509d3149..00000000000 --- a/arch/sh/include/asm/poll.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/resource.h b/arch/sh/include/asm/resource.h deleted file mode 100644 index 9c2499a86ec..00000000000 --- a/arch/sh/include/asm/resource.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_SH_RESOURCE_H -#define __ASM_SH_RESOURCE_H - -#include - -#endif /* __ASM_SH_RESOURCE_H */ diff --git a/arch/sh/include/asm/sembuf.h b/arch/sh/include/asm/sembuf.h deleted file mode 100644 index 7673b83cfef..00000000000 --- a/arch/sh/include/asm/sembuf.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/serial.h b/arch/sh/include/asm/serial.h deleted file mode 100644 index a0cb0caff15..00000000000 --- a/arch/sh/include/asm/serial.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/shmbuf.h b/arch/sh/include/asm/shmbuf.h deleted file mode 100644 index 83c05fc2de3..00000000000 --- a/arch/sh/include/asm/shmbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/siginfo.h b/arch/sh/include/asm/siginfo.h deleted file mode 100644 index 813040ed68a..00000000000 --- a/arch/sh/include/asm/siginfo.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_SH_SIGINFO_H -#define __ASM_SH_SIGINFO_H - -#include - -#endif /* __ASM_SH_SIGINFO_H */ diff --git a/arch/sh/include/asm/socket.h b/arch/sh/include/asm/socket.h deleted file mode 100644 index 6b71384b9d8..00000000000 --- a/arch/sh/include/asm/socket.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/statfs.h b/arch/sh/include/asm/statfs.h deleted file mode 100644 index 9202a023328..00000000000 --- a/arch/sh/include/asm/statfs.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_SH_STATFS_H -#define __ASM_SH_STATFS_H - -#include - -#endif /* __ASM_SH_STATFS_H */ diff --git a/arch/sh/include/asm/termbits.h b/arch/sh/include/asm/termbits.h deleted file mode 100644 index 3935b106de7..00000000000 --- a/arch/sh/include/asm/termbits.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/termios.h b/arch/sh/include/asm/termios.h deleted file mode 100644 index 280d78a9d96..00000000000 --- a/arch/sh/include/asm/termios.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/ucontext.h b/arch/sh/include/asm/ucontext.h deleted file mode 100644 index 9bc07b9f30f..00000000000 --- a/arch/sh/include/asm/ucontext.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/xor.h b/arch/sh/include/asm/xor.h deleted file mode 100644 index c82eb12a5b1..00000000000 --- a/arch/sh/include/asm/xor.h +++ /dev/null @@ -1 +0,0 @@ -#include From 2aa4ee2a8805ec0260dde971e9e6699917c868a7 Mon Sep 17 00:00:00 2001 From: Jim Kukunas Date: Mon, 28 May 2012 14:10:22 +1000 Subject: [PATCH 032/475] lib/raid6: fix sparse warnings in recovery functions Make the recovery functions static to fix the following sparse warnings: lib/raid6/recov.c:25:6: warning: symbol 'raid6_2data_recov_intx1' was not declared. Should it be static? lib/raid6/recov.c:69:6: warning: symbol 'raid6_datap_recov_intx1' was not declared. Should it be static? lib/raid6/recov_ssse3.c:22:6: warning: symbol 'raid6_2data_recov_ssse3' was not declared. Should it be static? lib/raid6/recov_ssse3.c:197:6: warning: symbol 'raid6_datap_recov_ssse3' was not declared. Should it be static? Reported-by: Fengguang Wu Signed-off-by: Jim Kukunas Signed-off-by: NeilBrown --- lib/raid6/recov.c | 7 ++++--- lib/raid6/recov_ssse3.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c index 1805a5cc5da..a95bccb8497 100644 --- a/lib/raid6/recov.c +++ b/lib/raid6/recov.c @@ -22,8 +22,8 @@ #include /* Recover two failed data blocks. */ -void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb, - void **ptrs) +static void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, + int failb, void **ptrs) { u8 *p, *q, *dp, *dq; u8 px, qx, db; @@ -66,7 +66,8 @@ void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb, } /* Recover failure of one data block plus the P block */ -void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, void **ptrs) +static void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, + void **ptrs) { u8 *p, *q, *dq; const u8 *qmul; /* Q multiplier table */ diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c index 37ae6193055..ecb710c0b4d 100644 --- a/lib/raid6/recov_ssse3.c +++ b/lib/raid6/recov_ssse3.c @@ -19,8 +19,8 @@ static int raid6_has_ssse3(void) boot_cpu_has(X86_FEATURE_SSSE3); } -void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, int failb, - void **ptrs) +static void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, + int failb, void **ptrs) { u8 *p, *q, *dp, *dq; const u8 *pbmul; /* P multiplier table for B data */ @@ -194,7 +194,8 @@ void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, int failb, } -void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, void **ptrs) +static void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, + void **ptrs) { u8 *p, *q, *dq; const u8 *qmul; /* Q multiplier table */ From c3b20037926e607b6cdaecbf9d3103e2ca63bc31 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 May 2012 22:33:02 +0100 Subject: [PATCH 033/475] drm/i915: Reset last_retired_head when resetting ring When we reset the ring control registers, including the HEAD and TAIL of the ring, we also need to reset associated state. In this instance, we were failing to reset the cached value of ring->last_retired_head and so upon the first request for more space following a resume would potentially (depending on a narrow race window) believe that the HEAD had advanced much further than reality. This is a regression from: commit a71d8d94525e8fd855c0466fb586ae1cb008f3a2 Author: Chris Wilson Date: Wed Feb 15 11:25:36 2012 +0000 drm/i915: Record the tail at each request and use it to estimate the head Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org # 3.4 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b59b6d5b758..490df551e31 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -326,6 +326,7 @@ static int init_ring_common(struct intel_ring_buffer *ring) ring->head = I915_READ_HEAD(ring); ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR; ring->space = ring_space(ring); + ring->last_retired_head = -1; } return 0; From c10d7a13846bffa5c77f1122500b687ab902e2d2 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 10 May 2012 00:08:43 +0100 Subject: [PATCH 034/475] ACPI / PM: Generate wakeup events on fixed power button When the system is woken up by the ACPI fixed power button, currently there is no way of userspace becoming aware that the power button was pressed. OLPC would like to know this, so that we can respond appropriately. For example, if the system was woken up by a network packet, we know we can go back to sleep very quickly. But if the user explicitly woke the system with the power button, we're going to want to stay awake for a while. The wakeup count mechanism seems like a good fit for communicating this. Mark the fixed power button as wakeup-enabled, and increment its wakeup counter when the system is woken with the power button. (The wakeup counter is also incremented when the power button is pressed during system operation; this is already handled by an existing acpi-button codepath). Signed-off-by: Daniel Drake Acked-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 1 + drivers/acpi/sleep.c | 45 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 85cbfdccc97..c8a1f3b6811 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1567,6 +1567,7 @@ static int acpi_bus_scan_fixed(void) ACPI_BUS_TYPE_POWER_BUTTON, ACPI_STA_DEFAULT, &ops); + device_init_wakeup(&device->dev, true); } if ((acpi_gbl_FADT.flags & ACPI_FADT_SLEEP_BUTTON) == 0) { diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index eb6fd233764..a564fc3ffa1 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -57,6 +57,7 @@ MODULE_PARM_DESC(gts, "Enable evaluation of _GTS on suspend."); MODULE_PARM_DESC(bfs, "Enable evaluation of _BFS on resume".); static u8 sleep_states[ACPI_S_STATE_COUNT]; +static bool pwr_btn_event_pending; static void acpi_sleep_tts_switch(u32 acpi_state) { @@ -186,6 +187,14 @@ static int acpi_pm_prepare(void) return error; } +static int find_powerf_dev(struct device *dev, void *data) +{ + struct acpi_device *device = to_acpi_device(dev); + const char *hid = acpi_device_hid(device); + + return !strcmp(hid, ACPI_BUTTON_HID_POWERF); +} + /** * acpi_pm_finish - Instruct the platform to leave a sleep state. * @@ -194,6 +203,7 @@ static int acpi_pm_prepare(void) */ static void acpi_pm_finish(void) { + struct device *pwr_btn_dev; u32 acpi_state = acpi_target_sleep_state; acpi_ec_unblock_transactions(); @@ -211,6 +221,23 @@ static void acpi_pm_finish(void) acpi_set_firmware_waking_vector((acpi_physical_address) 0); acpi_target_sleep_state = ACPI_STATE_S0; + + /* If we were woken with the fixed power button, provide a small + * hint to userspace in the form of a wakeup event on the fixed power + * button device (if it can be found). + * + * We delay the event generation til now, as the PM layer requires + * timekeeping to be running before we generate events. */ + if (!pwr_btn_event_pending) + return; + + pwr_btn_event_pending = false; + pwr_btn_dev = bus_find_device(&acpi_bus_type, NULL, NULL, + find_powerf_dev); + if (pwr_btn_dev) { + pm_wakeup_event(pwr_btn_dev, 0); + put_device(pwr_btn_dev); + } } /** @@ -300,9 +327,23 @@ static int acpi_suspend_enter(suspend_state_t pm_state) /* ACPI 3.0 specs (P62) says that it's the responsibility * of the OSPM to clear the status bit [ implying that the * POWER_BUTTON event should not reach userspace ] + * + * However, we do generate a small hint for userspace in the form of + * a wakeup event. We flag this condition for now and generate the + * event later, as we're currently too early in resume to be able to + * generate wakeup events. */ - if (ACPI_SUCCESS(status) && (acpi_state == ACPI_STATE_S3)) - acpi_clear_event(ACPI_EVENT_POWER_BUTTON); + if (ACPI_SUCCESS(status) && (acpi_state == ACPI_STATE_S3)) { + acpi_event_status pwr_btn_status; + + acpi_get_event_status(ACPI_EVENT_POWER_BUTTON, &pwr_btn_status); + + if (pwr_btn_status & ACPI_EVENT_FLAG_SET) { + acpi_clear_event(ACPI_EVENT_POWER_BUTTON); + /* Flag for later */ + pwr_btn_event_pending = true; + } + } /* * Disable and clear GPE status before interrupt is enabled. Some GPEs From b2201e5482bc2376ea5c049442850a260142ac40 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Fri, 18 May 2012 22:59:41 +0200 Subject: [PATCH 035/475] rtc-cmos / PM: report wakeup event on ACPI RTC alarm When the ACPI-driven RTC alarm wakes the system, report it as a wakeup event. This allows userspace to determine that the reason for system wakeup was RTC alarm. Signed-off-by: Daniel Drake Signed-off-by: Rafael J. Wysocki --- drivers/rtc/rtc-cmos.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 7d5f56edb8e..4267789ca99 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -910,14 +910,17 @@ static inline int cmos_poweroff(struct device *dev) static u32 rtc_handler(void *context) { + struct device *dev = context; + + pm_wakeup_event(dev, 0); acpi_clear_event(ACPI_EVENT_RTC); acpi_disable_event(ACPI_EVENT_RTC, 0); return ACPI_INTERRUPT_HANDLED; } -static inline void rtc_wake_setup(void) +static inline void rtc_wake_setup(struct device *dev) { - acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, NULL); + acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, dev); /* * After the RTC handler is installed, the Fixed_RTC event should * be disabled. Only when the RTC alarm is set will it be enabled. @@ -950,7 +953,7 @@ cmos_wake_setup(struct device *dev) if (acpi_disabled) return; - rtc_wake_setup(); + rtc_wake_setup(dev); acpi_rtc_info.wake_on = rtc_wake_on; acpi_rtc_info.wake_off = rtc_wake_off; From 63a1a765dffb1e59d82c7948638e56d5f4f2e3a1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 20 May 2012 13:57:52 +0200 Subject: [PATCH 036/475] ACPI / PM: Fix error messages in drivers/acpi/bus.c After recent changes of the ACPI device low-power states definitions kernel messages in drivers/acpi/bus.c need to be updated so that they include the correct names of the states in question (currently is "D3" for D3hot and "D4" for D3cold, which is incorrect). Signed-off-by: Rafael J. Wysocki --- drivers/acpi/bus.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 3188da3df8d..a41be56c1cc 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -182,6 +182,24 @@ EXPORT_SYMBOL(acpi_bus_get_private_data); Power Management -------------------------------------------------------------------------- */ +static const char *state_string(int state) +{ + switch (state) { + case ACPI_STATE_D0: + return "D0"; + case ACPI_STATE_D1: + return "D1"; + case ACPI_STATE_D2: + return "D2"; + case ACPI_STATE_D3_HOT: + return "D3hot"; + case ACPI_STATE_D3_COLD: + return "D3"; + default: + return "(unknown)"; + } +} + static int __acpi_bus_get_power(struct acpi_device *device, int *state) { int result = 0; @@ -215,8 +233,8 @@ static int __acpi_bus_get_power(struct acpi_device *device, int *state) device->parent->power.state : ACPI_STATE_D0; } - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] power state is D%d\n", - device->pnp.bus_id, *state)); + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] power state is %s\n", + device->pnp.bus_id, state_string(*state))); return 0; } @@ -234,13 +252,14 @@ static int __acpi_bus_set_power(struct acpi_device *device, int state) /* Make sure this is a valid target state */ if (state == device->power.state) { - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device is already at D%d\n", - state)); + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device is already at %s\n", + state_string(state))); return 0; } if (!device->power.states[state].flags.valid) { - printk(KERN_WARNING PREFIX "Device does not support D%d\n", state); + printk(KERN_WARNING PREFIX "Device does not support %s\n", + state_string(state)); return -ENODEV; } if (device->parent && (state < device->parent->power.state)) { @@ -294,13 +313,13 @@ static int __acpi_bus_set_power(struct acpi_device *device, int state) end: if (result) printk(KERN_WARNING PREFIX - "Device [%s] failed to transition to D%d\n", - device->pnp.bus_id, state); + "Device [%s] failed to transition to %s\n", + device->pnp.bus_id, state_string(state)); else { device->power.state = state; ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Device [%s] transitioned to D%d\n", - device->pnp.bus_id, state)); + "Device [%s] transitioned to %s\n", + device->pnp.bus_id, state_string(state))); } return result; From 38c92fff988d518fe80dc23d0d44d66bd7e47ddd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 20 May 2012 13:58:00 +0200 Subject: [PATCH 037/475] ACPI / PM: Make __acpi_bus_get_power() cover D3cold correctly After recent changes of the ACPI device power states definitions, if power resources are not used for the device's power management, the state returned by __acpi_bus_get_power() cannot exceed D3hot, because the return values of _PSC are 0 through 3. However, if the _PR3 method is not present for the device and _PS3 returns 3, we have to assume that the device is in D3cold, so the value returned by __acpi_bus_get_power() in that case should be 4. Similarly, acpi_power_get_inferred_state() should take the power resources for the D3hot state into account in general, so that it can return 3 if those resources are "on" or 4 (D3cold) otherwise. Fix the the above two issues and make sure that if both _PSC and _PR3 are present for the device, the power resources listed by _PR3 will be used to determine if the number 3 returned by _PSC is meant to represent D3cold or D3hot. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/bus.c | 51 +++++++++++++++++++++++++------------------- drivers/acpi/power.c | 2 +- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index a41be56c1cc..adceafda9c1 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -202,37 +202,44 @@ static const char *state_string(int state) static int __acpi_bus_get_power(struct acpi_device *device, int *state) { - int result = 0; - acpi_status status = 0; - unsigned long long psc = 0; + int result = ACPI_STATE_UNKNOWN; if (!device || !state) return -EINVAL; - *state = ACPI_STATE_UNKNOWN; - - if (device->flags.power_manageable) { - /* - * Get the device's power state either directly (via _PSC) or - * indirectly (via power resources). - */ - if (device->power.flags.power_resources) { - result = acpi_power_get_inferred_state(device, state); - if (result) - return result; - } else if (device->power.flags.explicit_get) { - status = acpi_evaluate_integer(device->handle, "_PSC", - NULL, &psc); - if (ACPI_FAILURE(status)) - return -ENODEV; - *state = (int)psc; - } - } else { + if (!device->flags.power_manageable) { /* TBD: Non-recursive algorithm for walking up hierarchy. */ *state = device->parent ? device->parent->power.state : ACPI_STATE_D0; + goto out; } + /* + * Get the device's power state either directly (via _PSC) or + * indirectly (via power resources). + */ + if (device->power.flags.explicit_get) { + unsigned long long psc; + acpi_status status = acpi_evaluate_integer(device->handle, + "_PSC", NULL, &psc); + if (ACPI_FAILURE(status)) + return -ENODEV; + + result = psc; + } + /* The test below covers ACPI_STATE_UNKNOWN too. */ + if (result <= ACPI_STATE_D2) { + ; /* Do nothing. */ + } else if (device->power.flags.power_resources) { + int error = acpi_power_get_inferred_state(device, &result); + if (error) + return error; + } else if (result == ACPI_STATE_D3_HOT) { + result = ACPI_STATE_D3; + } + *state = result; + + out: ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] power state is %s\n", device->pnp.bus_id, state_string(*state))); diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 0500f719f63..dd6d6a3c678 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -631,7 +631,7 @@ int acpi_power_get_inferred_state(struct acpi_device *device, int *state) * We know a device's inferred power state when all the resources * required for a given D-state are 'on'. */ - for (i = ACPI_STATE_D0; i < ACPI_STATE_D3_HOT; i++) { + for (i = ACPI_STATE_D0; i <= ACPI_STATE_D3_HOT; i++) { list = &device->power.states[i].resources; if (list->count < 1) continue; From dbe9a2edd17d843d80faf2b99f20a691c1853418 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 29 May 2012 21:21:07 +0200 Subject: [PATCH 038/475] ACPI / PM: Make acpi_pm_device_sleep_state() follow the specification The comparison between the system sleep state being entered and the lowest system sleep state the given device may wake up from in acpi_pm_device_sleep_state() is reversed, because the specification (ACPI 5.0) says that for wakeup to work: "The sleeping state being entered must be less than or equal to the power state declared in element 1 of the _PRW object." In other words, the state returned by _PRW is the deepest (lowest-power) system sleep state the device is capable of waking up the system from. Moreover, acpi_pm_device_sleep_state() also should check if the wakeup capability is supported through ACPI, because in principle it may be done via native PCIe PME, for example, in which case _SxW should not be evaluated. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sleep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index a564fc3ffa1..d8b381e94ee 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -773,8 +773,8 @@ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p) * can wake the system. _S0W may be valid, too. */ if (acpi_target_sleep_state == ACPI_STATE_S0 || - (device_may_wakeup(dev) && - adev->wakeup.sleep_state <= acpi_target_sleep_state)) { + (device_may_wakeup(dev) && adev->wakeup.flags.valid && + adev->wakeup.sleep_state >= acpi_target_sleep_state)) { acpi_status status; acpi_method[3] = 'W'; From cd4fedf9cfe2d453148dce0f105a41725e5107a3 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 23 May 2012 21:02:07 +0530 Subject: [PATCH 039/475] RDMA/ocrdma: Correct queue free count math Correct queue free count math for SQ, RQ for all hardware type. Update user-kernel ABI interface. Signed-off-by: Parav Pandit Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 1 - drivers/infiniband/hw/ocrdma/ocrdma_abi.h | 5 +---- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 7 ------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 5 ----- 4 files changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 85a69c95855..037f5cea85b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -231,7 +231,6 @@ struct ocrdma_qp_hwq_info { u32 entry_size; u32 max_cnt; u32 max_wqe_idx; - u32 free_delta; u16 dbid; /* qid, where to ring the doorbell. */ u32 len; dma_addr_t pa; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h index a411a4e3193..517ab20b727 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h @@ -101,8 +101,6 @@ struct ocrdma_create_qp_uresp { u32 rsvd1; u32 num_wqe_allocated; u32 num_rqe_allocated; - u32 free_wqe_delta; - u32 free_rqe_delta; u32 db_sq_offset; u32 db_rq_offset; u32 db_shift; @@ -126,8 +124,7 @@ struct ocrdma_create_srq_uresp { u32 db_rq_offset; u32 db_shift; - u32 free_rqe_delta; - u32 rsvd2; + u64 rsvd2; u64 rsvd3; } __packed; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 9b204b1ba33..f26314fd5f6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1990,19 +1990,12 @@ static void ocrdma_get_create_qp_rsp(struct ocrdma_create_qp_rsp *rsp, max_wqe_allocated = 1 << max_wqe_allocated; max_rqe_allocated = 1 << ((u16)rsp->max_wqe_rqe); - if (qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { - qp->sq.free_delta = 0; - qp->rq.free_delta = 1; - } else - qp->sq.free_delta = 1; - qp->sq.max_cnt = max_wqe_allocated; qp->sq.max_wqe_idx = max_wqe_allocated - 1; if (!attrs->srq) { qp->rq.max_cnt = max_rqe_allocated; qp->rq.max_wqe_idx = max_rqe_allocated - 1; - qp->rq.free_delta = 1; } } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e9f74d1b48f..d16d172b6b6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -940,8 +940,6 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp, uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET; uresp.db_shift = 16; } - uresp.free_wqe_delta = qp->sq.free_delta; - uresp.free_rqe_delta = qp->rq.free_delta; if (qp->dpp_enabled) { uresp.dpp_credit = dpp_credit_lmt; @@ -1307,8 +1305,6 @@ static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q) free_cnt = (q->max_cnt - q->head) + q->tail; else free_cnt = q->tail - q->head; - if (q->free_delta) - free_cnt -= q->free_delta; return free_cnt; } @@ -1501,7 +1497,6 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_srq *srq, struct ib_udata *udata) (srq->pd->id * srq->dev->nic_info.db_page_size); uresp.db_page_size = srq->dev->nic_info.db_page_size; uresp.num_rqe_allocated = srq->rq.max_cnt; - uresp.free_rqe_delta = 1; if (srq->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ1_OFFSET; uresp.db_shift = 24; From 804eaf29bac4148aa265bedc62182ed41a4c6120 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 23 May 2012 21:11:17 +0530 Subject: [PATCH 040/475] RDMA/ocrdma: Fix signaled event for SRQ_LIMIT_REACHED Signed-off-by: Parav Pandit Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index f26314fd5f6..9343a152297 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -732,7 +732,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, break; case OCRDMA_SRQ_LIMIT_EVENT: ib_evt.element.srq = &qp->srq->ibsrq; - ib_evt.event = IB_EVENT_QP_LAST_WQE_REACHED; + ib_evt.event = IB_EVENT_SRQ_LIMIT_REACHED; srq_event = 1; qp_event = 0; break; From 7ad5e449b96bd82f406ed4657a64c8f72a48896d Mon Sep 17 00:00:00 2001 From: Devendra Naga Date: Tue, 29 May 2012 12:51:47 +0530 Subject: [PATCH 041/475] RDMA/ocrdma: Remove unnecessary version.h includes "make versioncheck" shows: drivers/infiniband/hw/ocrdma/ocrdma_main.c: 29 linux/version.h not needed. drivers/infiniband/hw/ocrdma/ocrdma_verbs.h: 31 linux/version.h not needed. Signed-off-by: Devendra Naga Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 - drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index a20d16eaae7..04fef3de6d7 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -26,7 +26,6 @@ *******************************************************************/ #include -#include #include #include #include diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index e6483439f25..633f03d8027 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -28,7 +28,6 @@ #ifndef __OCRDMA_VERBS_H__ #define __OCRDMA_VERBS_H__ -#include int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *, struct ib_send_wr **bad_wr); int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *, From 9f646389aa7727a2fd8f9ae6337b92af9cfbc264 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 29 May 2012 16:39:09 +0200 Subject: [PATCH 042/475] sched/x86: Use cpu_llc_shared_mask(cpu) for coregroup_mask Commit commit 8e7fbcbc2 ("sched: Remove stale power aware scheduling remnants and dysfunctional knobs") made a boo-boo with removing the power aware scheduling muck from the x86 topology bits. We should unconditionally use the llc_shared mask for multi-core. Reported-and-tested-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Borislav Petkov Cc: Andreas Herrmann Link: http://lkml.kernel.org/n/tip-lsksc2kfyeveb13avh327p0d@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f56f96da77f..fd019d78b1f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -410,15 +410,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) /* maps the cpu to the sched domain representing multi-core */ const struct cpumask *cpu_coregroup_mask(int cpu) { - struct cpuinfo_x86 *c = &cpu_data(cpu); - /* - * For perf, we return last level cache shared map. - * And for power savings, we return cpu_core_map - */ - if (!(cpu_has(c, X86_FEATURE_AMD_DCM))) - return cpu_core_mask(cpu); - else - return cpu_llc_shared_mask(cpu); + return cpu_llc_shared_mask(cpu); } static void impress_friends(void) From 5aaa0b7a2ed5b12692c9ffb5222182bd558d3146 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 May 2012 17:15:29 +0200 Subject: [PATCH 043/475] sched/nohz: Fix rq->cpu_load calculations some more Follow up on commit 556061b00 ("sched/nohz: Fix rq->cpu_load[] calculations") since while that fixed the busy case it regressed the mostly idle case. Add a callback from the nohz exit to also age the rq->cpu_load[] array. This closes the hole where either there was no nohz load balance pass during the nohz, or there was a 'significant' amount of idle time between the last nohz balance and the nohz exit. So we'll update unconditionally from the tick to not insert any accidental 0 load periods while busy, and we try and catch up from nohz idle balance and nohz exit. Both these are still prone to missing a jiffy, but that has always been the case. Signed-off-by: Peter Zijlstra Cc: pjt@google.com Cc: Venkatesh Pallipadi Link: http://lkml.kernel.org/n/tip-kt0trz0apodbf84ucjfdbr1a@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/sched/core.c | 53 ++++++++++++++++++++++++++++++++-------- kernel/time/tick-sched.c | 1 + 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index f45c0b280b5..d61e5977e51 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -145,6 +145,7 @@ extern unsigned long this_cpu_load(void); extern void calc_global_load(unsigned long ticks); +extern void update_cpu_load_nohz(void); extern unsigned long get_parent_ip(unsigned long addr); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 39eb6011bc3..75844a8f9ae 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2517,25 +2517,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, sched_avg_update(this_rq); } +#ifdef CONFIG_NO_HZ +/* + * There is no sane way to deal with nohz on smp when using jiffies because the + * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading + * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. + * + * Therefore we cannot use the delta approach from the regular tick since that + * would seriously skew the load calculation. However we'll make do for those + * updates happening while idle (nohz_idle_balance) or coming out of idle + * (tick_nohz_idle_exit). + * + * This means we might still be one tick off for nohz periods. + */ + /* * Called from nohz_idle_balance() to update the load ratings before doing the * idle balance. */ void update_idle_cpu_load(struct rq *this_rq) { - unsigned long curr_jiffies = jiffies; + unsigned long curr_jiffies = ACCESS_ONCE(jiffies); unsigned long load = this_rq->load.weight; unsigned long pending_updates; /* - * Bloody broken means of dealing with nohz, but better than nothing.. - * jiffies is updated by one cpu, another cpu can drift wrt the jiffy - * update and see 0 difference the one time and 2 the next, even though - * we ticked at roughtly the same rate. - * - * Hence we only use this from nohz_idle_balance() and skip this - * nonsense when called from the scheduler_tick() since that's - * guaranteed a stable rate. + * bail if there's load or we're actually up-to-date. */ if (load || curr_jiffies == this_rq->last_load_update_tick) return; @@ -2546,13 +2553,39 @@ void update_idle_cpu_load(struct rq *this_rq) __update_cpu_load(this_rq, load, pending_updates); } +/* + * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed. + */ +void update_cpu_load_nohz(void) +{ + struct rq *this_rq = this_rq(); + unsigned long curr_jiffies = ACCESS_ONCE(jiffies); + unsigned long pending_updates; + + if (curr_jiffies == this_rq->last_load_update_tick) + return; + + raw_spin_lock(&this_rq->lock); + pending_updates = curr_jiffies - this_rq->last_load_update_tick; + if (pending_updates) { + this_rq->last_load_update_tick = curr_jiffies; + /* + * We were idle, this means load 0, the current load might be + * !0 due to remote wakeups and the sort. + */ + __update_cpu_load(this_rq, 0, pending_updates); + } + raw_spin_unlock(&this_rq->lock); +} +#endif /* CONFIG_NO_HZ */ + /* * Called from scheduler_tick() */ static void update_cpu_load_active(struct rq *this_rq) { /* - * See the mess in update_idle_cpu_load(). + * See the mess around update_idle_cpu_load() / update_cpu_load_nohz(). */ this_rq->last_load_update_tick = jiffies; __update_cpu_load(this_rq, this_rq->load.weight, 1); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6a3a5b9ff56..0c927cd8534 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -576,6 +576,7 @@ void tick_nohz_idle_exit(void) /* Update jiffies first */ select_nohz_load_balancer(0); tick_do_update_jiffies64(now); + update_cpu_load_nohz(); #ifndef CONFIG_VIRT_CPU_ACCOUNTING /* From 2ea45800d8e1c3c51c45a233d6bd6289a297a386 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 25 May 2012 09:26:43 +0200 Subject: [PATCH 044/475] sched: Don't try allocating memory from offline nodes Allocators don't appreciate it when you try and allocate memory from offline nodes. Reported-and-tested-by: Tony Luck Reported-and-tested-by: Anton Blanchard Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-epfc1io9whb7o22bcujf31vn@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 75844a8f9ae..55733616baa 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6436,7 +6436,7 @@ static void sched_init_numa(void) return; for (j = 0; j < nr_node_ids; j++) { - struct cpumask *mask = kzalloc_node(cpumask_size(), GFP_KERNEL, j); + struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL); if (!mask) return; From 74a5ce20e6eeeb3751340b390e7ac1d1d07bbf55 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 May 2012 18:00:43 +0200 Subject: [PATCH 045/475] sched: Fix SD_OVERLAP SD_OVERLAP exists to allow overlapping groups, overlapping groups appear in NUMA topologies that aren't fully connected. The typical result of not fully connected NUMA is that each cpu (or rather node) will have different spans for a particular distance. However due to how sched domains are traversed -- only the first cpu in the mask goes one level up -- the next level only cares about the spans of the cpus that went up. Due to this two things were observed to be broken: - build_overlap_sched_groups() -- since its possible the cpu we're building the groups for exists in multiple (or all) groups, the selection criteria of the first group didn't ensure there was a cpu for which is was true that cpumask_first(span) == cpu. Thus load- balancing would terminate. - update_group_power() -- assumed that the cpu span of the first group of the domain was covered by all groups of the child domain. The above explains why this isn't true, so deal with it. Signed-off-by: Peter Zijlstra Cc: David Rientjes Link: http://lkml.kernel.org/r/1337788843.9783.14.camel@laptop Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 7 +++++-- kernel/sched/fair.c | 25 ++++++++++++++++++++----- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 55733616baa..3a69374fb42 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6030,11 +6030,14 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) cpumask_or(covered, covered, sg_span); - sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span)); + sg->sgp = *per_cpu_ptr(sdd->sgp, i); atomic_inc(&sg->sgp->ref); - if (cpumask_test_cpu(cpu, sg_span)) + if ((!groups && cpumask_test_cpu(cpu, sg_span)) || + cpumask_first(sg_span) == cpu) { + WARN_ON_ONCE(!cpumask_test_cpu(cpu, sg_span)); groups = sg; + } if (!first) first = sg; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 940e6d17cf9..f0380d4987b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3574,11 +3574,26 @@ void update_group_power(struct sched_domain *sd, int cpu) power = 0; - group = child->groups; - do { - power += group->sgp->power; - group = group->next; - } while (group != child->groups); + if (child->flags & SD_OVERLAP) { + /* + * SD_OVERLAP domains cannot assume that child groups + * span the current group. + */ + + for_each_cpu(cpu, sched_group_cpus(sdg)) + power += power_of(cpu); + } else { + /* + * !SD_OVERLAP domains can assume that child groups + * span the current group. + */ + + group = child->groups; + do { + power += group->sgp->power; + group = group->next; + } while (group != child->groups); + } sdg->sgp->power = power; } From b654f7de41b0e3903ee2b51d3b8db77fe52ce728 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 22 May 2012 14:04:28 +0200 Subject: [PATCH 046/475] sched: Make sure to not re-read variables after validation We could re-read rq->rt_avg after we validated it was smaller than total, invalidating the check and resulting in an unintended negative. Signed-off-by: Peter Zijlstra Cc: David Rientjes Link: http://lkml.kernel.org/r/1337688268.9698.29.camel@twins Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f0380d4987b..2b449a76207 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3503,15 +3503,22 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) unsigned long scale_rt_power(int cpu) { struct rq *rq = cpu_rq(cpu); - u64 total, available; + u64 total, available, age_stamp, avg; - total = sched_avg_period() + (rq->clock - rq->age_stamp); + /* + * Since we're reading these variables without serialization make sure + * we read them once before doing sanity checks on them. + */ + age_stamp = ACCESS_ONCE(rq->age_stamp); + avg = ACCESS_ONCE(rq->rt_avg); - if (unlikely(total < rq->rt_avg)) { + total = sched_avg_period() + (rq->clock - age_stamp); + + if (unlikely(total < avg)) { /* Ensures that power won't end up being negative */ available = 0; } else { - available = total - rq->rt_avg; + available = total - avg; } if (unlikely((s64)total < SCHED_POWER_SCALE)) From 29baa7478ba47d746e3625c91d3b2afbf46b4312 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Apr 2012 12:11:21 +0200 Subject: [PATCH 047/475] sched: Move nr_cpus_allowed out of 'struct sched_rt_entity' Since nr_cpus_allowed is used outside of sched/rt.c and wants to be used outside of there more, move it to a more natural site. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-kr61f02y9brwzkh6x53pdptm@git.kernel.org Signed-off-by: Ingo Molnar --- arch/blackfin/kernel/process.c | 2 +- include/linux/init_task.h | 2 +- include/linux/sched.h | 2 +- kernel/sched/core.c | 2 +- kernel/sched/fair.c | 2 +- kernel/sched/rt.c | 36 ++++++++++++++++++++-------------- 6 files changed, 26 insertions(+), 20 deletions(-) diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 2e3994b2016..62bcea7dcc6 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -173,7 +173,7 @@ asmlinkage int bfin_clone(struct pt_regs *regs) unsigned long newsp; #ifdef __ARCH_SYNC_CORE_DCACHE - if (current->rt.nr_cpus_allowed == num_possible_cpus()) + if (current->nr_cpus_allowed == num_possible_cpus()) set_cpus_allowed_ptr(current, cpumask_of(smp_processor_id())); #endif diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e4baff5f7ff..9e65eff6af3 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -149,6 +149,7 @@ extern struct cred init_cred; .normal_prio = MAX_PRIO-20, \ .policy = SCHED_NORMAL, \ .cpus_allowed = CPU_MASK_ALL, \ + .nr_cpus_allowed= NR_CPUS, \ .mm = NULL, \ .active_mm = &init_mm, \ .se = { \ @@ -157,7 +158,6 @@ extern struct cred init_cred; .rt = { \ .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \ .time_slice = RR_TIMESLICE, \ - .nr_cpus_allowed = NR_CPUS, \ }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ INIT_PUSHABLE_TASKS(tsk) \ diff --git a/include/linux/sched.h b/include/linux/sched.h index d61e5977e51..0f50e78f7f4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1188,7 +1188,6 @@ struct sched_rt_entity { struct list_head run_list; unsigned long timeout; unsigned int time_slice; - int nr_cpus_allowed; struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED @@ -1253,6 +1252,7 @@ struct task_struct { #endif unsigned int policy; + int nr_cpus_allowed; cpumask_t cpus_allowed; #ifdef CONFIG_PREEMPT_RCU diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3a69374fb42..70cc36a6073 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5015,7 +5015,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) p->sched_class->set_cpus_allowed(p, new_mask); cpumask_copy(&p->cpus_allowed, new_mask); - p->rt.nr_cpus_allowed = cpumask_weight(new_mask); + p->nr_cpus_allowed = cpumask_weight(new_mask); } /* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2b449a76207..b2a2d236f27 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2703,7 +2703,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) int want_sd = 1; int sync = wake_flags & WF_SYNC; - if (p->rt.nr_cpus_allowed == 1) + if (p->nr_cpus_allowed == 1) return prev_cpu; if (sd_flag & SD_BALANCE_WAKE) { diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index c5565c3c515..295da737b6f 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -274,13 +274,16 @@ static void update_rt_migration(struct rt_rq *rt_rq) static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { + struct task_struct *p; + if (!rt_entity_is_task(rt_se)) return; + p = rt_task_of(rt_se); rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total++; - if (rt_se->nr_cpus_allowed > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory++; update_rt_migration(rt_rq); @@ -288,13 +291,16 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { + struct task_struct *p; + if (!rt_entity_is_task(rt_se)) return; + p = rt_task_of(rt_se); rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total--; - if (rt_se->nr_cpus_allowed > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory--; update_rt_migration(rt_rq); @@ -1161,7 +1167,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD); - if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) + if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); inc_nr_running(rq); @@ -1225,7 +1231,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) cpu = task_cpu(p); - if (p->rt.nr_cpus_allowed == 1) + if (p->nr_cpus_allowed == 1) goto out; /* For anything but wake ups, just return the task_cpu */ @@ -1260,9 +1266,9 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) * will have to sort it out. */ if (curr && unlikely(rt_task(curr)) && - (curr->rt.nr_cpus_allowed < 2 || + (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio) && - (p->rt.nr_cpus_allowed > 1)) { + (p->nr_cpus_allowed > 1)) { int target = find_lowest_rq(p); if (target != -1) @@ -1276,10 +1282,10 @@ out: static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) { - if (rq->curr->rt.nr_cpus_allowed == 1) + if (rq->curr->nr_cpus_allowed == 1) return; - if (p->rt.nr_cpus_allowed != 1 + if (p->nr_cpus_allowed != 1 && cpupri_find(&rq->rd->cpupri, p, NULL)) return; @@ -1395,7 +1401,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) * The previous task needs to be made eligible for pushing * if it is still active */ - if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1) + if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } @@ -1408,7 +1414,7 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) && - (p->rt.nr_cpus_allowed > 1)) + (p->nr_cpus_allowed > 1)) return 1; return 0; } @@ -1464,7 +1470,7 @@ static int find_lowest_rq(struct task_struct *task) if (unlikely(!lowest_mask)) return -1; - if (task->rt.nr_cpus_allowed == 1) + if (task->nr_cpus_allowed == 1) return -1; /* No other targets possible */ if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) @@ -1586,7 +1592,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) BUG_ON(rq->cpu != task_cpu(p)); BUG_ON(task_current(rq, p)); - BUG_ON(p->rt.nr_cpus_allowed <= 1); + BUG_ON(p->nr_cpus_allowed <= 1); BUG_ON(!p->on_rq); BUG_ON(!rt_task(p)); @@ -1793,9 +1799,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && has_pushable_tasks(rq) && - p->rt.nr_cpus_allowed > 1 && + p->nr_cpus_allowed > 1 && rt_task(rq->curr) && - (rq->curr->rt.nr_cpus_allowed < 2 || + (rq->curr->nr_cpus_allowed < 2 || rq->curr->prio <= p->prio)) push_rt_tasks(rq); } @@ -1817,7 +1823,7 @@ static void set_cpus_allowed_rt(struct task_struct *p, * Only update if the process changes its state from whether it * can migrate or not. */ - if ((p->rt.nr_cpus_allowed > 1) == (weight > 1)) + if ((p->nr_cpus_allowed > 1) == (weight > 1)) return; rq = task_rq(p); From 454c79999f7eaedcdf4c15c449e43902980cbdf5 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 16 May 2012 21:34:23 -0700 Subject: [PATCH 048/475] sched/rt: Fix SCHED_RR across cgroups task_tick_rt() has an optimization to only reschedule SCHED_RR tasks if they were the only element on their rq. However, with cgroups a SCHED_RR task could be the only element on its per-cgroup rq but still be competing with other SCHED_RR tasks in its parent's cgroup. In this case, the SCHED_RR task in the child cgroup would never yield at the end of its timeslice. If the child cgroup rt_runtime_us was the same as the parent cgroup rt_runtime_us, the task in the parent cgroup would starve completely. Modify task_tick_rt() to check that the task is the only task on its rq, and that the each of the scheduling entities of its ancestors is also the only entity on its rq. Signed-off-by: Colin Cross Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1337229266-15798-1-git-send-email-ccross@android.com Signed-off-by: Ingo Molnar --- kernel/sched/rt.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 295da737b6f..2a4e8dffbd6 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1985,6 +1985,8 @@ static void watchdog(struct rq *rq, struct task_struct *p) static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) { + struct sched_rt_entity *rt_se = &p->rt; + update_curr_rt(rq); watchdog(rq, p); @@ -2002,12 +2004,15 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) p->rt.time_slice = RR_TIMESLICE; /* - * Requeue to the end of queue if we are not the only element - * on the queue: + * Requeue to the end of queue if we (and all of our ancestors) are the + * only element on the queue */ - if (p->rt.run_list.prev != p->rt.run_list.next) { - requeue_task_rt(rq, p, 0); - set_tsk_need_resched(p); + for_each_sched_rt_entity(rt_se) { + if (rt_se->run_list.prev != rt_se->run_list.next) { + requeue_task_rt(rq, p, 0); + set_tsk_need_resched(p); + return; + } } } From 1292531f6f27af909e713671dd9cc3bcab8114b7 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 25 May 2012 15:41:54 +0900 Subject: [PATCH 049/475] sched: Make sched_feat_names const The strings sched_feat_names are never changed. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/4FBF29B2.9030904@ct.jp.nec.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 70cc36a6073..c1679a098fc 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -142,7 +142,7 @@ const_debug unsigned int sysctl_sched_features = #define SCHED_FEAT(name, enabled) \ #name , -static __read_mostly char *sched_feat_names[] = { +static const char * const sched_feat_names[] = { #include "features.h" NULL }; From 7997a456ef841bb78eb6f881d7cc2c17c2f9b35e Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 25 May 2012 15:42:47 +0900 Subject: [PATCH 050/475] sched: Remove the last NULL entry from sched_feat_names No need to have the last NULL entry. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/4FBF29E7.5020805@ct.jp.nec.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c1679a098fc..94d598ac5e6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -144,7 +144,6 @@ const_debug unsigned int sysctl_sched_features = static const char * const sched_feat_names[] = { #include "features.h" - NULL }; #undef SCHED_FEAT From 6a4c96eef42f835734a82c6b512abf9881b7c55d Mon Sep 17 00:00:00 2001 From: Kamalesh Babulal Date: Wed, 23 May 2012 14:44:11 +0530 Subject: [PATCH 051/475] sched: Remove NULL assignment of dattr_cur Remove explicit NULL assignment of static pointer dattr_cur from init_sched_domains(). Signed-off-by: Kamalesh Babulal Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120523091411.GG5005@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 94d598ac5e6..c46958e2612 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6726,7 +6726,6 @@ static int init_sched_domains(const struct cpumask *cpu_map) if (!doms_cur) doms_cur = &fallback_doms; cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); - dattr_cur = NULL; err = build_sched_domains(doms_cur[0], NULL); register_sched_domain_sysctl(); From 5e152e6c4b0da84c66cad56597b42c4ecedb0448 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 23 May 2012 13:28:44 -0400 Subject: [PATCH 052/475] xen/events: Add WARN_ON when quick lookup found invalid type. All of the bind_XYZ_to_irq do a quick lookup to see if the event exists. And if it does, then the initialized IRQ number is returned instead of initializing a new IRQ number. This patch adds an extra logic to check that the type returned is proper one and that there is an IRQ handler setup for it. This patch has the benefit of being able to find drivers that are doing something naught. [v1: Enhanced based on Stefano's review] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index faae2f910ad..af8b8fbd9d8 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -827,6 +827,9 @@ int bind_evtchn_to_irq(unsigned int evtchn) handle_edge_irq, "event"); xen_irq_info_evtchn_init(irq, evtchn); + } else { + struct irq_info *info = info_for_irq(irq); + WARN_ON(info == NULL || info->type != IRQT_EVTCHN); } out: @@ -862,6 +865,9 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); bind_evtchn_to_cpu(evtchn, cpu); + } else { + struct irq_info *info = info_for_irq(irq); + WARN_ON(info == NULL || info->type != IRQT_IPI); } out: @@ -939,6 +945,9 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu) xen_irq_info_virq_init(cpu, irq, evtchn, virq); bind_evtchn_to_cpu(evtchn, cpu); + } else { + struct irq_info *info = info_for_irq(irq); + WARN_ON(info == NULL || info->type != IRQT_VIRQ); } out: From 780dbcd0eedec6528d777b668019dabb36badf1a Mon Sep 17 00:00:00 2001 From: "Zhang, Yang Z" Date: Tue, 22 May 2012 08:40:16 +0000 Subject: [PATCH 053/475] xen/pci: Check for PCI bridge before using it. Some SR-IOV devices may use more than one bus number, but there is no real bridges because that have internal routing mechanism. So need to check whether the bridge is existing before using it. Signed-off-by: Yang Zhang Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c index b84bf0b6cc3..18fff88254e 100644 --- a/drivers/xen/pci.c +++ b/drivers/xen/pci.c @@ -59,7 +59,7 @@ static int xen_add_device(struct device *dev) #ifdef CONFIG_ACPI handle = DEVICE_ACPI_HANDLE(&pci_dev->dev); - if (!handle) + if (!handle && pci_dev->bus->bridge) handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge); #ifdef CONFIG_PCI_IOV if (!handle && pci_dev->is_virtfn) From 58b7b53a36b0be8081fbfc91aeea24b83c20ca1b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 29 May 2012 12:36:43 -0400 Subject: [PATCH 054/475] xen/balloon: Subtract from xen_released_pages the count that is populated. We did not take into account that xen_released_pages would be used outside the initial E820 parsing code. As such we would did not subtract from xen_released_pages the count of pages that we had populated back (instead we just did a simple extra_pages = released - populated). The balloon driver uses xen_released_pages to set the initial current_pages count. If this is wrong (too low) then when a new (higher) target is set, the balloon driver will request too many pages from Xen." This fixes errors such as: (XEN) memory.c:133:d0 Could not allocate order=0 extent: id=0 memflags=0 (51 of 512) during bootup and free_memory : 0 where the free_memory should be 128. Acked-by: David Vrabel [v1: Per David's review made the git commit better] Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/setup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 3ebba0753d3..a4790bf22c5 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -371,7 +371,8 @@ char * __init xen_memory_setup(void) populated = xen_populate_chunk(map, memmap.nr_entries, max_pfn, &last_pfn, xen_released_pages); - extra_pages += (xen_released_pages - populated); + xen_released_pages -= populated; + extra_pages += xen_released_pages; if (last_pfn > max_pfn) { max_pfn = min(MAX_DOMAIN_PAGES, last_pfn); From 91557e847dbc715acf2d847a1ffec63f71b00b65 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 30 May 2012 12:25:52 -0300 Subject: [PATCH 055/475] perf report: Use the right symbol for annotation In non symbolic views, i.e. --sort without "symbol", as in: perf report --sort comm We're segfaulting in the --tui because we're testing the symbol resolved and then trying to use the symbol on the histogram entry where we're coalescing all hits for a COMM, and the first hist_entry for a comm may have a NULL symbol, i.e. the RIP didn't resolve to any symbol. In this case we're segfaulting, fix it by testing against the symbol in the histogram entry. Reported-by: William Cohen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-8ylwubbcmu27ucc9ffrku3yv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8c767c6bca9..2400e009f14 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -162,7 +162,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, * so we don't allocated the extra space needed because the stdio * code will not use it. */ - if (al->sym != NULL && use_browser > 0) { + if (he->ms.sym != NULL && use_browser > 0) { struct annotation *notes = symbol__annotation(he->ms.sym); assert(evsel != NULL); From 107baecaca0b2843f1a0464701b253e51ef6f0e2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 30 May 2012 12:31:44 -0300 Subject: [PATCH 056/475] perf annotate browser: Fix help window entry for navigating to hottest line Its 'H', not 'h'. The later is for getting to the help window. Reported-by: Ingo Molnar Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-7zvwphhm815y2zczoxgstzuf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 4deea6aaf92..34b1c46eaf4 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -668,7 +668,7 @@ static int annotate_browser__run(struct annotate_browser *browser, int evidx, "q/ESC/CTRL+C Exit\n\n" "-> Go to target\n" "<- Exit\n" - "h Cycle thru hottest instructions\n" + "H Cycle thru hottest instructions\n" "j Toggle showing jump to target arrows\n" "J Toggle showing number of jump sources on targets\n" "n Search next string\n" From 79695e1bb65ba0e21488c360a1bed6e358354aaa Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 30 May 2012 13:53:54 -0300 Subject: [PATCH 057/475] perf stat: Initialize default events wrt exclude_{guest,host} When no event is specified the tools use perf_evlist__add_default(), that will call event_attr_init to initialize the KVM exclusion bits. When the change was made to the tools so that by default guest samples would be excluded, the changes were made just to the parsing routines and to perf_evlist__add_default(), not to perf_evlist__add_attrs, that is used so far just by perf stat to add multiple events, according to the level of detail specified. Recently the tools were changed to reconstruct the event name from all the details in perf_event_attr, not just from .type and .config, but taking into account all the feature bits (.exclude_{guest,host,user,kernel,etc}, .precise_ip, etc). That is when we noticed that the default for perf stat wasn't the one for the rest of the tools, i.e. the .exclude_guest bit wasn't being set. I.e. the default, that doesn't call event_attr_init was showing the :HG modifier: $ perf stat usleep 1 Performance counter stats for 'usleep 1': 0.942119 task-clock # 0.454 CPUs utilized 1 context-switches # 0.001 M/sec 0 CPU-migrations # 0.000 K/sec 126 page-faults # 0.134 M/sec 693,193 cycles:HG # 0.736 GHz [40.11%] 407,461 stalled-cycles-frontend:HG # 58.78% frontend cycles idle [72.29%] 365,403 stalled-cycles-backend:HG # 52.71% backend cycles idle 465,982 instructions:HG # 0.67 insns per cycle # 0.87 stalled cycles per insn 89,760 branches:HG # 95.275 M/sec 6,178 branch-misses:HG # 6.88% of all branches 0.002077228 seconds time elapsed While if one explicitely specifies the same events, which will make the parsing code to be called and thus event_attr_init is called: $ perf stat -e task-clock,context-switches,migrations,page-faults,cycles,stalled-cycles-frontend,stalled-cycles-backend,instructions,branches,branch-misses usleep 1 Performance counter stats for 'usleep 1': 1.040349 task-clock # 0.500 CPUs utilized 2 context-switches # 0.002 M/sec 0 CPU-migrations # 0.000 K/sec 127 page-faults # 0.122 M/sec 587,966 cycles # 0.565 GHz [13.18%] 459,167 stalled-cycles-frontend # 78.09% frontend cycles idle 390,249 stalled-cycles-backend # 66.37% backend cycles idle 504,006 instructions # 0.86 insns per cycle # 0.91 stalled cycles per insn 96,455 branches # 92.714 M/sec 6,522 branch-misses # 6.76% of all branches [96.12%] 0.002078681 seconds time elapsed Fix it by introducing a perf_evlist__add_default_attrs method that will call evlist_attr_init in all the perf_event_attr entries before adding the events. Reported-by: Ingo Molnar Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-4eysr236r0pgiyum9epwxw7s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 8 ++++---- tools/perf/util/evlist.c | 11 +++++++++++ tools/perf/util/evlist.h | 4 ++++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 62ae30d34fa..262589991ea 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1129,7 +1129,7 @@ static int add_default_attributes(void) return 0; if (!evsel_list->nr_entries) { - if (perf_evlist__add_attrs_array(evsel_list, default_attrs) < 0) + if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) return -1; } @@ -1139,21 +1139,21 @@ static int add_default_attributes(void) return 0; /* Append detailed run extra attributes: */ - if (perf_evlist__add_attrs_array(evsel_list, detailed_attrs) < 0) + if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) return -1; if (detailed_run < 2) return 0; /* Append very detailed run extra attributes: */ - if (perf_evlist__add_attrs_array(evsel_list, very_detailed_attrs) < 0) + if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) return -1; if (detailed_run < 3) return 0; /* Append very, very detailed run extra attributes: */ - return perf_evlist__add_attrs_array(evsel_list, very_very_detailed_attrs); + return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); } int cmd_stat(int argc, const char **argv, const char *prefix __used) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 4ac5f5ae4ce..ed277e5627c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -159,6 +159,17 @@ out_delete_partial_list: return -1; } +int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, + struct perf_event_attr *attrs, size_t nr_attrs) +{ + size_t i; + + for (i = 0; i < nr_attrs; i++) + event_attr_init(attrs + i); + + return perf_evlist__add_attrs(evlist, attrs, nr_attrs); +} + static int trace_event__id(const char *evname) { char *filename, *colon; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 58abb63ac13..989bee9624c 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -54,6 +54,8 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); int perf_evlist__add_default(struct perf_evlist *evlist); int perf_evlist__add_attrs(struct perf_evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); +int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, + struct perf_event_attr *attrs, size_t nr_attrs); int perf_evlist__add_tracepoints(struct perf_evlist *evlist, const char *tracepoints[], size_t nr_tracepoints); int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, @@ -62,6 +64,8 @@ int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, #define perf_evlist__add_attrs_array(evlist, array) \ perf_evlist__add_attrs(evlist, array, ARRAY_SIZE(array)) +#define perf_evlist__add_default_attrs(evlist, array) \ + __perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) #define perf_evlist__add_tracepoints_array(evlist, array) \ perf_evlist__add_tracepoints(evlist, array, ARRAY_SIZE(array)) From 52deff71bc2b2c24587ab71f588ff5e4c9279349 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 May 2012 22:58:26 -0600 Subject: [PATCH 058/475] perf script: Fix regression in callchain dso name $ perf script -i /tmp/perf.data ... gcc 13623 544315.062858: context-switches: ffffffff815f65c9 __schedule ([kernel.kallsyms]) ffffffff81087cea __cond_resched ([kernel.kallsyms]) ffffffff815f6b92 _cond_resched ([kernel.kallsyms]) ffffffff815fb87a do_page_fault ([kernel.kallsyms]) ffffffff815f8465 page_fault ([kernel.kallsyms]) 2b7a71ea0303 _dl_lookup_symbol_x ([kernel.kallsyms]) 2b7a71ea1eb5 _dl_relocate_object ([kernel.kallsyms]) 2b7a71e99b2e dl_main ([kernel.kallsyms]) 2b7a71eab7f4 _dl_sysdep_start ([kernel.kallsyms]) All DSO's in a callchain are printed as [kernel.kallsyms]. git bisect chased it to: 547a92e0aedb88129e7fbd804697a11949de2e5a is the first bad commit commit 547a92e0aedb88129e7fbd804697a11949de2e5a Author: Akihiro Nagai Date: Mon Jan 30 13:42:57 2012 +0900 perf script: Unify the expressions indicating "unknown" The perf script command uses various expressions to indicate "unknown". It is unfriendly for user scripts to parse it. So, this patch unifies the expressions to "[unknown]". Looks like a copy-paste in that the other references use al.map but this one should be node->map. With this patch you get: $ perf script -i /tmp/perf.data ... gcc 13623 544315.062858: context-switches: ffffffff815f65c9 __schedule ([kernel.kallsyms]) ffffffff81087cea __cond_resched ([kernel.kallsyms]) ffffffff815f6b92 _cond_resched ([kernel.kallsyms]) ffffffff815fb87a do_page_fault ([kernel.kallsyms]) ffffffff815f8465 page_fault ([kernel.kallsyms]) 2b7a71ea0303 _dl_lookup_symbol_x (/lib64/ld-2.14.90.so) 2b7a71ea1eb5 _dl_relocate_object (/lib64/ld-2.14.90.so) 2b7a71e99b2e dl_main (/lib64/ld-2.14.90.so) 2b7a71eab7f4 _dl_sysdep_start (/lib64/ld-2.14.90.so) Signed-off-by: David Ahern Cc: Akihiro Nagai Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1338353906-60706-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 93d355d2710..48206144758 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1460,7 +1460,7 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, } if (print_dso) { printf(" ("); - map__fprintf_dsoname(al.map, stdout); + map__fprintf_dsoname(node->map, stdout); printf(")"); } printf("\n"); From f1439c315b328bbb3f7f6fdd814a7057f9e130d5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 30 May 2012 15:02:42 -0300 Subject: [PATCH 059/475] perf tools: Fix make tarballs The patch series that introduced the top level tools/ makefile and the libtraceevent broke this feature where files needed to build in a detached tarball were not included in the MANIFEST file and thus not included in the tarball. Fix it by adding the relevant files to the MANIFEST. Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-z3mjj74927xvqwhlmu18kj80@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/MANIFEST | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 5476bc0a1ea..b4b572e8c10 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,4 +1,6 @@ tools/perf +tools/scripts +tools/lib/traceevent include/linux/const.h include/linux/perf_event.h include/linux/rbtree.h From ea1b3ebac9a1c72c4362c784b4ed069a938a4ddb Mon Sep 17 00:00:00 2001 From: Avik Sil Date: Tue, 29 May 2012 16:05:25 +0530 Subject: [PATCH 060/475] perf tools: Fix pager on minimal-install embedded systems Some Distributions may lack "less" package being included by default, e.g., Linaro nano rootfs. In those cases use the portable "pager" command instead of "less". Signed-off-by: Avik Sil Acked-by: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1338287725-26382-1-git-send-email-avik.sil@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pager.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c index 1915de20dca..3322b8446e8 100644 --- a/tools/perf/util/pager.c +++ b/tools/perf/util/pager.c @@ -57,6 +57,10 @@ void setup_pager(void) } if (!pager) pager = getenv("PAGER"); + if (!pager) { + if (!access("/usr/bin/pager", X_OK)) + pager = "/usr/bin/pager"; + } if (!pager) pager = "less"; else if (!*pager || !strcmp(pager, "cat")) From 5f2a3d6191e49df5d56332d3b65d6636c355f635 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Thu, 24 May 2012 22:07:35 +0200 Subject: [PATCH 061/475] sbp-target: rename a variable to avoid name clash 'int login_id' shadows 'static atomic_t login_id'. Seen as compilation warning on x86-32. Signed-off-by: Stefan Richter Acked-by: Chris Boot Signed-off-by: Nicholas Bellinger --- drivers/target/sbp/sbp_target.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 37c609898f8..7e6136e2ce8 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -587,14 +587,14 @@ static void sbp_management_request_logout( { struct sbp_tport *tport = agent->tport; struct sbp_tpg *tpg = tport->tpg; - int login_id; + int id; struct sbp_login_descriptor *login; - login_id = LOGOUT_ORB_LOGIN_ID(be32_to_cpu(req->orb.misc)); + id = LOGOUT_ORB_LOGIN_ID(be32_to_cpu(req->orb.misc)); - login = sbp_login_find_by_id(tpg, login_id); + login = sbp_login_find_by_id(tpg, id); if (!login) { - pr_warn("cannot find login: %d\n", login_id); + pr_warn("cannot find login: %d\n", id); req->status.status = cpu_to_be32( STATUS_BLOCK_RESP(STATUS_RESP_REQUEST_COMPLETE) | From aba336bd1d46d6b0404b06f6915ed76150739057 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 31 May 2012 15:39:11 +1000 Subject: [PATCH 062/475] md: raid1/raid10: fix problem with merge_bvec_fn The new merge_bvec_fn which calls the corresponding function in subsidiary devices requires that mddev->merge_check_needed be set if any child has a merge_bvec_fn. However were were only setting that when a device was hot-added, not when a device was present from the start. This bug was introduced in 3.4 so patch is suitable for 3.4.y kernels. However that are conflicts in raid10.c so a separate patch will be needed for 3.4.y. Cc: stable@vger.kernel.org Reported-by: Sebastian Riemer Signed-off-by: NeilBrown --- drivers/md/raid1.c | 4 ++++ drivers/md/raid10.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 835de7168cd..a9c7981ddd2 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2550,6 +2550,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) err = -EINVAL; spin_lock_init(&conf->device_lock); rdev_for_each(rdev, mddev) { + struct request_queue *q; int disk_idx = rdev->raid_disk; if (disk_idx >= mddev->raid_disks || disk_idx < 0) @@ -2562,6 +2563,9 @@ static struct r1conf *setup_conf(struct mddev *mddev) if (disk->rdev) goto abort; disk->rdev = rdev; + q = bdev_get_queue(rdev->bdev); + if (q->merge_bvec_fn) + mddev->merge_check_needed = 1; disk->head_position = 0; } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 987db37cb87..99ae6068e45 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3475,6 +3475,7 @@ static int run(struct mddev *mddev) rdev_for_each(rdev, mddev) { long long diff; + struct request_queue *q; disk_idx = rdev->raid_disk; if (disk_idx < 0) @@ -3493,6 +3494,9 @@ static int run(struct mddev *mddev) goto out_free_conf; disk->rdev = rdev; } + q = bdev_get_queue(rdev->bdev); + if (q->merge_bvec_fn) + mddev->merge_check_needed = 1; diff = (rdev->new_data_offset - rdev->data_offset); if (!mddev->reshape_backwards) diff = -diff; From 121daad8fd1dce63076fa55aaedd5dc3f981b334 Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Thu, 31 May 2012 20:53:08 +1000 Subject: [PATCH 063/475] hwrng: atmel-rng - fix race condition leading to repeated bits Data valid gets cleared by reading the ISR (status register) and NOT from reading ODATA (data register). A new data word can become available between checking ISR and reading ODATA, causing us to reuse the same data word next time atmel_trng_read() gets called, if that happens before the following data word is ready. With this fixed, rngtest no longer complains of 'Continous run' errors. Before: rngtest -c 1000 < /dev/hwrng rngtest 3 Copyright (c) 2004 by Henrique de Moraes Holschuh This is free software; see the source for copying conditions. There is NO warr. rngtest: starting FIPS tests... rngtest: bits received from input: 20000032 rngtest: FIPS 140-2 successes: 923 rngtest: FIPS 140-2 failures: 77 rngtest: FIPS 140-2(2001-10-10) Monobit: 0 rngtest: FIPS 140-2(2001-10-10) Poker: 0 rngtest: FIPS 140-2(2001-10-10) Runs: 1 rngtest: FIPS 140-2(2001-10-10) Long run: 0 rngtest: FIPS 140-2(2001-10-10) Continuous run: 76 rngtest: input channel speed: (min=721.402; avg=46003.510; max=49321.338)Kibitss rngtest: FIPS tests speed: (min=11.442; avg=12.714; max=12.801)Mibits/s rngtest: Program run time: 1931860 microseconds After: rngtest -c 1000 < /dev/hwrng rngtest 3 Copyright (c) 2004 by Henrique de Moraes Holschuh This is free software; see the source for copying conditions. There is NO warr. rngtest: starting FIPS tests... rngtest: bits received from input: 20000032 rngtest: FIPS 140-2 successes: 1000 rngtest: FIPS 140-2 failures: 0 rngtest: FIPS 140-2(2001-10-10) Monobit: 0 rngtest: FIPS 140-2(2001-10-10) Poker: 0 rngtest: FIPS 140-2(2001-10-10) Runs: 0 rngtest: FIPS 140-2(2001-10-10) Long run: 0 rngtest: FIPS 140-2(2001-10-10) Continuous run: 0 rngtest: input channel speed: (min=777.518; avg=36988.482; max=43115.342)Kibitss rngtest: FIPS tests speed: (min=11.951; avg=12.715; max=12.887)Mibits/s rngtest: Program run time: 2035543 microseconds Cc: stable@vger.kernel.org Signed-off-by: Peter Korsgaard Reported-by: George Pontis Acked-by: Nicolas Ferre Signed-off-by: Herbert Xu --- drivers/char/hw_random/atmel-rng.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c index f518b99f53f..6289f0eee24 100644 --- a/drivers/char/hw_random/atmel-rng.c +++ b/drivers/char/hw_random/atmel-rng.c @@ -36,6 +36,13 @@ static int atmel_trng_read(struct hwrng *rng, void *buf, size_t max, /* data ready? */ if (readl(trng->base + TRNG_ODATA) & 1) { *data = readl(trng->base + TRNG_ODATA); + /* + ensure data ready is only set again AFTER the next data + word is ready in case it got set between checking ISR + and reading ODATA, so we don't risk re-reading the + same word + */ + readl(trng->base + TRNG_ISR); return 4; } else return 0; From 7c8d51848a88aafdb68f42b6b650c83485ea2f84 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 30 May 2012 01:43:08 +0200 Subject: [PATCH 064/475] crypto: aesni-intel - fix unaligned cbc decrypt for x86-32 The 32 bit variant of cbc(aes) decrypt is using instructions requiring 128 bit aligned memory locations but fails to ensure this constraint in the code. Fix this by loading the data into intermediate registers with load unaligned instructions. This fixes reported general protection faults related to aesni. References: https://bugzilla.kernel.org/show_bug.cgi?id=43223 Reported-by: Daniel Cc: stable@kernel.org [v2.6.39+] Signed-off-by: Mathias Krause Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index be6d9e365a8..3470624d783 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -2460,10 +2460,12 @@ ENTRY(aesni_cbc_dec) pxor IN3, STATE4 movaps IN4, IV #else - pxor (INP), STATE2 - pxor 0x10(INP), STATE3 pxor IN1, STATE4 movaps IN2, IV + movups (INP), IN1 + pxor IN1, STATE2 + movups 0x10(INP), IN2 + pxor IN2, STATE3 #endif movups STATE1, (OUTP) movups STATE2, 0x10(OUTP) From 9e612a008fa7fe493a473454def56aa321479495 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 31 May 2012 13:08:53 +0100 Subject: [PATCH 065/475] drm/i915/crt: Do not rely upon the HPD presence pin Whilst most monitors do wire up the HPD presence pin, it seems quite a few KVM do not. Therefore if we simply rely on the HPD pin being asserted to indicate a connected monitor we fail miserable, so fall back to performing a DCC query for the EDID. Reported-and-tested-by: Matthieu LAVIE Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=50501 Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_crt.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 75a70c46ef1..844e93e1e39 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -453,13 +453,15 @@ intel_crt_detect(struct drm_connector *connector, bool force) struct intel_load_detect_pipe tmp; if (I915_HAS_HOTPLUG(dev)) { + /* We can not rely on the HPD pin always being correctly wired + * up, for example many KVM do not pass it through, and so + * only trust an assertion that the monitor is connected. + */ if (intel_crt_detect_hotplug(connector)) { DRM_DEBUG_KMS("CRT detected via hotplug\n"); return connector_status_connected; - } else { + } else DRM_DEBUG_KMS("CRT not detected via hotplug\n"); - return connector_status_disconnected; - } } if (intel_crt_detect_ddc(connector)) From 472606458f3e1ced5fe3cc5f04e90a6b5a4732cf Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 31 May 2012 14:43:26 +0900 Subject: [PATCH 066/475] perf callchain: Make callchain cursors TLS perf top -G has a race on callchain cursor between main thread and display thread. Since the callchain cursors are used locally make them thread-local data would solve the problem. Signed-off-by: Namhyung Kim Reported-by: Sunjin Yang Suggested-by: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sunjin Yang Link: http://lkml.kernel.org/r/1338443007-24857-1-git-send-email-namhyung.kim@lge.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/util/callchain.c | 2 ++ tools/perf/util/callchain.h | 2 ++ tools/perf/util/hist.c | 7 ++++--- tools/perf/util/hist.h | 2 -- tools/perf/util/session.c | 14 +++++++------- 7 files changed, 17 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2400e009f14..25249f76329 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -152,7 +152,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, if (symbol_conf.use_callchain) { err = callchain_append(he->callchain, - &evsel->hists.callchain_cursor, + &callchain_cursor, sample->period); if (err) return err; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 871b540293e..6bb0277b7df 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -787,7 +787,7 @@ static void perf_event__process_sample(struct perf_tool *tool, } if (symbol_conf.use_callchain) { - err = callchain_append(he->callchain, &evsel->hists.callchain_cursor, + err = callchain_append(he->callchain, &callchain_cursor, sample->period); if (err) return; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 9f7106a8d9a..3a6bff47614 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -18,6 +18,8 @@ #include "util.h" #include "callchain.h" +__thread struct callchain_cursor callchain_cursor; + bool ip_callchain__valid(struct ip_callchain *chain, const union perf_event *event) { diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 7f9c0f1ae3a..3bdb407f9cd 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -76,6 +76,8 @@ struct callchain_cursor { struct callchain_cursor_node *curr; }; +extern __thread struct callchain_cursor callchain_cursor; + static inline void callchain_init(struct callchain_root *root) { INIT_LIST_HEAD(&root->node.siblings); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 1293b5ebea4..514e2a4b367 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -378,7 +378,7 @@ void hist_entry__free(struct hist_entry *he) * collapse the histogram */ -static bool hists__collapse_insert_entry(struct hists *hists, +static bool hists__collapse_insert_entry(struct hists *hists __used, struct rb_root *root, struct hist_entry *he) { @@ -397,8 +397,9 @@ static bool hists__collapse_insert_entry(struct hists *hists, iter->period += he->period; iter->nr_events += he->nr_events; if (symbol_conf.use_callchain) { - callchain_cursor_reset(&hists->callchain_cursor); - callchain_merge(&hists->callchain_cursor, iter->callchain, + callchain_cursor_reset(&callchain_cursor); + callchain_merge(&callchain_cursor, + iter->callchain, he->callchain); } hist_entry__free(he); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index cfc64e293f9..34bb556d621 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -67,8 +67,6 @@ struct hists { struct events_stats stats; u64 event_stream; u16 col_len[HISTC_NR_COLS]; - /* Best would be to reuse the session callchain cursor */ - struct callchain_cursor callchain_cursor; }; struct hist_entry *__hists__add_entry(struct hists *self, diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 48206144758..3b6f8e460a3 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -288,7 +288,8 @@ struct branch_info *machine__resolve_bstack(struct machine *self, return bi; } -int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, +int machine__resolve_callchain(struct machine *self, + struct perf_evsel *evsel __used, struct thread *thread, struct ip_callchain *chain, struct symbol **parent) @@ -297,7 +298,7 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, unsigned int i; int err; - callchain_cursor_reset(&evsel->hists.callchain_cursor); + callchain_cursor_reset(&callchain_cursor); for (i = 0; i < chain->nr; i++) { u64 ip; @@ -333,7 +334,7 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, break; } - err = callchain_cursor_append(&evsel->hists.callchain_cursor, + err = callchain_cursor_append(&callchain_cursor, ip, al.map, al.sym); if (err) return err; @@ -1428,7 +1429,6 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, int print_sym, int print_dso, int print_symoffset) { struct addr_location al; - struct callchain_cursor *cursor = &evsel->hists.callchain_cursor; struct callchain_cursor_node *node; if (perf_event__preprocess_sample(event, machine, &al, sample, @@ -1446,10 +1446,10 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, error("Failed to resolve callchain. Skipping\n"); return; } - callchain_cursor_commit(cursor); + callchain_cursor_commit(&callchain_cursor); while (1) { - node = callchain_cursor_current(cursor); + node = callchain_cursor_current(&callchain_cursor); if (!node) break; @@ -1465,7 +1465,7 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, } printf("\n"); - callchain_cursor_advance(cursor); + callchain_cursor_advance(&callchain_cursor); } } else { From 114067b69e7b2c691faace0e33db2f04096f668d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 31 May 2012 14:43:27 +0900 Subject: [PATCH 067/475] perf tools: Check if callchain is corrupted We faced segmentation fault on perf top -G at very high sampling rate due to a corrupted callchain. While the root cause was not revealed (I failed to figure it out), this patch tries to protect us from the segfault on such cases. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sunjin Yang Link: http://lkml.kernel.org/r/1338443007-24857-2-git-send-email-namhyung.kim@lge.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 4 ++-- tools/perf/util/session.c | 14 +++++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f32578634d9..1817d4015e5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -555,6 +555,8 @@ enum perf_event_type { PERF_RECORD_MAX, /* non-ABI */ }; +#define PERF_MAX_STACK_DEPTH 255 + enum perf_callchain_context { PERF_CONTEXT_HV = (__u64)-32, PERF_CONTEXT_KERNEL = (__u64)-128, @@ -609,8 +611,6 @@ struct perf_guest_info_callbacks { #include #include -#define PERF_MAX_STACK_DEPTH 255 - struct perf_callchain_entry { __u64 nr; __u64 ip[PERF_MAX_STACK_DEPTH]; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 3b6f8e460a3..04d1e33f459 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -300,6 +300,11 @@ int machine__resolve_callchain(struct machine *self, callchain_cursor_reset(&callchain_cursor); + if (chain->nr > PERF_MAX_STACK_DEPTH) { + pr_warning("corrupted callchain. skipping...\n"); + return 0; + } + for (i = 0; i < chain->nr; i++) { u64 ip; struct addr_location al; @@ -318,7 +323,14 @@ int machine__resolve_callchain(struct machine *self, case PERF_CONTEXT_USER: cpumode = PERF_RECORD_MISC_USER; break; default: - break; + pr_debug("invalid callchain context: " + "%"PRId64"\n", (s64) ip); + /* + * It seems the callchain is corrupted. + * Discard all. + */ + callchain_cursor_reset(&callchain_cursor); + return 0; } continue; } From aa5cdd308ddbb08959534be408eba8ef040b5989 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 31 May 2012 11:17:34 -0300 Subject: [PATCH 068/475] perf tools: Make --version show kernel version instead of pull req tag Before: $ perf --version perf version perf.urgent.for.mingo.5.g37da28 After: $ perf --version perf version 3.4.8941.g37da28.dirty Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-vc9b4e6023iegz9kabr3yvyv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/PERF-VERSION-GEN | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index ad73300f7ba..95264f30417 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -12,7 +12,7 @@ LF=' # First check if there is a .git to get the version from git describe # otherwise try to get the version from the kernel makefile if test -d ../../.git -o -f ../../.git && - VN=$(git describe --abbrev=4 HEAD 2>/dev/null) && + VN=$(git describe --match 'v[0-9].[0-9]*' --abbrev=4 HEAD 2>/dev/null) && case "$VN" in *$LF*) (exit 1) ;; v[0-9]*) From a59e64a13a927fb7530bef39e9f5e7de8268137e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 31 May 2012 14:51:45 +0900 Subject: [PATCH 069/475] perf tools: Update ioctl documentation for PERF_IOC_FLAG_GROUP The ioctl interface of perf event fd receives 3 arguments to control event group behavior but it lacked documentation. Signed-off-by: Namhyung Kim Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1338443506-25009-2-git-send-email-namhyung.kim@lge.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/design.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/design.txt b/tools/perf/design.txt index bd0bb1b1279..67e5d0cace8 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -409,14 +409,15 @@ Counters can be enabled and disabled in two ways: via ioctl and via prctl. When a counter is disabled, it doesn't count or generate events but does continue to exist and maintain its count value. -An individual counter or counter group can be enabled with +An individual counter can be enabled with - ioctl(fd, PERF_EVENT_IOC_ENABLE); + ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); or disabled with - ioctl(fd, PERF_EVENT_IOC_DISABLE); + ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); +For a counter group, pass PERF_IOC_FLAG_GROUP as the third argument. Enabling or disabling the leader of a group enables or disables the whole group; that is, while the group leader is disabled, none of the counters in the group will count. Enabling or disabling a member of a From 55da80059de6c7533724fcd95f16c5d5618ecf4d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 31 May 2012 14:51:46 +0900 Subject: [PATCH 070/475] perf evlist: Pass third argument to ioctl explicitly The ioctl on perf event fd wants 3 arguments but we only passed 2. As the only user of the functions is perf record and it calls them for every event (regardless of group setting), just pass 0 for now. Signed-off-by: Namhyung Kim Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1338443506-25009-3-git-send-email-namhyung.kim@lge.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ed277e5627c..7400fb3fc50 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -274,7 +274,8 @@ void perf_evlist__disable(struct perf_evlist *evlist) for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { list_for_each_entry(pos, &evlist->entries, node) { for (thread = 0; thread < evlist->threads->nr; thread++) - ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_DISABLE); + ioctl(FD(pos, cpu, thread), + PERF_EVENT_IOC_DISABLE, 0); } } } @@ -287,7 +288,8 @@ void perf_evlist__enable(struct perf_evlist *evlist) for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { list_for_each_entry(pos, &evlist->entries, node) { for (thread = 0; thread < evlist->threads->nr; thread++) - ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_ENABLE); + ioctl(FD(pos, cpu, thread), + PERF_EVENT_IOC_ENABLE, 0); } } } From 8db4841fc72acc58254029f050226ea5f8103854 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 30 May 2012 14:23:42 +0200 Subject: [PATCH 071/475] perf symbols: Handle different endians properly during symbol load Currently we dont care about the file object's endianness. It's possible we read buildid file object from different architecture than we are currentlly running on. So we need to care about properly reading such object's data - handle different endianness properly. Adding: needs_swap DSO field dso__swap_init function to initialize DSO's needs_swap DSO__SWAP to read the data with proper swaps Together with other endianity patches, this change fixies perf report discrepancies on origin and target systems as described in test 1 below, e.g. following perf report diff: ... 0.12% ps [kernel.kallsyms] [k] clear_page - 0.12% awk bash [.] alloc_word_desc + 0.12% awk bash [.] yyparse 0.11% beah-rhts-task libpython2.6.so.1.0 [.] 0x5560e 0.10% perf libc-2.12.so [.] __ctype_toupper_loc - 0.09% rhts-test-runne bash [.] maybe_make_export_env + 0.09% rhts-test-runne bash [.] 0x385a0 0.09% ps [kernel.kallsyms] [k] page_fault ... Note, running following to test perf endianity handling: test 1) - origin system: # perf record -a -- sleep 10 (any perf record will do) # perf report > report.origin # perf archive perf.data - copy the perf.data, report.origin and perf.data.tar.bz2 to a target system and run: # tar xjvf perf.data.tar.bz2 -C ~/.debug # perf report > report.target # diff -u report.origin report.target - the diff should produce no output (besides some white space stuff and possibly different date/TZ output) test 1) - origin system: # perf record -ag -fo /tmp/perf.data -- sleep 1 - mount origin system root to the target system on /mnt/origin - target system: # perf script --symfs /mnt/origin -I -i /mnt/origin/tmp/perf.data \ --kallsyms /mnt/origin/proc/kallsyms - complete perf.data header is displayed Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1338380624-7443-2-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 33 ++++++++++++++++++++++++++++++++- tools/perf/util/symbol.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e2ba8858f3e..9d04dcd9181 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -323,6 +323,7 @@ struct dso *dso__new(const char *name) dso->sorted_by_name = 0; dso->has_build_id = 0; dso->kernel = DSO_TYPE_USER; + dso->needs_swap = DSO_SWAP__UNSET; INIT_LIST_HEAD(&dso->node); } @@ -1156,6 +1157,33 @@ static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr) return -1; } +static int dso__swap_init(struct dso *dso, unsigned char eidata) +{ + static unsigned int const endian = 1; + + dso->needs_swap = DSO_SWAP__NO; + + switch (eidata) { + case ELFDATA2LSB: + /* We are big endian, DSO is little endian. */ + if (*(unsigned char const *)&endian != 1) + dso->needs_swap = DSO_SWAP__YES; + break; + + case ELFDATA2MSB: + /* We are little endian, DSO is big endian. */ + if (*(unsigned char const *)&endian != 0) + dso->needs_swap = DSO_SWAP__YES; + break; + + default: + pr_err("unrecognized DSO data encoding %d\n", eidata); + return -EINVAL; + } + + return 0; +} + static int dso__load_sym(struct dso *dso, struct map *map, const char *name, int fd, symbol_filter_t filter, int kmodule, int want_symtab) @@ -1187,6 +1215,9 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name, goto out_elf_end; } + if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) + goto out_elf_end; + /* Always reject images with a mismatched build-id: */ if (dso->has_build_id) { u8 build_id[BUILD_ID_SIZE]; @@ -1272,7 +1303,7 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name, if (opdsec && sym.st_shndx == opdidx) { u32 offset = sym.st_value - opdshdr.sh_addr; u64 *opd = opddata->d_buf + offset; - sym.st_value = *opd; + sym.st_value = DSO__SWAP(dso, u64, *opd); sym.st_shndx = elf_addr_to_index(elf, sym.st_value); } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 5649d63798c..af0752b1aca 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -9,6 +9,7 @@ #include #include #include +#include #ifdef HAVE_CPLUS_DEMANGLE extern char *cplus_demangle(const char *, int); @@ -160,11 +161,18 @@ enum dso_kernel_type { DSO_TYPE_GUEST_KERNEL }; +enum dso_swap_type { + DSO_SWAP__UNSET, + DSO_SWAP__NO, + DSO_SWAP__YES, +}; + struct dso { struct list_head node; struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; enum dso_kernel_type kernel; + enum dso_swap_type needs_swap; u8 adjust_symbols:1; u8 has_build_id:1; u8 hit:1; @@ -182,6 +190,28 @@ struct dso { char name[0]; }; +#define DSO__SWAP(dso, type, val) \ +({ \ + type ____r = val; \ + BUG_ON(dso->needs_swap == DSO_SWAP__UNSET); \ + if (dso->needs_swap == DSO_SWAP__YES) { \ + switch (sizeof(____r)) { \ + case 2: \ + ____r = bswap_16(val); \ + break; \ + case 4: \ + ____r = bswap_32(val); \ + break; \ + case 8: \ + ____r = bswap_64(val); \ + break; \ + default: \ + BUG_ON(1); \ + } \ + } \ + ____r; \ +}) + struct dso *dso__new(const char *name); void dso__delete(struct dso *dso); From 268fb20f832e1eb4afd5113ee31fef9332986b13 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 30 May 2012 14:23:43 +0200 Subject: [PATCH 072/475] perf session: Handle endianity swap on sample_id_all header data Adding endianity swapping for event header attached via sample_id_all. Currently we dont do that and it's causing wrong data to be read when running report on architecture with different endianity than the record. The perf is currently able to process 32-bit PPC samples on 32-bit and 64-bit x86. Together with other endianity patches, this change fixies perf report discrepancies on origin and target systems as described in test 1 below, e.g. following perf report diff: ... 0.12% ps [kernel.kallsyms] [k] clear_page - 0.12% awk bash [.] alloc_word_desc + 0.12% awk bash [.] yyparse 0.11% beah-rhts-task libpython2.6.so.1.0 [.] 0x5560e 0.10% perf libc-2.12.so [.] __ctype_toupper_loc - 0.09% rhts-test-runne bash [.] maybe_make_export_env + 0.09% rhts-test-runne bash [.] 0x385a0 0.09% ps [kernel.kallsyms] [k] page_fault ... Note, running following to test perf endianity handling: test 1) - origin system: # perf record -a -- sleep 10 (any perf record will do) # perf report > report.origin # perf archive perf.data - copy the perf.data, report.origin and perf.data.tar.bz2 to a target system and run: # tar xjvf perf.data.tar.bz2 -C ~/.debug # perf report > report.target # diff -u report.origin report.target - the diff should produce no output (besides some white space stuff and possibly different date/TZ output) test 2) - origin system: # perf record -ag -fo /tmp/perf.data -- sleep 1 - mount origin system root to the target system on /mnt/origin - target system: # perf script --symfs /mnt/origin -I -i /mnt/origin/tmp/perf.data \ --kallsyms /mnt/origin/proc/kallsyms - complete perf.data header is displayed Signed-off-by: Jiri Olsa Reviewed-by: David Ahern Tested-by: David Ahern Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1338380624-7443-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 67 ++++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 04d1e33f459..2600916efa8 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -454,37 +454,65 @@ void mem_bswap_64(void *src, int byte_size) } } -static void perf_event__all64_swap(union perf_event *event) +static void swap_sample_id_all(union perf_event *event, void *data) +{ + void *end = (void *) event + event->header.size; + int size = end - data; + + BUG_ON(size % sizeof(u64)); + mem_bswap_64(data, size); +} + +static void perf_event__all64_swap(union perf_event *event, + bool sample_id_all __used) { struct perf_event_header *hdr = &event->header; mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr)); } -static void perf_event__comm_swap(union perf_event *event) +static void perf_event__comm_swap(union perf_event *event, bool sample_id_all) { event->comm.pid = bswap_32(event->comm.pid); event->comm.tid = bswap_32(event->comm.tid); + + if (sample_id_all) { + void *data = &event->comm.comm; + + data += ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } } -static void perf_event__mmap_swap(union perf_event *event) +static void perf_event__mmap_swap(union perf_event *event, + bool sample_id_all) { event->mmap.pid = bswap_32(event->mmap.pid); event->mmap.tid = bswap_32(event->mmap.tid); event->mmap.start = bswap_64(event->mmap.start); event->mmap.len = bswap_64(event->mmap.len); event->mmap.pgoff = bswap_64(event->mmap.pgoff); + + if (sample_id_all) { + void *data = &event->mmap.filename; + + data += ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } } -static void perf_event__task_swap(union perf_event *event) +static void perf_event__task_swap(union perf_event *event, bool sample_id_all) { event->fork.pid = bswap_32(event->fork.pid); event->fork.tid = bswap_32(event->fork.tid); event->fork.ppid = bswap_32(event->fork.ppid); event->fork.ptid = bswap_32(event->fork.ptid); event->fork.time = bswap_64(event->fork.time); + + if (sample_id_all) + swap_sample_id_all(event, &event->fork + 1); } -static void perf_event__read_swap(union perf_event *event) +static void perf_event__read_swap(union perf_event *event, bool sample_id_all) { event->read.pid = bswap_32(event->read.pid); event->read.tid = bswap_32(event->read.tid); @@ -492,6 +520,9 @@ static void perf_event__read_swap(union perf_event *event) event->read.time_enabled = bswap_64(event->read.time_enabled); event->read.time_running = bswap_64(event->read.time_running); event->read.id = bswap_64(event->read.id); + + if (sample_id_all) + swap_sample_id_all(event, &event->read + 1); } static u8 revbyte(u8 b) @@ -543,7 +574,8 @@ void perf_event__attr_swap(struct perf_event_attr *attr) swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64)); } -static void perf_event__hdr_attr_swap(union perf_event *event) +static void perf_event__hdr_attr_swap(union perf_event *event, + bool sample_id_all __used) { size_t size; @@ -554,18 +586,21 @@ static void perf_event__hdr_attr_swap(union perf_event *event) mem_bswap_64(event->attr.id, size); } -static void perf_event__event_type_swap(union perf_event *event) +static void perf_event__event_type_swap(union perf_event *event, + bool sample_id_all __used) { event->event_type.event_type.event_id = bswap_64(event->event_type.event_type.event_id); } -static void perf_event__tracing_data_swap(union perf_event *event) +static void perf_event__tracing_data_swap(union perf_event *event, + bool sample_id_all __used) { event->tracing_data.size = bswap_32(event->tracing_data.size); } -typedef void (*perf_event__swap_op)(union perf_event *event); +typedef void (*perf_event__swap_op)(union perf_event *event, + bool sample_id_all); static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_MMAP] = perf_event__mmap_swap, @@ -999,6 +1034,15 @@ static int perf_session__process_user_event(struct perf_session *session, union } } +static void event_swap(union perf_event *event, bool sample_id_all) +{ + perf_event__swap_op swap; + + swap = perf_event__swap_ops[event->header.type]; + if (swap) + swap(event, sample_id_all); +} + static int perf_session__process_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool, @@ -1007,9 +1051,8 @@ static int perf_session__process_event(struct perf_session *session, struct perf_sample sample; int ret; - if (session->header.needs_swap && - perf_event__swap_ops[event->header.type]) - perf_event__swap_ops[event->header.type](event); + if (session->header.needs_swap) + event_swap(event, session->sample_id_all); if (event->header.type >= PERF_RECORD_HEADER_MAX) return -EINVAL; From 37073f9e449bc430e6c40b9cffc2558002a0256a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 30 May 2012 14:23:44 +0200 Subject: [PATCH 073/475] perf evsel: Fix 32 bit values endianity swap for sample_id_all header We swap the sample_id_all header by u64 pointers. Some members of the header happen to be 32 bit values. We need to handle them separatelly. Together with other endianity patches, this change fixies perf report discrepancies on origin and target systems as described in test 1 below, e.g. following perf report diff: ... 0.12% ps [kernel.kallsyms] [k] clear_page - 0.12% awk bash [.] alloc_word_desc + 0.12% awk bash [.] yyparse 0.11% beah-rhts-task libpython2.6.so.1.0 [.] 0x5560e 0.10% perf libc-2.12.so [.] __ctype_toupper_loc - 0.09% rhts-test-runne bash [.] maybe_make_export_env + 0.09% rhts-test-runne bash [.] 0x385a0 0.09% ps [kernel.kallsyms] [k] page_fault ... Note, running following to test perf endianity handling: test 1) - origin system: # perf record -a -- sleep 10 (any perf record will do) # perf report > report.origin # perf archive perf.data - copy the perf.data, report.origin and perf.data.tar.bz2 to a target system and run: # tar xjvf perf.data.tar.bz2 -C ~/.debug # perf report > report.target # diff -u report.origin report.target - the diff should produce no output (besides some white space stuff and possibly different date/TZ output) test 2) - origin system: # perf record -ag -fo /tmp/perf.data -- sleep 1 - mount origin system root to the target system on /mnt/origin - target system: # perf script --symfs /mnt/origin -I -i /mnt/origin/tmp/perf.data \ --kallsyms /mnt/origin/proc/kallsyms - complete perf.data header is displayed Signed-off-by: Jiri Olsa Reviewed-by: David Ahern Tested-by: David Ahern Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1338380624-7443-4-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 91d19138f3e..9f6cebd798e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -494,16 +494,24 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel, } static int perf_event__parse_id_sample(const union perf_event *event, u64 type, - struct perf_sample *sample) + struct perf_sample *sample, + bool swapped) { const u64 *array = event->sample.array; + union u64_swap u; array += ((event->header.size - sizeof(event->header)) / sizeof(u64)) - 1; if (type & PERF_SAMPLE_CPU) { - u32 *p = (u32 *)array; - sample->cpu = *p; + u.val64 = *array; + if (swapped) { + /* undo swap of u64, then swap on individual u32s */ + u.val64 = bswap_64(u.val64); + u.val32[0] = bswap_32(u.val32[0]); + } + + sample->cpu = u.val32[0]; array--; } @@ -523,9 +531,16 @@ static int perf_event__parse_id_sample(const union perf_event *event, u64 type, } if (type & PERF_SAMPLE_TID) { - u32 *p = (u32 *)array; - sample->pid = p[0]; - sample->tid = p[1]; + u.val64 = *array; + if (swapped) { + /* undo swap of u64, then swap on individual u32s */ + u.val64 = bswap_64(u.val64); + u.val32[0] = bswap_32(u.val32[0]); + u.val32[1] = bswap_32(u.val32[1]); + } + + sample->pid = u.val32[0]; + sample->tid = u.val32[1]; } return 0; @@ -562,7 +577,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, if (event->header.type != PERF_RECORD_SAMPLE) { if (!sample_id_all) return 0; - return perf_event__parse_id_sample(event, type, data); + return perf_event__parse_id_sample(event, type, data, swapped); } array = event->sample.array; From 378474e4b23183002f1791b294a4440b6b7df36a Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 31 May 2012 17:16:56 +0530 Subject: [PATCH 074/475] perf symbols: Check for valid dso before creating map dso__new() can return NULL. Hence verify dso before creating a new map. Signed-off-by: Srikar Dronamraju Suggested-by: Arnaldo Carvalho de Melo Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20120531114656.23691.54223.sendpatchset@srdronam.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 9d04dcd9181..3e2e5ea0f03 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2817,8 +2817,11 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type, struct map *dso__new_map(const char *name) { + struct map *map = NULL; struct dso *dso = dso__new(name); - struct map *map = map__new2(0, dso, MAP__FUNCTION); + + if (dso) + map = map__new2(0, dso, MAP__FUNCTION); return map; } From a23c4dc422cff7bd30f40f5dc7c9ac4a6339005a Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 31 May 2012 17:16:43 +0530 Subject: [PATCH 075/475] perf uprobes: Remove unnecessary check before strlist__delete Since strlist__delete() itself checks, the additional check before calling strlist__delete() is redundant. No Functional change. Signed-off-by: Srikar Dronamraju Suggested-by: Arnaldo Carvalho de Melo Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20120531114643.23691.38666.sendpatchset@srdronam.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 59dccc98b55..0dda25d82d0 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2164,16 +2164,12 @@ int del_perf_probe_events(struct strlist *dellist) error: if (kfd >= 0) { - if (namelist) - strlist__delete(namelist); - + strlist__delete(namelist); close(kfd); } if (ufd >= 0) { - if (unamelist) - strlist__delete(unamelist); - + strlist__delete(unamelist); close(ufd); } From 5e626254206a709c6e937f3dda69bf26c7344f6f Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 29 May 2012 13:07:31 +0200 Subject: [PATCH 076/475] xen/setup: filter APERFMPERF cpuid feature out Xen PV kernels allow access to the APERF/MPERF registers to read the effective frequency. Access to the MSRs is however redirected to the currently scheduled physical CPU, making consecutive read and compares unreliable. In addition each rdmsr traps into the hypervisor. So to avoid bogus readouts and expensive traps, disable the kernel internal feature flag for APERF/MPERF if running under Xen. This will a) remove the aperfmperf flag from /proc/cpuinfo b) not mislead the power scheduler (arch/x86/kernel/cpu/sched.c) to use the feature to improve scheduling (by default disabled) c) not mislead the cpufreq driver to use the MSRs This does not cover userland programs which access the MSRs via the device file interface, but this will be addressed separately. Signed-off-by: Andre Przywara Cc: stable@vger.kernel.org # v3.0+ Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index d1f9a0472d4..272ebd0ce32 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -208,6 +208,9 @@ static void __init xen_banner(void) xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } +#define CPUID_THERM_POWER_LEAF 6 +#define APERFMPERF_PRESENT 0 + static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; @@ -241,6 +244,11 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, *dx = cpuid_leaf5_edx_val; return; + case CPUID_THERM_POWER_LEAF: + /* Disabling APERFMPERF for kernel usage */ + maskecx = ~(1 << APERFMPERF_PRESENT); + break; + case 0xb: /* Suppress extended topology stuff */ maskebx = 0; From cb7225feec627e91d598198996429e9ee6804f8d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 31 May 2012 14:51:44 +0900 Subject: [PATCH 077/475] perf: Remove duplicate invocation on perf_event_for_each The @func callback was invoked twice for group leader when perf_event_for_each() called. It seems the commit 75f937f24bd9 ("perf_counter: Fix ctx->mutex vs counter ->mutex inversion") made the mistake during the change. Signed-off-by: Namhyung Kim Acked-by: Peter Zijlstra Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1338443506-25009-1-git-send-email-namhyung.kim@lge.com Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 5b06cbbf693..f85c0154b33 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3181,7 +3181,6 @@ static void perf_event_for_each(struct perf_event *event, event = event->group_leader; perf_event_for_each_child(event, func); - func(event); list_for_each_entry(sibling, &event->sibling_list, group_entry) perf_event_for_each_child(sibling, func); mutex_unlock(&ctx->mutex); From b3b02ae5865c2dcd506322e0fc6def59a042e72f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 May 2012 15:26:38 -0400 Subject: [PATCH 078/475] NFSv4.1: Fix a request leak on the back channel If the call to svc_process_common() fails, then the request needs to be freed before we can exit bc_svc_process. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- net/sunrpc/svc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 017c0117d15..074df5a564d 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1377,7 +1377,8 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, sizeof(req->rq_snd_buf)); return bc_send(req); } else { - /* Nothing to do to drop request */ + /* drop request */ + xprt_free_bc_request(req); return 0; } } From 0013fb4ca3171c64a4a5d3851fb591bb575e7f04 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 31 May 2012 13:03:26 +0400 Subject: [PATCH 079/475] CIFS: Fix possible wrong memory allocation when cifs_reconnect sets maxBuf to 0 and we try to calculate a size of memory we need to store locks. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 253170dfa71..de8abb6f7b5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -876,7 +876,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) struct cifsLockInfo *li, *tmp; struct cifs_tcon *tcon; struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); - unsigned int num, max_num; + unsigned int num, max_num, max_buf; LOCKING_ANDX_RANGE *buf, *cur; int types[] = {LOCKING_ANDX_LARGE_FILES, LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; @@ -892,8 +892,19 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) return rc; } - max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) / - sizeof(LOCKING_ANDX_RANGE); + /* + * Accessing maxBuf is racy with cifs_reconnect - need to store value + * and check it for zero before using. + */ + max_buf = tcon->ses->server->maxBuf; + if (!max_buf) { + mutex_unlock(&cinode->lock_mutex); + FreeXid(xid); + return -EINVAL; + } + + max_num = (max_buf - sizeof(struct smb_hdr)) / + sizeof(LOCKING_ANDX_RANGE); buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); if (!buf) { mutex_unlock(&cinode->lock_mutex); @@ -1218,7 +1229,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid) int types[] = {LOCKING_ANDX_LARGE_FILES, LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; unsigned int i; - unsigned int max_num, num; + unsigned int max_num, num, max_buf; LOCKING_ANDX_RANGE *buf, *cur; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); @@ -1228,8 +1239,16 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid) INIT_LIST_HEAD(&tmp_llist); - max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) / - sizeof(LOCKING_ANDX_RANGE); + /* + * Accessing maxBuf is racy with cifs_reconnect - need to store value + * and check it for zero before using. + */ + max_buf = tcon->ses->server->maxBuf; + if (!max_buf) + return -EINVAL; + + max_num = (max_buf - sizeof(struct smb_hdr)) / + sizeof(LOCKING_ANDX_RANGE); buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); if (!buf) return -ENOMEM; From ea319d57d3372a9dbee0b3807d75bb36b8d54adc Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 31 May 2012 13:59:36 +0400 Subject: [PATCH 080/475] CIFS: Improve identation in cifs_unlock_range Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 75 +++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index de8abb6f7b5..513adbc211d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1266,46 +1266,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid) continue; if (types[i] != li->type) continue; - if (!cinode->can_cache_brlcks) { - cur->Pid = cpu_to_le16(li->pid); - cur->LengthLow = cpu_to_le32((u32)li->length); - cur->LengthHigh = - cpu_to_le32((u32)(li->length>>32)); - cur->OffsetLow = cpu_to_le32((u32)li->offset); - cur->OffsetHigh = - cpu_to_le32((u32)(li->offset>>32)); - /* - * We need to save a lock here to let us add - * it again to the file's list if the unlock - * range request fails on the server. - */ - list_move(&li->llist, &tmp_llist); - if (++num == max_num) { - stored_rc = cifs_lockv(xid, tcon, - cfile->netfid, - li->type, num, - 0, buf); - if (stored_rc) { - /* - * We failed on the unlock range - * request - add all locks from - * the tmp list to the head of - * the file's list. - */ - cifs_move_llist(&tmp_llist, - &cfile->llist); - rc = stored_rc; - } else - /* - * The unlock range request - * succeed - free the tmp list. - */ - cifs_free_llist(&tmp_llist); - cur = buf; - num = 0; - } else - cur++; - } else { + if (cinode->can_cache_brlcks) { /* * We can cache brlock requests - simply remove * a lock from the file's list. @@ -1313,7 +1274,41 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid) list_del(&li->llist); cifs_del_lock_waiters(li); kfree(li); + continue; } + cur->Pid = cpu_to_le16(li->pid); + cur->LengthLow = cpu_to_le32((u32)li->length); + cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); + cur->OffsetLow = cpu_to_le32((u32)li->offset); + cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); + /* + * We need to save a lock here to let us add it again to + * the file's list if the unlock range request fails on + * the server. + */ + list_move(&li->llist, &tmp_llist); + if (++num == max_num) { + stored_rc = cifs_lockv(xid, tcon, cfile->netfid, + li->type, num, 0, buf); + if (stored_rc) { + /* + * We failed on the unlock range + * request - add all locks from the tmp + * list to the head of the file's list. + */ + cifs_move_llist(&tmp_llist, + &cfile->llist); + rc = stored_rc; + } else + /* + * The unlock range request succeed - + * free the tmp list. + */ + cifs_free_llist(&tmp_llist); + cur = buf; + num = 0; + } else + cur++; } if (num) { stored_rc = cifs_lockv(xid, tcon, cfile->netfid, From 7f0adb53bcf5bdb92236cda8ec92ea5e40993028 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 28 May 2012 15:50:10 +0400 Subject: [PATCH 081/475] CIFS: Make accessing is_valid_oplock/dump_detail ops struct field safe Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/connect.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ccafdedd0db..3647b1a4540 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1058,13 +1058,15 @@ cifs_demultiplex_thread(void *p) if (mid_entry != NULL) { if (!mid_entry->multiRsp || mid_entry->multiEnd) mid_entry->callback(mid_entry); - } else if (!server->ops->is_oplock_break(buf, server)) { + } else if (!server->ops->is_oplock_break || + !server->ops->is_oplock_break(buf, server)) { cERROR(1, "No task to wake, unknown frame received! " "NumMids %d", atomic_read(&midCount)); cifs_dump_mem("Received Data is: ", buf, HEADER_SIZE(server)); #ifdef CONFIG_CIFS_DEBUG2 - server->ops->dump_detail(buf); + if (server->ops->dump_detail) + server->ops->dump_detail(buf); cifs_dump_mids(server); #endif /* CIFS_DEBUG2 */ From 88257360605f9362dc4d79326c268dd334f61c90 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 23 May 2012 14:01:59 +0400 Subject: [PATCH 082/475] CIFS: Move get_next_mid to ops struct Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 7 ++++ fs/cifs/cifsproto.h | 1 - fs/cifs/cifssmb.c | 8 ++-- fs/cifs/connect.c | 2 +- fs/cifs/misc.c | 89 +-------------------------------------------- fs/cifs/smb1ops.c | 89 +++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/transport.c | 2 +- 7 files changed, 103 insertions(+), 95 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 20350a93ed9..6df0cbe1cbc 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -174,6 +174,7 @@ struct smb_version_operations { void (*add_credits)(struct TCP_Server_Info *, const unsigned int); void (*set_credits)(struct TCP_Server_Info *, const int); int * (*get_credits_field)(struct TCP_Server_Info *); + __u64 (*get_next_mid)(struct TCP_Server_Info *); /* data offset from read response message */ unsigned int (*read_data_offset)(char *); /* data length from read response message */ @@ -399,6 +400,12 @@ set_credits(struct TCP_Server_Info *server, const int val) server->ops->set_credits(server, val); } +static inline __u64 +get_next_mid(struct TCP_Server_Info *server) +{ + return server->ops->get_next_mid(server); +} + /* * Macros to allow the TCP_Server_Info->net field and related code to drop out * when CONFIG_NET_NS isn't set. diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 5ec21ecf798..0a6cbfe2761 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -114,7 +114,6 @@ extern int small_smb_init_no_tc(const int smb_cmd, const int wct, void **request_buf); extern int CIFS_SessSetup(unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_cp); -extern __u64 GetNextMid(struct TCP_Server_Info *server); extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); extern u64 cifs_UnixTimeToNT(struct timespec); extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index b5ad716b264..5b400730c21 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -268,7 +268,7 @@ small_smb_init_no_tc(const int smb_command, const int wct, return rc; buffer = (struct smb_hdr *)*request_buf; - buffer->Mid = GetNextMid(ses->server); + buffer->Mid = get_next_mid(ses->server); if (ses->capabilities & CAP_UNICODE) buffer->Flags2 |= SMBFLG2_UNICODE; if (ses->capabilities & CAP_STATUS32) @@ -402,7 +402,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses) cFYI(1, "secFlags 0x%x", secFlags); - pSMB->hdr.Mid = GetNextMid(server); + pSMB->hdr.Mid = get_next_mid(server); pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) @@ -782,7 +782,7 @@ CIFSSMBLogoff(const int xid, struct cifs_ses *ses) return rc; } - pSMB->hdr.Mid = GetNextMid(ses->server); + pSMB->hdr.Mid = get_next_mid(ses->server); if (ses->server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) @@ -4762,7 +4762,7 @@ getDFSRetry: /* server pointer checked in called function, but should never be null here anyway */ - pSMB->hdr.Mid = GetNextMid(ses->server); + pSMB->hdr.Mid = get_next_mid(ses->server); pSMB->hdr.Tid = ses->ipc_tid; pSMB->hdr.Uid = ses->Suid; if (ses->capabilities & CAP_STATUS32) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 3647b1a4540..78db68a5cf4 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3940,7 +3940,7 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses, header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX, NULL /*no tid */ , 4 /*wct */ ); - smb_buffer->Mid = GetNextMid(ses->server); + smb_buffer->Mid = get_next_mid(ses->server); smb_buffer->Uid = ses->Suid; pSMB = (TCONX_REQ *) smb_buffer; pSMBr = (TCONX_RSP *) smb_buffer_response; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index e2552d2b2e4..557506ae1e2 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -212,93 +212,6 @@ cifs_small_buf_release(void *buf_to_free) return; } -/* - * Find a free multiplex id (SMB mid). Otherwise there could be - * mid collisions which might cause problems, demultiplexing the - * wrong response to this request. Multiplex ids could collide if - * one of a series requests takes much longer than the others, or - * if a very large number of long lived requests (byte range - * locks or FindNotify requests) are pending. No more than - * 64K-1 requests can be outstanding at one time. If no - * mids are available, return zero. A future optimization - * could make the combination of mids and uid the key we use - * to demultiplex on (rather than mid alone). - * In addition to the above check, the cifs demultiplex - * code already used the command code as a secondary - * check of the frame and if signing is negotiated the - * response would be discarded if the mid were the same - * but the signature was wrong. Since the mid is not put in the - * pending queue until later (when it is about to be dispatched) - * we do have to limit the number of outstanding requests - * to somewhat less than 64K-1 although it is hard to imagine - * so many threads being in the vfs at one time. - */ -__u64 GetNextMid(struct TCP_Server_Info *server) -{ - __u64 mid = 0; - __u16 last_mid, cur_mid; - bool collision; - - spin_lock(&GlobalMid_Lock); - - /* mid is 16 bit only for CIFS/SMB */ - cur_mid = (__u16)((server->CurrentMid) & 0xffff); - /* we do not want to loop forever */ - last_mid = cur_mid; - cur_mid++; - - /* - * This nested loop looks more expensive than it is. - * In practice the list of pending requests is short, - * fewer than 50, and the mids are likely to be unique - * on the first pass through the loop unless some request - * takes longer than the 64 thousand requests before it - * (and it would also have to have been a request that - * did not time out). - */ - while (cur_mid != last_mid) { - struct mid_q_entry *mid_entry; - unsigned int num_mids; - - collision = false; - if (cur_mid == 0) - cur_mid++; - - num_mids = 0; - list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) { - ++num_mids; - if (mid_entry->mid == cur_mid && - mid_entry->mid_state == MID_REQUEST_SUBMITTED) { - /* This mid is in use, try a different one */ - collision = true; - break; - } - } - - /* - * if we have more than 32k mids in the list, then something - * is very wrong. Possibly a local user is trying to DoS the - * box by issuing long-running calls and SIGKILL'ing them. If - * we get to 2^16 mids then we're in big trouble as this - * function could loop forever. - * - * Go ahead and assign out the mid in this situation, but force - * an eventual reconnect to clean out the pending_mid_q. - */ - if (num_mids > 32768) - server->tcpStatus = CifsNeedReconnect; - - if (!collision) { - mid = (__u64)cur_mid; - server->CurrentMid = mid; - break; - } - cur_mid++; - } - spin_unlock(&GlobalMid_Lock); - return mid; -} - /* NB: MID can not be set if treeCon not passed in, in that case it is responsbility of caller to set the mid */ void @@ -334,7 +247,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , /* Uid is not converted */ buffer->Uid = treeCon->ses->Suid; - buffer->Mid = GetNextMid(treeCon->ses->server); + buffer->Mid = get_next_mid(treeCon->ses->server); } if (treeCon->Flags & SMB_SHARE_IS_IN_DFS) buffer->Flags2 |= SMBFLG2_DFS; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index d9d615fbed3..6dec38f5522 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -125,6 +125,94 @@ cifs_get_credits_field(struct TCP_Server_Info *server) return &server->credits; } +/* + * Find a free multiplex id (SMB mid). Otherwise there could be + * mid collisions which might cause problems, demultiplexing the + * wrong response to this request. Multiplex ids could collide if + * one of a series requests takes much longer than the others, or + * if a very large number of long lived requests (byte range + * locks or FindNotify requests) are pending. No more than + * 64K-1 requests can be outstanding at one time. If no + * mids are available, return zero. A future optimization + * could make the combination of mids and uid the key we use + * to demultiplex on (rather than mid alone). + * In addition to the above check, the cifs demultiplex + * code already used the command code as a secondary + * check of the frame and if signing is negotiated the + * response would be discarded if the mid were the same + * but the signature was wrong. Since the mid is not put in the + * pending queue until later (when it is about to be dispatched) + * we do have to limit the number of outstanding requests + * to somewhat less than 64K-1 although it is hard to imagine + * so many threads being in the vfs at one time. + */ +static __u64 +cifs_get_next_mid(struct TCP_Server_Info *server) +{ + __u64 mid = 0; + __u16 last_mid, cur_mid; + bool collision; + + spin_lock(&GlobalMid_Lock); + + /* mid is 16 bit only for CIFS/SMB */ + cur_mid = (__u16)((server->CurrentMid) & 0xffff); + /* we do not want to loop forever */ + last_mid = cur_mid; + cur_mid++; + + /* + * This nested loop looks more expensive than it is. + * In practice the list of pending requests is short, + * fewer than 50, and the mids are likely to be unique + * on the first pass through the loop unless some request + * takes longer than the 64 thousand requests before it + * (and it would also have to have been a request that + * did not time out). + */ + while (cur_mid != last_mid) { + struct mid_q_entry *mid_entry; + unsigned int num_mids; + + collision = false; + if (cur_mid == 0) + cur_mid++; + + num_mids = 0; + list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) { + ++num_mids; + if (mid_entry->mid == cur_mid && + mid_entry->mid_state == MID_REQUEST_SUBMITTED) { + /* This mid is in use, try a different one */ + collision = true; + break; + } + } + + /* + * if we have more than 32k mids in the list, then something + * is very wrong. Possibly a local user is trying to DoS the + * box by issuing long-running calls and SIGKILL'ing them. If + * we get to 2^16 mids then we're in big trouble as this + * function could loop forever. + * + * Go ahead and assign out the mid in this situation, but force + * an eventual reconnect to clean out the pending_mid_q. + */ + if (num_mids > 32768) + server->tcpStatus = CifsNeedReconnect; + + if (!collision) { + mid = (__u64)cur_mid; + server->CurrentMid = mid; + break; + } + cur_mid++; + } + spin_unlock(&GlobalMid_Lock); + return mid; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -133,6 +221,7 @@ struct smb_version_operations smb1_operations = { .add_credits = cifs_add_credits, .set_credits = cifs_set_credits, .get_credits_field = cifs_get_credits_field, + .get_next_mid = cifs_get_next_mid, .read_data_offset = cifs_read_data_offset, .read_data_length = cifs_read_data_length, .map_error = map_smb_to_linux_error, diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 1b36ffe6a47..3097ee58fd7 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -779,7 +779,7 @@ send_lock_cancel(const unsigned int xid, struct cifs_tcon *tcon, pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES; pSMB->Timeout = 0; - pSMB->hdr.Mid = GetNextMid(ses->server); + pSMB->hdr.Mid = get_next_mid(ses->server); return SendReceive(xid, ses, in_buf, out_buf, &bytes_returned, 0); From cfb46f433a4da97c31780e08a259fac2cb6bd61f Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 25 Apr 2012 14:33:33 +0100 Subject: [PATCH 083/475] acpi_video: fix leaking PCI references Signed-off-by: Alan Cox Acked-by: Matthew Garrett Signed-off-by: Len Brown --- drivers/acpi/video.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 9577b6fa265..66e8f7333e9 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -1745,6 +1745,7 @@ static int acpi_video_bus_remove(struct acpi_device *device, int type) static int __init intel_opregion_present(void) { + int i915 = 0; #if defined(CONFIG_DRM_I915) || defined(CONFIG_DRM_I915_MODULE) struct pci_dev *dev = NULL; u32 address; @@ -1757,10 +1758,10 @@ static int __init intel_opregion_present(void) pci_read_config_dword(dev, 0xfc, &address); if (!address) continue; - return 1; + i915 = 1; } #endif - return 0; + return i915; } int acpi_video_register(void) From c6996bdd850fb53319918487d5f674203517fdc5 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 25 Apr 2012 14:33:48 +0100 Subject: [PATCH 084/475] acpi_video: Intel video is not always i915 Stop it poking at random registers on the i740 cards that may be out there still. As per Matthew's feedback remove the conditional checks and never enable the opregion handling unless an appropriate driver has been loaded. Signed-off-by: Alan Cox Signed-off-by: Len Brown --- drivers/acpi/video.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 66e8f7333e9..609262dc125 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -1743,10 +1743,18 @@ static int acpi_video_bus_remove(struct acpi_device *device, int type) return 0; } +static int __init is_i740(struct pci_dev *dev) +{ + if (dev->device == 0x00D1) + return 1; + if (dev->device == 0x7000) + return 1; + return 0; +} + static int __init intel_opregion_present(void) { - int i915 = 0; -#if defined(CONFIG_DRM_I915) || defined(CONFIG_DRM_I915_MODULE) + int opregion = 0; struct pci_dev *dev = NULL; u32 address; @@ -1755,13 +1763,15 @@ static int __init intel_opregion_present(void) continue; if (dev->vendor != PCI_VENDOR_ID_INTEL) continue; + /* We don't want to poke around undefined i740 registers */ + if (is_i740(dev)) + continue; pci_read_config_dword(dev, 0xfc, &address); if (!address) continue; - i915 = 1; + opregion = 1; } -#endif - return i915; + return opregion; } int acpi_video_register(void) From 155689defc782b486a7e6776a57ecc4ebb37ed52 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 25 Apr 2012 14:34:04 +0100 Subject: [PATCH 085/475] gma500: don't register the ACPI video bus We are not yet ready for this and it makes a mess on some devices. Signed-off-by: Alan Cox Signed-off-by: Len Brown --- drivers/gpu/drm/gma500/psb_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c index c34adf9d910..09af2ffbb30 100644 --- a/drivers/gpu/drm/gma500/psb_drv.c +++ b/drivers/gpu/drm/gma500/psb_drv.c @@ -349,7 +349,7 @@ static int psb_driver_load(struct drm_device *dev, unsigned long chipset) PSB_WSGX32(0x30000000, PSB_CR_BIF_3D_REQ_BASE); /* igd_opregion_init(&dev_priv->opregion_dev); */ - acpi_video_register(); +/* acpi_video_register(); */ if (dev_priv->lid_state) psb_lid_timer_init(dev_priv); From 301f33fbcf4ced53b3de114846ecece5d6aafeeb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Apr 2012 14:19:16 +0300 Subject: [PATCH 086/475] ACPI video: use after input_unregister_device() We can't use "input" anymore after calling input_unregister_device(). The call to input_free_device() is a double free. The normal way to deal with this is to make input_register_device() the last function called in the function. Signed-off-by: Dan Carpenter Acked-by: Dmitry Torokhov Signed-off-by: Len Brown --- drivers/acpi/video.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 609262dc125..a576575617d 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -1687,10 +1687,6 @@ static int acpi_video_bus_add(struct acpi_device *device) set_bit(KEY_BRIGHTNESS_ZERO, input->keybit); set_bit(KEY_DISPLAY_OFF, input->keybit); - error = input_register_device(input); - if (error) - goto err_stop_video; - printk(KERN_INFO PREFIX "%s [%s] (multi-head: %s rom: %s post: %s)\n", ACPI_VIDEO_DEVICE_NAME, acpi_device_bid(device), video->flags.multihead ? "yes" : "no", @@ -1701,12 +1697,16 @@ static int acpi_video_bus_add(struct acpi_device *device) video->pm_nb.priority = 0; error = register_pm_notifier(&video->pm_nb); if (error) - goto err_unregister_input_dev; + goto err_stop_video; + + error = input_register_device(input); + if (error) + goto err_unregister_pm_notifier; return 0; - err_unregister_input_dev: - input_unregister_device(input); + err_unregister_pm_notifier: + unregister_pm_notifier(&video->pm_nb); err_stop_video: acpi_video_bus_stop_devices(video); err_free_input_dev: From a4dff3043c231d57f982af635c9d2192ee40e5ae Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 30 May 2012 16:25:41 -0700 Subject: [PATCH 087/475] target/file: Use O_DSYNC by default for FILEIO backends Convert to use O_DSYNC for all cases at FILEIO backend creation time to avoid the extra syncing of pure timestamp updates with legacy O_SYNC during default operation as recommended by hch. Continue to do this independently of Write Cache Enable (WCE) bit, as WCE=0 is currently the default for all backend devices and enabled by user on per device basis via attrib/emulate_write_cache. This patch drops the now unnecessary fd_buffered_io= token usage that was originally signalling when to explictly disable O_SYNC at backend creation time for buffered I/O operation. This can end up being dangerous for a number of reasons during physical node failure, so go ahead and drop this option for now when O_DSYNC is used as the default. Also allow explict FUA WRITEs -> vfs_fsync_range() call to function in fd_execute_cmd() independently of WCE bit setting. Reported-by: Christoph Hellwig Cc: Linus Torvalds Cc: Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_file.c | 70 ++++++++----------------------- drivers/target/target_core_file.h | 1 - 2 files changed, 17 insertions(+), 54 deletions(-) diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 686dba189f8..9f99d040490 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -133,16 +133,11 @@ static struct se_device *fd_create_virtdevice( ret = PTR_ERR(dev_p); goto fail; } - - /* O_DIRECT too? */ - flags = O_RDWR | O_CREAT | O_LARGEFILE; - /* - * If fd_buffered_io=1 has not been set explicitly (the default), - * use O_SYNC to force FILEIO writes to disk. + * Use O_DSYNC by default instead of O_SYNC to forgo syncing + * of pure timestamp updates. */ - if (!(fd_dev->fbd_flags & FDBD_USE_BUFFERED_IO)) - flags |= O_SYNC; + flags = O_RDWR | O_CREAT | O_LARGEFILE | O_DSYNC; file = filp_open(dev_p, flags, 0600); if (IS_ERR(file)) { @@ -380,23 +375,6 @@ static void fd_emulate_sync_cache(struct se_cmd *cmd) } } -static void fd_emulate_write_fua(struct se_cmd *cmd) -{ - struct se_device *dev = cmd->se_dev; - struct fd_dev *fd_dev = dev->dev_ptr; - loff_t start = cmd->t_task_lba * - dev->se_sub_dev->se_dev_attrib.block_size; - loff_t end = start + cmd->data_length; - int ret; - - pr_debug("FILEIO: FUA WRITE LBA: %llu, bytes: %u\n", - cmd->t_task_lba, cmd->data_length); - - ret = vfs_fsync_range(fd_dev->fd_file, start, end, 1); - if (ret != 0) - pr_err("FILEIO: vfs_fsync_range() failed: %d\n", ret); -} - static int fd_execute_cmd(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, enum dma_data_direction data_direction) { @@ -411,19 +389,21 @@ static int fd_execute_cmd(struct se_cmd *cmd, struct scatterlist *sgl, ret = fd_do_readv(cmd, sgl, sgl_nents); } else { ret = fd_do_writev(cmd, sgl, sgl_nents); - + /* + * Perform implict vfs_fsync_range() for fd_do_writev() ops + * for SCSI WRITEs with Forced Unit Access (FUA) set. + * Allow this to happen independent of WCE=0 setting. + */ if (ret > 0 && - dev->se_sub_dev->se_dev_attrib.emulate_write_cache > 0 && dev->se_sub_dev->se_dev_attrib.emulate_fua_write > 0 && (cmd->se_cmd_flags & SCF_FUA)) { - /* - * We might need to be a bit smarter here - * and return some sense data to let the initiator - * know the FUA WRITE cache sync failed..? - */ - fd_emulate_write_fua(cmd); - } + struct fd_dev *fd_dev = dev->dev_ptr; + loff_t start = cmd->t_task_lba * + dev->se_sub_dev->se_dev_attrib.block_size; + loff_t end = start + cmd->data_length; + vfs_fsync_range(fd_dev->fd_file, start, end, 1); + } } if (ret < 0) { @@ -442,7 +422,6 @@ enum { static match_table_t tokens = { {Opt_fd_dev_name, "fd_dev_name=%s"}, {Opt_fd_dev_size, "fd_dev_size=%s"}, - {Opt_fd_buffered_io, "fd_buffered_io=%d"}, {Opt_err, NULL} }; @@ -454,7 +433,7 @@ static ssize_t fd_set_configfs_dev_params( struct fd_dev *fd_dev = se_dev->se_dev_su_ptr; char *orig, *ptr, *arg_p, *opts; substring_t args[MAX_OPT_ARGS]; - int ret = 0, arg, token; + int ret = 0, token; opts = kstrdup(page, GFP_KERNEL); if (!opts) @@ -498,19 +477,6 @@ static ssize_t fd_set_configfs_dev_params( " bytes\n", fd_dev->fd_dev_size); fd_dev->fbd_flags |= FBDF_HAS_SIZE; break; - case Opt_fd_buffered_io: - match_int(args, &arg); - if (arg != 1) { - pr_err("bogus fd_buffered_io=%d value\n", arg); - ret = -EINVAL; - goto out; - } - - pr_debug("FILEIO: Using buffered I/O" - " operations for struct fd_dev\n"); - - fd_dev->fbd_flags |= FDBD_USE_BUFFERED_IO; - break; default: break; } @@ -542,10 +508,8 @@ static ssize_t fd_show_configfs_dev_params( ssize_t bl = 0; bl = sprintf(b + bl, "TCM FILEIO ID: %u", fd_dev->fd_dev_id); - bl += sprintf(b + bl, " File: %s Size: %llu Mode: %s\n", - fd_dev->fd_dev_name, fd_dev->fd_dev_size, - (fd_dev->fbd_flags & FDBD_USE_BUFFERED_IO) ? - "Buffered" : "Synchronous"); + bl += sprintf(b + bl, " File: %s Size: %llu Mode: O_DSYNC\n", + fd_dev->fd_dev_name, fd_dev->fd_dev_size); return bl; } diff --git a/drivers/target/target_core_file.h b/drivers/target/target_core_file.h index fbd59ef7d8b..70ce7fd7111 100644 --- a/drivers/target/target_core_file.h +++ b/drivers/target/target_core_file.h @@ -14,7 +14,6 @@ #define FBDF_HAS_PATH 0x01 #define FBDF_HAS_SIZE 0x02 -#define FDBD_USE_BUFFERED_IO 0x04 struct fd_dev { u32 fbd_flags; From bfaa25f33425db16a942b7c71e396a47581c646d Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Wed, 23 May 2012 16:30:53 -0600 Subject: [PATCH 088/475] regmap: clean up debugfs if regmap_init fails If debugfs isn't cleaned up, stale files will be left in the filesystem which will cause an OOPS when accessed the first time, and hang the accessing application when accessed again, presumably due to some lock being left held. Signed-off-by: Stephen Warren Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 0bcda488f11..2aa076e6136 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -368,7 +368,7 @@ struct regmap *regmap_init(struct device *dev, ret = regcache_init(map, config); if (ret < 0) - goto err_free_workbuf; + goto err_debugfs; /* Add a devres resource for dev_get_regmap() */ m = devres_alloc(dev_get_regmap_release, sizeof(*m), GFP_KERNEL); @@ -383,7 +383,8 @@ struct regmap *regmap_init(struct device *dev, err_cache: regcache_exit(map); -err_free_workbuf: +err_debugfs: + regmap_debugfs_exit(map); kfree(map->work_buf); err_map: kfree(map); From 5494a98f451d59f6c05f3f688510e0832445f661 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 31 May 2012 21:10:30 -0300 Subject: [PATCH 089/475] regmap: Fix the size calculation for map->format.buf_size The word to be transmitted/received via regmap is composed by the following parts: config->reg_bits config->val_bits config->pad_bits ,so the total size should be calculated by summing up the number of bits of each element and using a DIV_ROUND_UP to return the number of bytes. Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 2aa076e6136..04b48027c21 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -246,11 +246,11 @@ struct regmap *regmap_init(struct device *dev, map->lock = regmap_lock_mutex; map->unlock = regmap_unlock_mutex; } - map->format.buf_size = (config->reg_bits + config->val_bits) / 8; map->format.reg_bytes = DIV_ROUND_UP(config->reg_bits, 8); map->format.pad_bytes = config->pad_bits / 8; map->format.val_bytes = DIV_ROUND_UP(config->val_bits, 8); - map->format.buf_size += map->format.pad_bytes; + map->format.buf_size = DIV_ROUND_UP(config->reg_bits + + config->val_bits + config->pad_bits, 8); map->reg_shift = config->pad_bits % 8; if (config->reg_stride) map->reg_stride = config->reg_stride; From ca4620853a5b578725edec1fc8f1345db3a823ab Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 1 Jun 2012 23:28:23 -0700 Subject: [PATCH 090/475] MAINTAINERS: Update my e-mail address Maintainer activities moved to home server. Update e-mail address to reflect reality. Signed-off-by: Guenter Roeck Acked-by: Jean Delvare --- MAINTAINERS | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 55f0fda602e..5b75b9706d2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2270,7 +2270,7 @@ F: include/linux/device-mapper.h F: include/linux/dm-*.h DIOLAN U2C-12 I2C DRIVER -M: Guenter Roeck +M: Guenter Roeck L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-diolan-u2c.c @@ -3138,7 +3138,7 @@ F: drivers/tty/hvc/ HARDWARE MONITORING M: Jean Delvare -M: Guenter Roeck +M: Guenter Roeck L: lm-sensors@lm-sensors.org W: http://www.lm-sensors.org/ T: quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-hwmon/ @@ -4411,6 +4411,13 @@ S: Orphan F: drivers/video/matrox/matroxfb_* F: include/linux/matroxfb.h +MAX16065 HARDWARE MONITOR DRIVER +M: Guenter Roeck +L: lm-sensors@lm-sensors.org +S: Maintained +F: Documentation/hwmon/max16065 +F: drivers/hwmon/max16065.c + MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER M: "Hans J. Koch" L: lm-sensors@lm-sensors.org @@ -5149,7 +5156,7 @@ F: drivers/leds/leds-pca9532.c F: include/linux/leds-pca9532.h PCA9541 I2C BUS MASTER SELECTOR DRIVER -M: Guenter Roeck +M: Guenter Roeck L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/muxes/i2c-mux-pca9541.c @@ -5299,7 +5306,7 @@ F: drivers/video/fb-puv3.c F: drivers/rtc/rtc-puv3.c PMBUS HARDWARE MONITORING DRIVERS -M: Guenter Roeck +M: Guenter Roeck L: lm-sensors@lm-sensors.org W: http://www.lm-sensors.org/ W: http://www.roeck-us.net/linux/drivers/ From 617caccebe451716df21c069b079d5936ed7b0f3 Mon Sep 17 00:00:00 2001 From: AnilKumar Ch Date: Wed, 23 May 2012 17:45:09 +0530 Subject: [PATCH 091/475] can: c_can: fix "BUG! echo_skb is occupied!" during transmit This patch fixes an issue with transmit routine, which causes "can_put_echo_skb: BUG! echo_skb is occupied!" message when using "cansequence -p" on D_CAN controller. In c_can driver, while transmitting packets tx_echo flag holds the no of can frames put for transmission into the hardware. As the comment above c_can_do_tx() indicates, if we find any packet which is not transmitted then we should stop looking for more. In the current implementation this is not taken care of causing the said message. Also, fix the condition used to find if the packet is transmitted or not. Current code skips the first tx message object and ends up checking one extra invalid object. While at it, fix the comment on top of c_can_do_tx() to use the terminology "packet" instead of "package" since it is more standard. Cc: stable@kernel.org # 2.6.39+ Signed-off-by: AnilKumar Ch Acked-by: Wolfgang Grandegger Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 536bda072a1..9ac28df3c89 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -686,7 +686,7 @@ static int c_can_get_berr_counter(const struct net_device *dev, * * We iterate from priv->tx_echo to priv->tx_next and check if the * packet has been transmitted, echo it back to the CAN framework. - * If we discover a not yet transmitted package, stop looking for more. + * If we discover a not yet transmitted packet, stop looking for more. */ static void c_can_do_tx(struct net_device *dev) { @@ -698,7 +698,7 @@ static void c_can_do_tx(struct net_device *dev) for (/* nix */; (priv->tx_next - priv->tx_echo) > 0; priv->tx_echo++) { msg_obj_no = get_tx_echo_msg_obj(priv); val = c_can_read_reg32(priv, &priv->regs->txrqst1); - if (!(val & (1 << msg_obj_no))) { + if (!(val & (1 << (msg_obj_no - 1)))) { can_get_echo_skb(dev, msg_obj_no - C_CAN_MSG_OBJ_TX_FIRST); stats->tx_bytes += priv->read_reg(priv, @@ -706,6 +706,8 @@ static void c_can_do_tx(struct net_device *dev) & IF_MCONT_DLC_MASK; stats->tx_packets++; c_can_inval_msg_object(dev, 0, msg_obj_no); + } else { + break; } } From 148c87c89e1a8863d3d965179f3ab1a06490569e Mon Sep 17 00:00:00 2001 From: AnilKumar Ch Date: Wed, 23 May 2012 17:45:10 +0530 Subject: [PATCH 092/475] can: c_can: fix an interrupt thrash issue with c_can driver This patch fixes an interrupt thrash issue with c_can driver. In c_can_isr() function interrupts are disabled and enabled only in c_can_poll() function. c_can_isr() & c_can_poll() both read the irqstatus flag. However, irqstatus is always read as 0 in c_can_poll() because all C_CAN interrupts are disabled in c_can_isr(). This causes all interrupts to be re-enabled in c_can_poll() which in turn causes another interrupt since the event is not really handled. This keeps happening causing a flood of interrupts. To fix this, read the irqstatus register in isr and use the same cached value in the poll function. Cc: stable@kernel.org # 2.6.39+ Signed-off-by: AnilKumar Ch Acked-by: Wolfgang Grandegger Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 7 +++---- drivers/net/can/c_can/c_can.h | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 9ac28df3c89..fa016214c52 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -952,7 +952,7 @@ static int c_can_poll(struct napi_struct *napi, int quota) struct net_device *dev = napi->dev; struct c_can_priv *priv = netdev_priv(dev); - irqstatus = priv->read_reg(priv, &priv->regs->interrupt); + irqstatus = priv->irqstatus; if (!irqstatus) goto end; @@ -1030,12 +1030,11 @@ end: static irqreturn_t c_can_isr(int irq, void *dev_id) { - u16 irqstatus; struct net_device *dev = (struct net_device *)dev_id; struct c_can_priv *priv = netdev_priv(dev); - irqstatus = priv->read_reg(priv, &priv->regs->interrupt); - if (!irqstatus) + priv->irqstatus = priv->read_reg(priv, &priv->regs->interrupt); + if (!priv->irqstatus) return IRQ_NONE; /* disable all interrupts and schedule the NAPI */ diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h index 9b7fbef3d09..5f32d34af50 100644 --- a/drivers/net/can/c_can/c_can.h +++ b/drivers/net/can/c_can/c_can.h @@ -76,6 +76,7 @@ struct c_can_priv { unsigned int tx_next; unsigned int tx_echo; void *priv; /* for board-specific data */ + u16 irqstatus; }; struct net_device *alloc_c_can_dev(void); From f461f27a4436dbe691908fe08b867ef888848cc3 Mon Sep 17 00:00:00 2001 From: AnilKumar Ch Date: Wed, 23 May 2012 17:45:11 +0530 Subject: [PATCH 093/475] can: c_can: fix race condition in c_can_open() Fix the issue of C_CAN interrupts getting disabled forever when canconfig utility is used multiple times. According to NAPI usage we disable all the hardware interrupts in ISR and re-enable them in poll(). Current implementation calls napi_enable() after hardware interrupts are enabled. If we get any interrupts between these two steps then we do not process those interrupts because napi is not enabled. Mostly these interrupts come because of STATUS is not 0x7 or ERROR interrupts. If napi_enable() happens before HW interrupts enabled then c_can_poll() function will be called eventual re-enabling. This patch moves the napi_enable() call before interrupts enabled. Cc: stable@kernel.org # 2.6.39+ Signed-off-by: AnilKumar Ch Acked-by: Wolfgang Grandegger Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index fa016214c52..8dc84d66eea 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -1064,10 +1064,11 @@ static int c_can_open(struct net_device *dev) goto exit_irq_fail; } + napi_enable(&priv->napi); + /* start the c_can controller */ c_can_start(dev); - napi_enable(&priv->napi); netif_start_queue(dev); return 0; From dc605dbdb8dd152448ccb8d4c4d5bd9439965a20 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 30 May 2012 13:25:55 -0700 Subject: [PATCH 094/475] can: cc770: Fix likely misuse of | for & Using | with a constant is always true. Likely this should have be &. Signed-off-by: Joe Perches Signed-off-by: Marc Kleine-Budde --- drivers/net/can/cc770/cc770_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/cc770/cc770_platform.c b/drivers/net/can/cc770/cc770_platform.c index 53115eee807..688371cda37 100644 --- a/drivers/net/can/cc770/cc770_platform.c +++ b/drivers/net/can/cc770/cc770_platform.c @@ -154,7 +154,7 @@ static int __devinit cc770_get_platform_data(struct platform_device *pdev, struct cc770_platform_data *pdata = pdev->dev.platform_data; priv->can.clock.freq = pdata->osc_freq; - if (priv->cpu_interface | CPUIF_DSC) + if (priv->cpu_interface & CPUIF_DSC) priv->can.clock.freq /= 2; priv->clkout = pdata->cor; priv->bus_config = pdata->bcr; From 2f9d3df8aa1cc3c6db5cfa0bad3f0745e04cc27d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Jun 2012 14:50:19 -0700 Subject: [PATCH 095/475] vfs: move inode stat information closer together The comment above it says "Stat data, not accessed from path walking", but in fact some of inode fields we use for the common stat data was way down at the end of the inode, causing unnecessary cache misses for the common stat operations. The inode structure is pretty big, and this can change padding depending on field width, but at least on the common 64-bit configurations this doesn't change the size. Some of our inode layout has historically been to tro to avoid unnecessary padding fields, but cache locality is at least as important for layout, if not more. Noticed by looking at kernel profiles, and noticing that the "i_blkbits" access stood out like a sore thumb. Signed-off-by: Linus Torvalds --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 51978ed43e9..17fd887c798 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -802,13 +802,14 @@ struct inode { unsigned int __i_nlink; }; dev_t i_rdev; + loff_t i_size; struct timespec i_atime; struct timespec i_mtime; struct timespec i_ctime; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; + unsigned int i_blkbits; blkcnt_t i_blocks; - loff_t i_size; #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; @@ -828,9 +829,8 @@ struct inode { struct list_head i_dentry; struct rcu_head i_rcu; }; - atomic_t i_count; - unsigned int i_blkbits; u64 i_version; + atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ From 752dc185dacba1edcba425e67fc6df3c7793a5c3 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 2 Jun 2012 00:27:47 -0700 Subject: [PATCH 096/475] mm: fix warning in __set_page_dirty_nobuffers New tmpfs use of !PageUptodate pages for fallocate() is triggering the WARNING: at mm/page-writeback.c:1990 when __set_page_dirty_nobuffers() is called from migrate_page_copy() for compaction. It is anomalous that migration should use __set_page_dirty_nobuffers() on an address_space that does not participate in dirty and writeback accounting; and this has also been observed to insert surprising dirty tags into a tmpfs radix_tree, despite tmpfs not using tags at all. We should probably give migrate_page_copy() a better way to preserve the tag and migrate accounting info, when mapping_cap_account_dirty(). But that needs some more work: so in the interim, avoid the warning by using a simple SetPageDirty on PageSwapBacked pages. Reported-and-tested-by: Dave Jones Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/migrate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index ab81d482ae6..be26d5cbe56 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -436,7 +436,10 @@ void migrate_page_copy(struct page *newpage, struct page *page) * is actually a signal that all of the page has become dirty. * Whereas only part of our page may be dirty. */ - __set_page_dirty_nobuffers(newpage); + if (PageSwapBacked(page)) + SetPageDirty(newpage); + else + __set_page_dirty_nobuffers(newpage); } mlock_migrate_page(newpage, page); From 68e3e92620c323703bc7db75c2ba15239ee85c39 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Jun 2012 20:05:57 -0700 Subject: [PATCH 097/475] Revert "mm: compaction: handle incorrect MIGRATE_UNMOVABLE type pageblocks" This reverts commit 5ceb9ce6fe9462a298bb2cd5c9f1ca6cb80a0199. That commit seems to be the cause of the mm compation list corruption issues that Dave Jones reported. The locking (or rather, absense there-of) is dubious, as is the use of the 'page' variable once it has been found to be outside the pageblock range. So revert it for now, we can re-visit this for 3.6. If we even need to: as Minchan Kim says, "The patch wasn't a bug fix and even test workload was very theoretical". Reported-and-tested-by: Dave Jones Acked-by: Hugh Dickins Acked-by: KOSAKI Motohiro Acked-by: Minchan Kim Cc: Bartlomiej Zolnierkiewicz Cc: Kyungmin Park Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 19 ----- mm/compaction.c | 142 ++++++------------------------------- mm/internal.h | 9 +-- mm/page_alloc.c | 8 +-- 4 files changed, 28 insertions(+), 150 deletions(-) diff --git a/include/linux/compaction.h b/include/linux/compaction.h index e988037abd2..51a90b7f2d6 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -1,8 +1,6 @@ #ifndef _LINUX_COMPACTION_H #define _LINUX_COMPACTION_H -#include - /* Return values for compact_zone() and try_to_compact_pages() */ /* compaction didn't start as it was not possible or direct reclaim was more suitable */ #define COMPACT_SKIPPED 0 @@ -13,23 +11,6 @@ /* The full zone was compacted */ #define COMPACT_COMPLETE 3 -/* - * compaction supports three modes - * - * COMPACT_ASYNC_MOVABLE uses asynchronous migration and only scans - * MIGRATE_MOVABLE pageblocks as migration sources and targets. - * COMPACT_ASYNC_UNMOVABLE uses asynchronous migration and only scans - * MIGRATE_MOVABLE pageblocks as migration sources. - * MIGRATE_UNMOVABLE pageblocks are scanned as potential migration - * targets and convers them to MIGRATE_MOVABLE if possible - * COMPACT_SYNC uses synchronous migration and scans all pageblocks - */ -enum compact_mode { - COMPACT_ASYNC_MOVABLE, - COMPACT_ASYNC_UNMOVABLE, - COMPACT_SYNC, -}; - #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern int sysctl_compaction_handler(struct ctl_table *table, int write, diff --git a/mm/compaction.c b/mm/compaction.c index 4ac338af512..7ea259d82a9 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -236,7 +236,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, */ while (unlikely(too_many_isolated(zone))) { /* async migration should just abort */ - if (cc->mode != COMPACT_SYNC) + if (!cc->sync) return 0; congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -304,8 +304,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, * satisfies the allocation */ pageblock_nr = low_pfn >> pageblock_order; - if (cc->mode != COMPACT_SYNC && - last_pageblock_nr != pageblock_nr && + if (!cc->sync && last_pageblock_nr != pageblock_nr && !migrate_async_suitable(get_pageblock_migratetype(page))) { low_pfn += pageblock_nr_pages; low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; @@ -326,7 +325,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, continue; } - if (cc->mode != COMPACT_SYNC) + if (!cc->sync) mode |= ISOLATE_ASYNC_MIGRATE; lruvec = mem_cgroup_page_lruvec(page, zone); @@ -361,90 +360,27 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, #endif /* CONFIG_COMPACTION || CONFIG_CMA */ #ifdef CONFIG_COMPACTION -/* - * Returns true if MIGRATE_UNMOVABLE pageblock was successfully - * converted to MIGRATE_MOVABLE type, false otherwise. - */ -static bool rescue_unmovable_pageblock(struct page *page) -{ - unsigned long pfn, start_pfn, end_pfn; - struct page *start_page, *end_page; - pfn = page_to_pfn(page); - start_pfn = pfn & ~(pageblock_nr_pages - 1); - end_pfn = start_pfn + pageblock_nr_pages; - - start_page = pfn_to_page(start_pfn); - end_page = pfn_to_page(end_pfn); - - /* Do not deal with pageblocks that overlap zones */ - if (page_zone(start_page) != page_zone(end_page)) - return false; - - for (page = start_page, pfn = start_pfn; page < end_page; pfn++, - page++) { - if (!pfn_valid_within(pfn)) - continue; - - if (PageBuddy(page)) { - int order = page_order(page); - - pfn += (1 << order) - 1; - page += (1 << order) - 1; - - continue; - } else if (page_count(page) == 0 || PageLRU(page)) - continue; - - return false; - } - - set_pageblock_migratetype(page, MIGRATE_MOVABLE); - move_freepages_block(page_zone(page), page, MIGRATE_MOVABLE); - return true; -} - -enum smt_result { - GOOD_AS_MIGRATION_TARGET, - FAIL_UNMOVABLE_TARGET, - FAIL_BAD_TARGET, -}; - -/* - * Returns GOOD_AS_MIGRATION_TARGET if the page is within a block - * suitable for migration to, FAIL_UNMOVABLE_TARGET if the page - * is within a MIGRATE_UNMOVABLE block, FAIL_BAD_TARGET otherwise. - */ -static enum smt_result suitable_migration_target(struct page *page, - struct compact_control *cc) +/* Returns true if the page is within a block suitable for migration to */ +static bool suitable_migration_target(struct page *page) { int migratetype = get_pageblock_migratetype(page); /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) - return FAIL_BAD_TARGET; + return false; /* If the page is a large free page, then allow migration */ if (PageBuddy(page) && page_order(page) >= pageblock_order) - return GOOD_AS_MIGRATION_TARGET; + return true; /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ - if (cc->mode != COMPACT_ASYNC_UNMOVABLE && - migrate_async_suitable(migratetype)) - return GOOD_AS_MIGRATION_TARGET; - - if (cc->mode == COMPACT_ASYNC_MOVABLE && - migratetype == MIGRATE_UNMOVABLE) - return FAIL_UNMOVABLE_TARGET; - - if (cc->mode != COMPACT_ASYNC_MOVABLE && - migratetype == MIGRATE_UNMOVABLE && - rescue_unmovable_pageblock(page)) - return GOOD_AS_MIGRATION_TARGET; + if (migrate_async_suitable(migratetype)) + return true; /* Otherwise skip the block */ - return FAIL_BAD_TARGET; + return false; } /* @@ -477,13 +413,6 @@ static void isolate_freepages(struct zone *zone, zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; - /* - * isolate_freepages() may be called more than once during - * compact_zone_order() run and we want only the most recent - * count. - */ - cc->nr_pageblocks_skipped = 0; - /* * Isolate free pages until enough are available to migrate the * pages on cc->migratepages. We stop searching if the migrate @@ -492,7 +421,6 @@ static void isolate_freepages(struct zone *zone, for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; pfn -= pageblock_nr_pages) { unsigned long isolated; - enum smt_result ret; if (!pfn_valid(pfn)) continue; @@ -509,12 +437,9 @@ static void isolate_freepages(struct zone *zone, continue; /* Check the block is suitable for migration */ - ret = suitable_migration_target(page, cc); - if (ret != GOOD_AS_MIGRATION_TARGET) { - if (ret == FAIL_UNMOVABLE_TARGET) - cc->nr_pageblocks_skipped++; + if (!suitable_migration_target(page)) continue; - } + /* * Found a block suitable for isolating free pages from. Now * we disabled interrupts, double check things are ok and @@ -523,14 +448,12 @@ static void isolate_freepages(struct zone *zone, */ isolated = 0; spin_lock_irqsave(&zone->lock, flags); - ret = suitable_migration_target(page, cc); - if (ret == GOOD_AS_MIGRATION_TARGET) { + if (suitable_migration_target(page)) { end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); isolated = isolate_freepages_block(pfn, end_pfn, freelist, false); nr_freepages += isolated; - } else if (ret == FAIL_UNMOVABLE_TARGET) - cc->nr_pageblocks_skipped++; + } spin_unlock_irqrestore(&zone->lock, flags); /* @@ -762,9 +685,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) nr_migrate = cc->nr_migratepages; err = migrate_pages(&cc->migratepages, compaction_alloc, - (unsigned long)&cc->freepages, false, - (cc->mode == COMPACT_SYNC) ? MIGRATE_SYNC_LIGHT - : MIGRATE_ASYNC); + (unsigned long)cc, false, + cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC); update_nr_listpages(cc); nr_remaining = cc->nr_migratepages; @@ -793,8 +715,7 @@ out: static unsigned long compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, - enum compact_mode mode, - unsigned long *nr_pageblocks_skipped) + bool sync) { struct compact_control cc = { .nr_freepages = 0, @@ -802,17 +723,12 @@ static unsigned long compact_zone_order(struct zone *zone, .order = order, .migratetype = allocflags_to_migratetype(gfp_mask), .zone = zone, - .mode = mode, + .sync = sync, }; - unsigned long rc; - INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); - rc = compact_zone(zone, &cc); - *nr_pageblocks_skipped = cc.nr_pageblocks_skipped; - - return rc; + return compact_zone(zone, &cc); } int sysctl_extfrag_threshold = 500; @@ -837,8 +753,6 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, struct zoneref *z; struct zone *zone; int rc = COMPACT_SKIPPED; - unsigned long nr_pageblocks_skipped; - enum compact_mode mode; /* * Check whether it is worth even starting compaction. The order check is @@ -855,22 +769,12 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, nodemask) { int status; - mode = sync ? COMPACT_SYNC : COMPACT_ASYNC_MOVABLE; -retry: - status = compact_zone_order(zone, order, gfp_mask, mode, - &nr_pageblocks_skipped); + status = compact_zone_order(zone, order, gfp_mask, sync); rc = max(status, rc); /* If a normal allocation would succeed, stop compacting */ if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0)) break; - - if (rc == COMPACT_COMPLETE && mode == COMPACT_ASYNC_MOVABLE) { - if (nr_pageblocks_skipped) { - mode = COMPACT_ASYNC_UNMOVABLE; - goto retry; - } - } } return rc; @@ -904,7 +808,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) if (ok && cc->order > zone->compact_order_failed) zone->compact_order_failed = cc->order + 1; /* Currently async compaction is never deferred. */ - else if (!ok && cc->mode == COMPACT_SYNC) + else if (!ok && cc->sync) defer_compaction(zone, cc->order); } @@ -919,7 +823,7 @@ int compact_pgdat(pg_data_t *pgdat, int order) { struct compact_control cc = { .order = order, - .mode = COMPACT_ASYNC_MOVABLE, + .sync = false, }; return __compact_pgdat(pgdat, &cc); @@ -929,7 +833,7 @@ static int compact_node(int nid) { struct compact_control cc = { .order = -1, - .mode = COMPACT_SYNC, + .sync = true, }; return __compact_pgdat(NODE_DATA(nid), &cc); diff --git a/mm/internal.h b/mm/internal.h index 5cbb7819004..2ba87fbfb75 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -94,9 +94,6 @@ extern void putback_lru_page(struct page *page); /* * in mm/page_alloc.c */ -extern void set_pageblock_migratetype(struct page *page, int migratetype); -extern int move_freepages_block(struct zone *zone, struct page *page, - int migratetype); extern void __free_pages_bootmem(struct page *page, unsigned int order); extern void prep_compound_page(struct page *page, unsigned long order); #ifdef CONFIG_MEMORY_FAILURE @@ -104,7 +101,6 @@ extern bool is_free_buddy_page(struct page *page); #endif #if defined CONFIG_COMPACTION || defined CONFIG_CMA -#include /* * in mm/compaction.c @@ -123,14 +119,11 @@ struct compact_control { unsigned long nr_migratepages; /* Number of pages to migrate */ unsigned long free_pfn; /* isolate_freepages search base */ unsigned long migrate_pfn; /* isolate_migratepages search base */ - enum compact_mode mode; /* Compaction mode */ + bool sync; /* Synchronous migration */ int order; /* order a direct compactor needs */ int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; - - /* Number of UNMOVABLE destination pageblocks skipped during scan */ - unsigned long nr_pageblocks_skipped; }; unsigned long diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6092f331b32..44030096da6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -219,7 +219,7 @@ EXPORT_SYMBOL(nr_online_nodes); int page_group_by_mobility_disabled __read_mostly; -void set_pageblock_migratetype(struct page *page, int migratetype) +static void set_pageblock_migratetype(struct page *page, int migratetype) { if (unlikely(page_group_by_mobility_disabled)) @@ -954,8 +954,8 @@ static int move_freepages(struct zone *zone, return pages_moved; } -int move_freepages_block(struct zone *zone, struct page *page, - int migratetype) +static int move_freepages_block(struct zone *zone, struct page *page, + int migratetype) { unsigned long start_pfn, end_pfn; struct page *start_page, *end_page; @@ -5651,7 +5651,7 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) .nr_migratepages = 0, .order = -1, .zone = page_zone(pfn_to_page(start)), - .mode = COMPACT_SYNC, + .sync = true, }; INIT_LIST_HEAD(&cc.migratepages); From d15cf7c129fa4ec4b44c52521e49ffafb9749029 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 3 Jun 2012 23:24:00 -0400 Subject: [PATCH 098/475] tools/power turbostat: fix un-intended affinity of forked program Linux 3.4 included a modification to turbostat to lower cross-call overhead by using scheduler affinity: 15aaa34654831e98dd76f7738b6c7f5d05a66430 (tools turbostat: reduce measurement overhead due to IPIs) In the use-case where turbostat forks a child program, that change had the un-intended side-effect of binding the child to the last cpu in the system. This change removed the binding before forking the child. This is a back-port of a fix already included in turbostat v2. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 28 +++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ab2f682fd44..d5d6a3dc9ba 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -73,8 +73,8 @@ int backwards_count; char *progname; int num_cpus; -cpu_set_t *cpu_mask; -size_t cpu_mask_size; +cpu_set_t *cpu_present_set, *cpu_mask; +size_t cpu_present_setsize, cpu_mask_size; struct counters { unsigned long long tsc; /* per thread */ @@ -103,6 +103,12 @@ struct timeval tv_even; struct timeval tv_odd; struct timeval tv_delta; +int mark_cpu_present(int pkg, int core, int cpu) +{ + CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); + return 0; +} + /* * cpu_mask_init(ncpus) * @@ -118,6 +124,18 @@ void cpu_mask_init(int ncpus) } cpu_mask_size = CPU_ALLOC_SIZE(ncpus); CPU_ZERO_S(cpu_mask_size, cpu_mask); + + /* + * Allocate and initialize cpu_present_set + */ + cpu_present_set = CPU_ALLOC(ncpus); + if (cpu_present_set == NULL) { + perror("CPU_ALLOC"); + exit(3); + } + cpu_present_setsize = CPU_ALLOC_SIZE(ncpus); + CPU_ZERO_S(cpu_present_setsize, cpu_present_set); + for_all_cpus(mark_cpu_present); } void cpu_mask_uninit() @@ -125,6 +143,9 @@ void cpu_mask_uninit() CPU_FREE(cpu_mask); cpu_mask = NULL; cpu_mask_size = 0; + CPU_FREE(cpu_present_set); + cpu_present_set = NULL; + cpu_present_setsize = 0; } int cpu_migrate(int cpu) @@ -1047,6 +1068,9 @@ int fork_it(char **argv) int retval; pid_t child_pid; get_counters(cnt_even); + + /* clear affinity side-effect of get_counters() */ + sched_setaffinity(0, cpu_present_setsize, cpu_present_set); gettimeofday(&tv_even, (struct timezone *)NULL); child_pid = fork(); From 650a37f32d2bc16fa802075be579802bc4ec4132 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 3 Jun 2012 23:34:44 -0400 Subject: [PATCH 099/475] tools/power turbostat: fix IVB support Initial IVB support went into turbostat in Linux-3.1: 553575f1ae048aa44682b46b3c51929a0b3ad337 (tools turbostat: recognize and run properly on IVB) However, when running on IVB, turbostat would fail to report the new couters added with SNB, c7, pc2 and pc7. So in scenarios where these counters are non-zero on IVB, turbostat would report erroneous residencey results. In particular c7 time would be added to c1 time, since c1 time is calculated as "that which is left over". Also, turbostat reports MHz capabilities when passed the "-v" option, and it would incorrectly report 133MHz bclk instead of 100MHz bclk for IVB, which would inflate GHz reported with that option. This patch is a backport of a fix already included in turbostat v2. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d5d6a3dc9ba..16de7ad4850 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -933,6 +933,8 @@ int is_snb(unsigned int family, unsigned int model) switch (model) { case 0x2A: case 0x2D: + case 0x3A: /* IVB */ + case 0x3D: /* IVB Xeon */ return 1; } return 0; From 2f07a6134f670755e6ce657d019c26305bfcef89 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 5 Jan 2012 10:47:58 -0200 Subject: [PATCH 100/475] drivers: acpi: Fix dependency for ACPI_HOTPLUG_CPU Fix the following build warning: warning: (ACPI_HOTPLUG_CPU) selects ACPI_CONTAINER which has unmet direct dependencies (ACPI && EXPERIMENTAL) Signed-off-by: Fabio Estevam Signed-off-by: Len Brown --- drivers/acpi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 47768ff8734..80998958cf4 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -208,7 +208,7 @@ config ACPI_IPMI config ACPI_HOTPLUG_CPU bool - depends on ACPI_PROCESSOR && HOTPLUG_CPU + depends on EXPERIMENTAL && ACPI_PROCESSOR && HOTPLUG_CPU select ACPI_CONTAINER default y From 5041caa4d5e6dae418963de0c8f8a83f35e35dcf Mon Sep 17 00:00:00 2001 From: Kukjin Kim Date: Mon, 4 Jun 2012 13:07:07 +0900 Subject: [PATCH 101/475] gpio/samsung: fix the typo 'exynos5_xxx' instead of 'exonys5_xxx' Should be 'exynos5_xxx' instead of 'exonys5_xxx'. It happened at the commit 30b842889eea ("Merge tag 'soc2' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc") during v3.5 merge window. Signed-off-by: Kukjin Kim [ My bad - Linus ] Signed-off-by: Linus Torvalds --- drivers/gpio/gpio-samsung.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-samsung.c b/drivers/gpio/gpio-samsung.c index 7bb00448e13..b6453d0e44a 100644 --- a/drivers/gpio/gpio-samsung.c +++ b/drivers/gpio/gpio-samsung.c @@ -2833,7 +2833,7 @@ static __init void exynos5_gpiolib_init(void) } /* need to set base address for gpc4 */ - exonys5_gpios_1[11].base = gpio_base1 + 0x2E0; + exynos5_gpios_1[11].base = gpio_base1 + 0x2E0; /* need to set base address for gpx */ chip = &exynos5_gpios_1[21]; From 7ae30986dc63d214cb075a40f2cf205f0a7806cd Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 4 Jun 2012 00:29:11 -0400 Subject: [PATCH 102/475] ACPI: fix acpi_bus.h build warnings when ACPI is not enabled introduced in Linux-3.5-rc1 by 66886d6f8c9bcdee3d7fce5796dcffd6b4bc0b48 (ACPI: Add stubs for (un)register_acpi_bus_type) Fix header file warnings when CONFIG_ACPI is not enabled: include/acpi/acpi_bus.h:443:42: warning: 'struct acpi_bus_type' declared inside parameter list include/acpi/acpi_bus.h:443:42: warning: its scope is only this definition or declaration, which is probably not include/acpi/acpi_bus.h:444:44: warning: 'struct acpi_bus_type' declared inside parameter list Signed-off-by: Len Brown --- include/acpi/acpi_bus.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index b0d62820ada..9e6e1c6eb60 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -440,8 +440,8 @@ static inline int acpi_pm_device_sleep_wake(struct device *dev, bool enable) #else /* CONFIG_ACPI */ -static int register_acpi_bus_type(struct acpi_bus_type *bus) { return 0; } -static int unregister_acpi_bus_type(struct acpi_bus_type *bus) { return 0; } +static inline int register_acpi_bus_type(void *bus) { return 0; } +static inline int unregister_acpi_bus_type(void *bus) { return 0; } #endif /* CONFIG_ACPI */ From 71b43fd573a60972b2175df4927c4ee10d757004 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 17 May 2012 17:51:53 -0300 Subject: [PATCH 103/475] RDMA/cxgb4: Fix crash when peer address is 0.0.0.0 When using rping -c -a 0.0.0.0 with iw_cxgb4, the system crashes when rdma_connect() is called. ip_dev_find() will return NULL, but pdev is accessed anyway. Checking that pdev is NULL and returning -ENODEV prevents the system from crashing. Signed-off-by: Thadeu Lima de Souza Cascardo Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 55ab284e22f..b18870c455a 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1593,6 +1593,10 @@ static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst, struct net_device *pdev; pdev = ip_dev_find(&init_net, peer_ip); + if (!pdev) { + err = -ENODEV; + goto out; + } ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, n, pdev, 0); if (!ep->l2t) From f1ae98da8525c6b8b1c301c3a2b0bd2b6515cca2 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 30 May 2012 10:48:29 +0200 Subject: [PATCH 104/475] ARM: dma-mapping: remove unconditional dependency on CMA CMA has been enabled unconditionally on all ARMv6+ systems to solve the long standing issue of double kernel mappings for all dma coherent buffers. This however created a dependency on CONFIG_EXPERIMENTAL for the whole ARM architecture what should be really avoided. This patch removes this dependency and lets one use old, well-tested dma-mapping implementation also on ARMv6+ systems without the need to use EXPERIMENTAL stuff. Reported-by: Russell King Signed-off-by: Marek Szyprowski --- arch/arm/Kconfig | 1 - arch/arm/mm/dma-mapping.c | 10 ++++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b649c5904a4..84449dd8f03 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -7,7 +7,6 @@ config ARM select HAVE_IDE if PCI || ISA || PCMCIA select HAVE_DMA_ATTRS select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7) - select CMA if (CPU_V6 || CPU_V6K || CPU_V7) select HAVE_MEMBLOCK select RTC_LIB select SYS_SUPPORTS_APM_EMULATION diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ea6b4315409..106c4c0ebcc 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -268,10 +268,8 @@ static int __init consistent_init(void) unsigned long base = consistent_base; unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; -#ifndef CONFIG_ARM_DMA_USE_IOMMU - if (cpu_architecture() >= CPU_ARCH_ARMv6) + if (IS_ENABLED(CONFIG_CMA) && !IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) return 0; -#endif consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); if (!consistent_pte) { @@ -342,7 +340,7 @@ static int __init coherent_init(void) struct page *page; void *ptr; - if (cpu_architecture() < CPU_ARCH_ARMv6) + if (!IS_ENABLED(CONFIG_CMA)) return 0; ptr = __alloc_from_contiguous(NULL, size, prot, &page); @@ -704,7 +702,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, if (arch_is_coherent() || nommu()) addr = __alloc_simple_buffer(dev, size, gfp, &page); - else if (cpu_architecture() < CPU_ARCH_ARMv6) + else if (!IS_ENABLED(CONFIG_CMA)) addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); else if (gfp & GFP_ATOMIC) addr = __alloc_from_pool(dev, size, &page, caller); @@ -773,7 +771,7 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, if (arch_is_coherent() || nommu()) { __dma_free_buffer(page, size); - } else if (cpu_architecture() < CPU_ARCH_ARMv6) { + } else if (!IS_ENABLED(CONFIG_CMA)) { __dma_free_remap(cpu_addr, size); __dma_free_buffer(page, size); } else { From 3aac6ff16a2097be668975fd51084df2e27e4999 Mon Sep 17 00:00:00 2001 From: Shlomo Pongratz Date: Thu, 24 May 2012 16:08:07 +0300 Subject: [PATCH 105/475] IB/mlx4: Fix EQ deallocation in legacy mode Commit e605b743f33d ("IB/mlx4: Increase the number of vectors (EQs) available for ULPs") didn't handle correctly the case where there aren't enough MSI-X vectors to increase the number of EQs, so only the legacy EQs are allocated. This results in an attempt to memset() to zero the EQ table which was never allocated and a kernel crash. Fix this by checking in the teardown flow if the table of EQs was ever allocated. Also remove some unneeded setting to zero of the EQ related fields in struct mlx4_ib_dev. Signed-off-by: Shlomo Pongratz Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index ee1c577238f..8afea12c8d4 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1084,12 +1084,9 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) int total_eqs = 0; int i, j, eq; - /* Init eq table */ - ibdev->eq_table = NULL; - ibdev->eq_added = 0; - - /* Legacy mode? */ - if (dev->caps.comp_pool == 0) + /* Legacy mode or comp_pool is not large enough */ + if (dev->caps.comp_pool == 0 || + dev->caps.num_ports > dev->caps.comp_pool) return; eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/ @@ -1135,7 +1132,10 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { int i; - int total_eqs; + + /* no additional eqs were added */ + if (!ibdev->eq_table) + return; /* Reset the advertised EQ number */ ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; @@ -1148,12 +1148,7 @@ static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) mlx4_release_eq(dev, ibdev->eq_table[i]); } - total_eqs = dev->caps.num_comp_vectors + ibdev->eq_added; - memset(ibdev->eq_table, 0, total_eqs * sizeof(int)); kfree(ibdev->eq_table); - - ibdev->eq_table = NULL; - ibdev->eq_added = 0; } static void *mlx4_ib_add(struct mlx4_dev *dev) From 1253585d7cb12a365e9effaf2f0c7991d2404996 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 21 May 2012 10:59:02 +0800 Subject: [PATCH 106/475] pinctrl: sirf: mark of_device_id match table as __devinitconst Signed-off-by: Barry Song Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-sirf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-sirf.c b/drivers/pinctrl/pinctrl-sirf.c index ba15b1a29e5..e9f8e7d1100 100644 --- a/drivers/pinctrl/pinctrl-sirf.c +++ b/drivers/pinctrl/pinctrl-sirf.c @@ -1184,7 +1184,7 @@ out_no_gpio_remap: return ret; } -static const struct of_device_id pinmux_ids[] = { +static const struct of_device_id pinmux_ids[] __devinitconst = { { .compatible = "sirf,prima2-gpio-pinmux" }, {} }; From bc66468cee4c23856b2e51d711e62e4ef773001a Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 23 May 2012 00:20:17 +0200 Subject: [PATCH 107/475] pinctrl: fix a minor harmless typo The way the for_each_maps() macro is currently used, using "i" instead of "_i_" works and is harmless. Still, this is a bug, that can trigger any time, if the code around that macro changes. Better fix it now. Acked-by: Stephen Warren Signed-off-by: Guennadi Liakhovetski Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index c3b331b74fa..0cc053af70b 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -61,7 +61,7 @@ static LIST_HEAD(pinctrl_maps); list_for_each_entry(_maps_node_, &pinctrl_maps, node) \ for (_i_ = 0, _map_ = &_maps_node_->maps[_i_]; \ _i_ < _maps_node_->num_maps; \ - i++, _map_ = &_maps_node_->maps[_i_]) + _i_++, _map_ = &_maps_node_->maps[_i_]) /** * pinctrl_provide_dummies() - indicate if pinctrl provides dummy state support From 0e9a126e95748e8283488818d98ae5bcb7b84515 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 23 May 2012 16:45:09 +0300 Subject: [PATCH 108/475] OMAPDSS: fix build when DEBUG_FS or DSS_DEBUG_SUPPORT disabled If CONFIG_DEBUG_FS or CONFIG_OMAP2_DSS_DEBUG_SUPPORT is disabled, the build fails: drivers/video/omap2/dss/core.c:197:50: error: static declaration of 'dss_debugfs_create_file' follows non-static declaration drivers/video/omap2/dss/dss.h:166:5: note: previous declaration of 'dss_debugfs_create_file' was here This patch fixes the dummy dss_debugfs_create_file() so that the driver builds. Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/dss/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/video/omap2/dss/core.c b/drivers/video/omap2/dss/core.c index 72ded9cd2cb..5066eee10cc 100644 --- a/drivers/video/omap2/dss/core.c +++ b/drivers/video/omap2/dss/core.c @@ -194,8 +194,7 @@ static inline int dss_initialize_debugfs(void) static inline void dss_uninitialize_debugfs(void) { } -static inline int dss_debugfs_create_file(const char *name, - void (*write)(struct seq_file *)) +int dss_debugfs_create_file(const char *name, void (*write)(struct seq_file *)) { return 0; } From d1700f9258552294300083eb62f83262f21ae1db Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 23 May 2012 16:56:09 +0300 Subject: [PATCH 109/475] OMAPDSS: Taal: fix compilation warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes a warning: drivers/video/omap2/displays/panel-taal.c: In function ‘taal_num_errors_show’: drivers/video/omap2/displays/panel-taal.c:529: warning: ‘errors’ may be used uninitialized in this function Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/displays/panel-taal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/omap2/displays/panel-taal.c b/drivers/video/omap2/displays/panel-taal.c index 2ce9992f403..901576eb5a8 100644 --- a/drivers/video/omap2/displays/panel-taal.c +++ b/drivers/video/omap2/displays/panel-taal.c @@ -526,7 +526,7 @@ static ssize_t taal_num_errors_show(struct device *dev, { struct omap_dss_device *dssdev = to_dss_device(dev); struct taal_data *td = dev_get_drvdata(&dssdev->dev); - u8 errors; + u8 errors = 0; int r; mutex_lock(&td->lock); From 5025ce070e2dff80bcf015a34a82edcf78229287 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 23 May 2012 17:02:26 +0300 Subject: [PATCH 110/475] OMAPDSS: fix bogus WARN_ON in dss_runtime_put() pm_runtime_put_sync() in dss_runtime_put() returns -EBUSY when any child of dss is still enabled. This happens, for example, when a display output is enabled and one dumps the clocks via debugfs. This causes dss_runtime_get & put to be called. While I couldn't find anything about this in the documentation and it wasn't immediately clear from runtime_pm code, it looks to me that pm_runtime_put_sync() returns -EBUSY to inform that things went fine, but the device could not be turned off as there are still child devices that are enabled. This is not a problem. This patch skips the WARN_ON if pm_runtime_put_sync() returns -EBUSY. Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/dss/dss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/omap2/dss/dss.c b/drivers/video/omap2/dss/dss.c index 6ea1ff149f6..770632359a1 100644 --- a/drivers/video/omap2/dss/dss.c +++ b/drivers/video/omap2/dss/dss.c @@ -731,7 +731,7 @@ static void dss_runtime_put(void) DSSDBG("dss_runtime_put\n"); r = pm_runtime_put_sync(&dss.pdev->dev); - WARN_ON(r < 0); + WARN_ON(r < 0 && r != -EBUSY); } /* DEBUGFS */ From c71c8fd4daa342ba714090586a55fc5db7eaa275 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 21 May 2012 20:55:40 +0800 Subject: [PATCH 111/475] regulator: palmas: Fix wrong kfree calls The devm_kzalloc function eliminates the need for manual resource releasing and simplify error handling. Resources allocated by devm_* are freed automatically on driver detach. Thus adding kfree calls here will introduce double free bug. The memory of desc array and the pointers to the rdev[] are allocated by devm_kzalloc call for struct palmas_pmic. struct palmas_pmic { struct palmas *palmas; struct device *dev; struct regulator_desc desc[PALMAS_NUM_REGS]; struct regulator_dev *rdev[PALMAS_NUM_REGS]; struct mutex mutex; int smps123; int smps457; int range[PALMAS_REG_SMPS10]; }; Which means we should not call kfree for pmic->rdev and pmic->desc. Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- drivers/regulator/palmas-regulator.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c index c4435f608df..9b7ca90057d 100644 --- a/drivers/regulator/palmas-regulator.c +++ b/drivers/regulator/palmas-regulator.c @@ -775,9 +775,6 @@ static __devinit int palmas_probe(struct platform_device *pdev) err_unregister_regulator: while (--id >= 0) regulator_unregister(pmic->rdev[id]); - kfree(pmic->rdev); - kfree(pmic->desc); - kfree(pmic); return ret; } @@ -788,10 +785,6 @@ static int __devexit palmas_remove(struct platform_device *pdev) for (id = 0; id < PALMAS_NUM_REGS; id++) regulator_unregister(pmic->rdev[id]); - - kfree(pmic->rdev); - kfree(pmic->desc); - kfree(pmic); return 0; } From 0c09d3150902d8ca7e763d508ac1a93701b21321 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 23 May 2012 09:08:08 +0800 Subject: [PATCH 112/475] regulator: anatop: Use correct __devexit_p annotation __devexit functions are discarded when CONFIG_HOTPLUG is not set, so the symbol needs to be referenced carefully. Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- drivers/regulator/anatop-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/anatop-regulator.c b/drivers/regulator/anatop-regulator.c index 3660bace123..e82e7eaac0f 100644 --- a/drivers/regulator/anatop-regulator.c +++ b/drivers/regulator/anatop-regulator.c @@ -224,7 +224,7 @@ static struct platform_driver anatop_regulator_driver = { .of_match_table = of_anatop_regulator_match_tbl, }, .probe = anatop_regulator_probe, - .remove = anatop_regulator_remove, + .remove = __devexit_p(anatop_regulator_remove), }; static int __init anatop_regulator_init(void) From 7d4be2f5ad223942577c2319153b86592f3da5b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Heiko=20St=C3=BCbner?= Date: Sun, 3 Jun 2012 21:30:33 +0200 Subject: [PATCH 113/475] regulator: gpio-regulator: do not pass drvdata pointer as reference Commit c172708d38a4 (regulator: core: Use a struct to pass in regulator runtime configuration) added the drvdata pointer only per reference to the new config array in the gpio-regulator. Signed-off-by: Heiko Stuebner Acked-by: Axel Lin Signed-off-by: Mark Brown --- drivers/regulator/gpio-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/gpio-regulator.c b/drivers/regulator/gpio-regulator.c index 9997d7aaca8..ebe2b5c2e2d 100644 --- a/drivers/regulator/gpio-regulator.c +++ b/drivers/regulator/gpio-regulator.c @@ -286,7 +286,7 @@ static int __devinit gpio_regulator_probe(struct platform_device *pdev) cfg.dev = &pdev->dev; cfg.init_data = config->init_data; - cfg.driver_data = &drvdata; + cfg.driver_data = drvdata; drvdata->dev = regulator_register(&drvdata->desc, &cfg); if (IS_ERR(drvdata->dev)) { From 00926369b745fb0c1e5c27cec35f6adc9752f2c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Heiko=20St=C3=BCbner?= Date: Sun, 3 Jun 2012 21:31:09 +0200 Subject: [PATCH 114/475] regulator: gpio-regulator: Fix finding of smallest value Commit 4dbd8f63f07a (regulator: gpio-regulator: Set the smallest voltage/current in the specified range) forgot to set the newly introduced best_val. Therefore it stayed always at INT_MAX thus breaking the setting of the voltage. Included is also an init value for target, as warnings about a possibly uninitialised target started appearing with this fix. Signed-off-by: Heiko Stuebner Acked-by: Axel Lin Signed-off-by: Mark Brown --- drivers/regulator/gpio-regulator.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/gpio-regulator.c b/drivers/regulator/gpio-regulator.c index ebe2b5c2e2d..2c38bea5065 100644 --- a/drivers/regulator/gpio-regulator.c +++ b/drivers/regulator/gpio-regulator.c @@ -104,13 +104,15 @@ static int gpio_regulator_set_value(struct regulator_dev *dev, int min, int max) { struct gpio_regulator_data *data = rdev_get_drvdata(dev); - int ptr, target, state, best_val = INT_MAX; + int ptr, target = 0, state, best_val = INT_MAX; for (ptr = 0; ptr < data->nr_states; ptr++) if (data->states[ptr].value < best_val && data->states[ptr].value >= min && - data->states[ptr].value <= max) + data->states[ptr].value <= max) { target = data->states[ptr].gpios; + best_val = data->states[ptr].value; + } if (best_val == INT_MAX) return -EINVAL; From b0e4d7bf8b5704904a5d138d81a8ec8b7145767f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Heiko=20St=C3=BCbner?= Date: Sun, 3 Jun 2012 21:32:05 +0200 Subject: [PATCH 115/475] regulator: gpio-regulator: populate selector from set_voltage This was missing until now and the underlying _regulator_do_set_voltage is using this value when calling list_voltage. Signed-off-by: Heiko Stuebner Acked-by: Axel Lin Signed-off-by: Mark Brown --- drivers/regulator/gpio-regulator.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/gpio-regulator.c b/drivers/regulator/gpio-regulator.c index 2c38bea5065..242851a4c1a 100644 --- a/drivers/regulator/gpio-regulator.c +++ b/drivers/regulator/gpio-regulator.c @@ -101,7 +101,7 @@ static int gpio_regulator_get_value(struct regulator_dev *dev) } static int gpio_regulator_set_value(struct regulator_dev *dev, - int min, int max) + int min, int max, unsigned *selector) { struct gpio_regulator_data *data = rdev_get_drvdata(dev); int ptr, target = 0, state, best_val = INT_MAX; @@ -112,6 +112,8 @@ static int gpio_regulator_set_value(struct regulator_dev *dev, data->states[ptr].value <= max) { target = data->states[ptr].gpios; best_val = data->states[ptr].value; + if (selector) + *selector = ptr; } if (best_val == INT_MAX) @@ -130,7 +132,7 @@ static int gpio_regulator_set_voltage(struct regulator_dev *dev, int min_uV, int max_uV, unsigned *selector) { - return gpio_regulator_set_value(dev, min_uV, max_uV); + return gpio_regulator_set_value(dev, min_uV, max_uV, selector); } static int gpio_regulator_list_voltage(struct regulator_dev *dev, @@ -147,7 +149,7 @@ static int gpio_regulator_list_voltage(struct regulator_dev *dev, static int gpio_regulator_set_current_limit(struct regulator_dev *dev, int min_uA, int max_uA) { - return gpio_regulator_set_value(dev, min_uA, max_uA); + return gpio_regulator_set_value(dev, min_uA, max_uA, NULL); } static struct regulator_ops gpio_regulator_voltage_ops = { From d298caae9a920e1745599ddd4a19073d9a033df0 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Fri, 1 Jun 2012 18:03:00 +0100 Subject: [PATCH 116/475] ASoC: dapm: Fix connected widget capture path query. Make sure we check the correct path for capture. Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 90ee77d2409..b47fe75444a 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -954,7 +954,7 @@ int snd_soc_dapm_dai_get_connected_widgets(struct snd_soc_dai *dai, int stream, if (stream == SNDRV_PCM_STREAM_PLAYBACK) paths = is_connected_output_ep(dai->playback_widget, list); else - paths = is_connected_input_ep(dai->playback_widget, list); + paths = is_connected_input_ep(dai->capture_widget, list); trace_snd_soc_dapm_connected(paths, stream); dapm_clear_walk(&card->dapm); From a1c85ec03738421668becb973f35fe6e1501c6e6 Mon Sep 17 00:00:00 2001 From: Richard Zhao Date: Mon, 4 Jun 2012 09:42:54 +0800 Subject: [PATCH 117/475] ASoC: imx-audmux: add pinctrl support Signed-off-by: Richard Zhao Acked-by: Dong Aisheng Signed-off-by: Mark Brown --- sound/soc/fsl/imx-audmux.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/fsl/imx-audmux.c b/sound/soc/fsl/imx-audmux.c index f23700359c6..080327414c6 100644 --- a/sound/soc/fsl/imx-audmux.c +++ b/sound/soc/fsl/imx-audmux.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "imx-audmux.h" @@ -249,6 +250,7 @@ EXPORT_SYMBOL_GPL(imx_audmux_v2_configure_port); static int __devinit imx_audmux_probe(struct platform_device *pdev) { struct resource *res; + struct pinctrl *pinctrl; const struct of_device_id *of_id = of_match_device(imx_audmux_dt_ids, &pdev->dev); @@ -257,6 +259,12 @@ static int __devinit imx_audmux_probe(struct platform_device *pdev) if (!audmux_base) return -EADDRNOTAVAIL; + pinctrl = devm_pinctrl_get_select_default(&pdev->dev); + if (IS_ERR(pinctrl)) { + dev_err(&pdev->dev, "setup pinctrl failed!"); + return PTR_ERR(pinctrl); + } + audmux_clk = clk_get(&pdev->dev, "audmux"); if (IS_ERR(audmux_clk)) { dev_dbg(&pdev->dev, "cannot get clock: %ld\n", From 2e063c305a3da896ec3db99e66891411d0e38820 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Mon, 4 Jun 2012 13:36:34 +0530 Subject: [PATCH 118/475] OMAPDSS: DSI: Fix bug when calculating LP command interleaving parameters In function dsi_compute_interleave_lp(), the escape clock/LP clock time period is calculated incorrectly. The escape clock/LP clock is calculated as: LP Clock(Hz) = DSI_FCLK(Hz) / lp_clk_div Since we are calculating the time period of LP clock, the LP clock divider should be multiplied with the time period of DSI_FCLK. Calculating incorrect value of txclkesc results in incorrect calculation of LP interleaving parameters, it also creates a possibility of a divide by zero error. Reported-by: Sureshkumar Manimuthu Signed-off-by: Archit Taneja Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/dss/dsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c index ec363d8390e..ca8382d346e 100644 --- a/drivers/video/omap2/dss/dsi.c +++ b/drivers/video/omap2/dss/dsi.c @@ -3724,7 +3724,7 @@ static int dsi_compute_interleave_lp(int blank, int enter_hs, int exit_hs, /* CLKIN4DDR = 16 * TXBYTECLKHS */ tlp_avail = thsbyte_clk * (blank - trans_lp); - ttxclkesc = tdsi_fclk / lp_clk_div; + ttxclkesc = tdsi_fclk * lp_clk_div; lp_inter = ((tlp_avail - 8 * thsbyte_clk - 5 * tdsi_fclk) / ttxclkesc - 26) / 16; From c1bf94ec1e12d76838ad485158aecf208ebd8fb9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 31 May 2012 17:38:11 +0200 Subject: [PATCH 119/475] iommu/amd: Cache pdev pointer to root-bridge At some point pci_get_bus_and_slot started to enable interrupts. Since this function is used in the amd_iommu_resume path it will enable interrupts on resume which causes a warning. The fix will use a cached pointer to the root-bridge to re-enable the IOMMU in case the BIOS is broken. Cc: stable@vger.kernel.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 13 +++++-------- drivers/iommu/amd_iommu_types.h | 3 +++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index c56790375e0..542024ba6db 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1029,6 +1029,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (!iommu->dev) return 1; + iommu->root_pdev = pci_get_bus_and_slot(iommu->dev->bus->number, + PCI_DEVFN(0, 0)); + iommu->cap_ptr = h->cap_ptr; iommu->pci_seg = h->pci_seg; iommu->mmio_phys = h->mmio_phys; @@ -1323,20 +1326,16 @@ static void iommu_apply_resume_quirks(struct amd_iommu *iommu) { int i, j; u32 ioc_feature_control; - struct pci_dev *pdev = NULL; + struct pci_dev *pdev = iommu->root_pdev; /* RD890 BIOSes may not have completely reconfigured the iommu */ - if (!is_rd890_iommu(iommu->dev)) + if (!is_rd890_iommu(iommu->dev) || !pdev) return; /* * First, we need to ensure that the iommu is enabled. This is * controlled by a register in the northbridge */ - pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0)); - - if (!pdev) - return; /* Select Northbridge indirect register 0x75 and enable writing */ pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); @@ -1346,8 +1345,6 @@ static void iommu_apply_resume_quirks(struct amd_iommu *iommu) if (!(ioc_feature_control & 0x1)) pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); - pci_dev_put(pdev); - /* Restore the iommu BAR */ pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, iommu->stored_addr_lo); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 2452f3b7173..24355559a2a 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -481,6 +481,9 @@ struct amd_iommu { /* Pointer to PCI device of this IOMMU */ struct pci_dev *dev; + /* Cache pdev to root device for resume quirks */ + struct pci_dev *root_pdev; + /* physical address of MMIO space */ u64 mmio_phys; /* virtual address of MMIO space */ From eee53537c476c947bf7faa1c916d2f5a0ae8ec93 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 1 Jun 2012 15:20:23 +0200 Subject: [PATCH 120/475] iommu/amd: Fix deadlock in ppr-handling error path In the error path of the ppr_notifer it can happen that the iommu->lock is taken recursivly. This patch fixes the problem by releasing the iommu->lock before any notifier is invoked. This also requires to move the erratum workaround for the ppr-log (interrupt may be faster than data in the log) one function up. Cc: stable@vger.kernel.org # v3.3, v3.4 Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 71 ++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index d90a421e9ca..a2e418cba0f 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -547,26 +547,12 @@ static void iommu_poll_events(struct amd_iommu *iommu) spin_unlock_irqrestore(&iommu->lock, flags); } -static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head) +static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) { struct amd_iommu_fault fault; - volatile u64 *raw; - int i; INC_STATS_COUNTER(pri_requests); - raw = (u64 *)(iommu->ppr_log + head); - - /* - * Hardware bug: Interrupt may arrive before the entry is written to - * memory. If this happens we need to wait for the entry to arrive. - */ - for (i = 0; i < LOOP_TIMEOUT; ++i) { - if (PPR_REQ_TYPE(raw[0]) != 0) - break; - udelay(1); - } - if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n"); return; @@ -578,12 +564,6 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head) fault.tag = PPR_TAG(raw[0]); fault.flags = PPR_FLAGS(raw[0]); - /* - * To detect the hardware bug we need to clear the entry - * to back to zero. - */ - raw[0] = raw[1] = 0; - atomic_notifier_call_chain(&ppr_notifier, 0, &fault); } @@ -595,25 +575,62 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu) if (iommu->ppr_log == NULL) return; + /* enable ppr interrupts again */ + writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET); + spin_lock_irqsave(&iommu->lock, flags); head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); while (head != tail) { + volatile u64 *raw; + u64 entry[2]; + int i; - /* Handle PPR entry */ - iommu_handle_ppr_entry(iommu, head); + raw = (u64 *)(iommu->ppr_log + head); - /* Update and refresh ring-buffer state*/ + /* + * Hardware bug: Interrupt may arrive before the entry is + * written to memory. If this happens we need to wait for the + * entry to arrive. + */ + for (i = 0; i < LOOP_TIMEOUT; ++i) { + if (PPR_REQ_TYPE(raw[0]) != 0) + break; + udelay(1); + } + + /* Avoid memcpy function-call overhead */ + entry[0] = raw[0]; + entry[1] = raw[1]; + + /* + * To detect the hardware bug we need to clear the entry + * back to zero. + */ + raw[0] = raw[1] = 0UL; + + /* Update head pointer of hardware ring-buffer */ head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + + /* + * Release iommu->lock because ppr-handling might need to + * re-aquire it + */ + spin_unlock_irqrestore(&iommu->lock, flags); + + /* Handle PPR entry */ + iommu_handle_ppr_entry(iommu, entry); + + spin_lock_irqsave(&iommu->lock, flags); + + /* Refresh ring-buffer information */ + head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); } - /* enable ppr interrupts again */ - writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET); - spin_unlock_irqrestore(&iommu->lock, flags); } From 4d5a0565cebf12c2ef854e4ac1961f13a710a950 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Mon, 30 Apr 2012 11:17:25 +0200 Subject: [PATCH 121/475] Btrfs: remove call to btrfs_header_nritems with no effect This is a leftover from cleanup patch 559af821. Before the cleanup, btrfs_header_nritems was called inside an if condition. As it has no side effects we need to preserve here, it should simply be dropped. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d7a96cfdc50..836e4e03edc 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1650,8 +1650,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; - btrfs_header_nritems(mid); - left = read_node_slot(root, parent, pslot - 1); if (left) { btrfs_tree_lock(left); @@ -1681,7 +1679,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, wret = push_node_left(trans, root, left, mid, 1); if (wret < 0) ret = wret; - btrfs_header_nritems(mid); } /* From ae58d1e406986f31d1e88b32f5ac601506c196d8 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 18 May 2012 09:29:34 -0600 Subject: [PATCH 122/475] i2c: Add generic I2C multiplexer using pinctrl API This is useful for SoCs whose I2C module's signals can be routed to different sets of pins at run-time, using the pinctrl API. +-----+ +-----+ | dev | | dev | +------------------------+ +-----+ +-----+ | SoC | | | | /----|------+--------+ | +---+ +------+ | child bus A, on first set of pins | |I2C|---|Pinmux| | | +---+ +------+ | child bus B, on second set of pins | \----|------+--------+--------+ | | | | | +------------------------+ +-----+ +-----+ +-----+ | dev | | dev | | dev | +-----+ +-----+ +-----+ Signed-off-by: Stephen Warren Acked-by: Linus Walleij Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- .../bindings/i2c/i2c-mux-pinctrl.txt | 93 ++++++ drivers/i2c/muxes/Kconfig | 12 + drivers/i2c/muxes/Makefile | 1 + drivers/i2c/muxes/i2c-mux-pinctrl.c | 279 ++++++++++++++++++ include/linux/i2c-mux-pinctrl.h | 41 +++ 5 files changed, 426 insertions(+) create mode 100644 Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt create mode 100644 drivers/i2c/muxes/i2c-mux-pinctrl.c create mode 100644 include/linux/i2c-mux-pinctrl.h diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt new file mode 100644 index 00000000000..ae8af1694e9 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt @@ -0,0 +1,93 @@ +Pinctrl-based I2C Bus Mux + +This binding describes an I2C bus multiplexer that uses pin multiplexing to +route the I2C signals, and represents the pin multiplexing configuration +using the pinctrl device tree bindings. + + +-----+ +-----+ + | dev | | dev | + +------------------------+ +-----+ +-----+ + | SoC | | | + | /----|------+--------+ + | +---+ +------+ | child bus A, on first set of pins + | |I2C|---|Pinmux| | + | +---+ +------+ | child bus B, on second set of pins + | \----|------+--------+--------+ + | | | | | + +------------------------+ +-----+ +-----+ +-----+ + | dev | | dev | | dev | + +-----+ +-----+ +-----+ + +Required properties: +- compatible: i2c-mux-pinctrl +- i2c-parent: The phandle of the I2C bus that this multiplexer's master-side + port is connected to. + +Also required are: + +* Standard pinctrl properties that specify the pin mux state for each child + bus. See ../pinctrl/pinctrl-bindings.txt. + +* Standard I2C mux properties. See mux.txt in this directory. + +* I2C child bus nodes. See mux.txt in this directory. + +For each named state defined in the pinctrl-names property, an I2C child bus +will be created. I2C child bus numbers are assigned based on the index into +the pinctrl-names property. + +The only exception is that no bus will be created for a state named "idle". If +such a state is defined, it must be the last entry in pinctrl-names. For +example: + + pinctrl-names = "ddc", "pta", "idle" -> ddc = bus 0, pta = bus 1 + pinctrl-names = "ddc", "idle", "pta" -> Invalid ("idle" not last) + pinctrl-names = "idle", "ddc", "pta" -> Invalid ("idle" not last) + +Whenever an access is made to a device on a child bus, the relevant pinctrl +state will be programmed into hardware. + +If an idle state is defined, whenever an access is not being made to a device +on a child bus, the idle pinctrl state will be programmed into hardware. + +If an idle state is not defined, the most recently used pinctrl state will be +left programmed into hardware whenever no access is being made of a device on +a child bus. + +Example: + + i2cmux { + compatible = "i2c-mux-pinctrl"; + #address-cells = <1>; + #size-cells = <0>; + + i2c-parent = <&i2c1>; + + pinctrl-names = "ddc", "pta", "idle"; + pinctrl-0 = <&state_i2cmux_ddc>; + pinctrl-1 = <&state_i2cmux_pta>; + pinctrl-2 = <&state_i2cmux_idle>; + + i2c@0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + + eeprom { + compatible = "eeprom"; + reg = <0x50>; + }; + }; + + i2c@1 { + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + + eeprom { + compatible = "eeprom"; + reg = <0x50>; + }; + }; + }; + diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig index beb2491db27..a0edd985421 100644 --- a/drivers/i2c/muxes/Kconfig +++ b/drivers/i2c/muxes/Kconfig @@ -37,4 +37,16 @@ config I2C_MUX_PCA954x This driver can also be built as a module. If so, the module will be called i2c-mux-pca954x. +config I2C_MUX_PINCTRL + tristate "pinctrl-based I2C multiplexer" + depends on PINCTRL + help + If you say yes to this option, support will be included for an I2C + multiplexer that uses the pinctrl subsystem, i.e. pin multiplexing. + This is useful for SoCs whose I2C module's signals can be routed to + different sets of pins at run-time. + + This driver can also be built as a module. If so, the module will be + called pinctrl-i2cmux. + endmenu diff --git a/drivers/i2c/muxes/Makefile b/drivers/i2c/muxes/Makefile index 5826249b29c..76da8692aff 100644 --- a/drivers/i2c/muxes/Makefile +++ b/drivers/i2c/muxes/Makefile @@ -4,5 +4,6 @@ obj-$(CONFIG_I2C_MUX_GPIO) += i2c-mux-gpio.o obj-$(CONFIG_I2C_MUX_PCA9541) += i2c-mux-pca9541.o obj-$(CONFIG_I2C_MUX_PCA954x) += i2c-mux-pca954x.o +obj-$(CONFIG_I2C_MUX_PINCTRL) += i2c-mux-pinctrl.o ccflags-$(CONFIG_I2C_DEBUG_BUS) := -DDEBUG diff --git a/drivers/i2c/muxes/i2c-mux-pinctrl.c b/drivers/i2c/muxes/i2c-mux-pinctrl.c new file mode 100644 index 00000000000..46a66976347 --- /dev/null +++ b/drivers/i2c/muxes/i2c-mux-pinctrl.c @@ -0,0 +1,279 @@ +/* + * I2C multiplexer using pinctrl API + * + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct i2c_mux_pinctrl { + struct device *dev; + struct i2c_mux_pinctrl_platform_data *pdata; + struct pinctrl *pinctrl; + struct pinctrl_state **states; + struct pinctrl_state *state_idle; + struct i2c_adapter *parent; + struct i2c_adapter **busses; +}; + +static int i2c_mux_pinctrl_select(struct i2c_adapter *adap, void *data, + u32 chan) +{ + struct i2c_mux_pinctrl *mux = data; + + return pinctrl_select_state(mux->pinctrl, mux->states[chan]); +} + +static int i2c_mux_pinctrl_deselect(struct i2c_adapter *adap, void *data, + u32 chan) +{ + struct i2c_mux_pinctrl *mux = data; + + return pinctrl_select_state(mux->pinctrl, mux->state_idle); +} + +#ifdef CONFIG_OF +static int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux, + struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + int num_names, i, ret; + struct device_node *adapter_np; + struct i2c_adapter *adapter; + + if (!np) + return 0; + + mux->pdata = devm_kzalloc(&pdev->dev, sizeof(*mux->pdata), GFP_KERNEL); + if (!mux->pdata) { + dev_err(mux->dev, + "Cannot allocate i2c_mux_pinctrl_platform_data\n"); + return -ENOMEM; + } + + num_names = of_property_count_strings(np, "pinctrl-names"); + if (num_names < 0) { + dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n", + num_names); + return num_names; + } + + mux->pdata->pinctrl_states = devm_kzalloc(&pdev->dev, + sizeof(*mux->pdata->pinctrl_states) * num_names, + GFP_KERNEL); + if (!mux->pdata->pinctrl_states) { + dev_err(mux->dev, "Cannot allocate pinctrl_states\n"); + return -ENOMEM; + } + + for (i = 0; i < num_names; i++) { + ret = of_property_read_string_index(np, "pinctrl-names", i, + &mux->pdata->pinctrl_states[mux->pdata->bus_count]); + if (ret < 0) { + dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n", + ret); + return ret; + } + if (!strcmp(mux->pdata->pinctrl_states[mux->pdata->bus_count], + "idle")) { + if (i != num_names - 1) { + dev_err(mux->dev, "idle state must be last\n"); + return -EINVAL; + } + mux->pdata->pinctrl_state_idle = "idle"; + } else { + mux->pdata->bus_count++; + } + } + + adapter_np = of_parse_phandle(np, "i2c-parent", 0); + if (!adapter_np) { + dev_err(mux->dev, "Cannot parse i2c-parent\n"); + return -ENODEV; + } + adapter = of_find_i2c_adapter_by_node(adapter_np); + if (!adapter) { + dev_err(mux->dev, "Cannot find parent bus\n"); + return -ENODEV; + } + mux->pdata->parent_bus_num = i2c_adapter_id(adapter); + put_device(&adapter->dev); + + return 0; +} +#else +static inline int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux, + struct platform_device *pdev) +{ + return 0; +} +#endif + +static int __devinit i2c_mux_pinctrl_probe(struct platform_device *pdev) +{ + struct i2c_mux_pinctrl *mux; + int (*deselect)(struct i2c_adapter *, void *, u32); + int i, ret; + + mux = devm_kzalloc(&pdev->dev, sizeof(*mux), GFP_KERNEL); + if (!mux) { + dev_err(&pdev->dev, "Cannot allocate i2c_mux_pinctrl\n"); + ret = -ENOMEM; + goto err; + } + platform_set_drvdata(pdev, mux); + + mux->dev = &pdev->dev; + + mux->pdata = pdev->dev.platform_data; + if (!mux->pdata) { + ret = i2c_mux_pinctrl_parse_dt(mux, pdev); + if (ret < 0) + goto err; + } + if (!mux->pdata) { + dev_err(&pdev->dev, "Missing platform data\n"); + ret = -ENODEV; + goto err; + } + + mux->states = devm_kzalloc(&pdev->dev, + sizeof(*mux->states) * mux->pdata->bus_count, + GFP_KERNEL); + if (!mux->states) { + dev_err(&pdev->dev, "Cannot allocate states\n"); + ret = -ENOMEM; + goto err; + } + + mux->busses = devm_kzalloc(&pdev->dev, + sizeof(mux->busses) * mux->pdata->bus_count, + GFP_KERNEL); + if (!mux->states) { + dev_err(&pdev->dev, "Cannot allocate busses\n"); + ret = -ENOMEM; + goto err; + } + + mux->pinctrl = devm_pinctrl_get(&pdev->dev); + if (IS_ERR(mux->pinctrl)) { + ret = PTR_ERR(mux->pinctrl); + dev_err(&pdev->dev, "Cannot get pinctrl: %d\n", ret); + goto err; + } + for (i = 0; i < mux->pdata->bus_count; i++) { + mux->states[i] = pinctrl_lookup_state(mux->pinctrl, + mux->pdata->pinctrl_states[i]); + if (IS_ERR(mux->states[i])) { + ret = PTR_ERR(mux->states[i]); + dev_err(&pdev->dev, + "Cannot look up pinctrl state %s: %d\n", + mux->pdata->pinctrl_states[i], ret); + goto err; + } + } + if (mux->pdata->pinctrl_state_idle) { + mux->state_idle = pinctrl_lookup_state(mux->pinctrl, + mux->pdata->pinctrl_state_idle); + if (IS_ERR(mux->state_idle)) { + ret = PTR_ERR(mux->state_idle); + dev_err(&pdev->dev, + "Cannot look up pinctrl state %s: %d\n", + mux->pdata->pinctrl_state_idle, ret); + goto err; + } + + deselect = i2c_mux_pinctrl_deselect; + } else { + deselect = NULL; + } + + mux->parent = i2c_get_adapter(mux->pdata->parent_bus_num); + if (!mux->parent) { + dev_err(&pdev->dev, "Parent adapter (%d) not found\n", + mux->pdata->parent_bus_num); + ret = -ENODEV; + goto err; + } + + for (i = 0; i < mux->pdata->bus_count; i++) { + u32 bus = mux->pdata->base_bus_num ? + (mux->pdata->base_bus_num + i) : 0; + + mux->busses[i] = i2c_add_mux_adapter(mux->parent, &pdev->dev, + mux, bus, i, + i2c_mux_pinctrl_select, + deselect); + if (!mux->busses[i]) { + ret = -ENODEV; + dev_err(&pdev->dev, "Failed to add adapter %d\n", i); + goto err_del_adapter; + } + } + + return 0; + +err_del_adapter: + for (; i > 0; i--) + i2c_del_mux_adapter(mux->busses[i - 1]); + i2c_put_adapter(mux->parent); +err: + return ret; +} + +static int __devexit i2c_mux_pinctrl_remove(struct platform_device *pdev) +{ + struct i2c_mux_pinctrl *mux = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < mux->pdata->bus_count; i++) + i2c_del_mux_adapter(mux->busses[i]); + + i2c_put_adapter(mux->parent); + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id i2c_mux_pinctrl_of_match[] __devinitconst = { + { .compatible = "i2c-mux-pinctrl", }, + {}, +}; +MODULE_DEVICE_TABLE(of, i2c_mux_pinctrl_of_match); +#endif + +static struct platform_driver i2c_mux_pinctrl_driver = { + .driver = { + .name = "i2c-mux-pinctrl", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(i2c_mux_pinctrl_of_match), + }, + .probe = i2c_mux_pinctrl_probe, + .remove = __devexit_p(i2c_mux_pinctrl_remove), +}; +module_platform_driver(i2c_mux_pinctrl_driver); + +MODULE_DESCRIPTION("pinctrl-based I2C multiplexer driver"); +MODULE_AUTHOR("Stephen Warren "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:i2c-mux-pinctrl"); diff --git a/include/linux/i2c-mux-pinctrl.h b/include/linux/i2c-mux-pinctrl.h new file mode 100644 index 00000000000..a65c86429e8 --- /dev/null +++ b/include/linux/i2c-mux-pinctrl.h @@ -0,0 +1,41 @@ +/* + * i2c-mux-pinctrl platform data + * + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _LINUX_I2C_MUX_PINCTRL_H +#define _LINUX_I2C_MUX_PINCTRL_H + +/** + * struct i2c_mux_pinctrl_platform_data - Platform data for i2c-mux-pinctrl + * @parent_bus_num: Parent I2C bus number + * @base_bus_num: Base I2C bus number for the child busses. 0 for dynamic. + * @bus_count: Number of child busses. Also the number of elements in + * @pinctrl_states + * @pinctrl_states: The names of the pinctrl state to select for each child bus + * @pinctrl_state_idle: The pinctrl state to select when no child bus is being + * accessed. If NULL, the most recently used pinctrl state will be left + * selected. + */ +struct i2c_mux_pinctrl_platform_data { + int parent_bus_num; + int base_bus_num; + int bus_count; + const char **pinctrl_states; + const char *pinctrl_state_idle; +}; + +#endif From 820234c86b982205aeeebde1ad71ccfe6bc4cc80 Mon Sep 17 00:00:00 2001 From: Ido Shayevitz Date: Mon, 4 Jun 2012 13:35:19 +0300 Subject: [PATCH 123/475] usb: gadget: atmel_usba_udc: Remove unneeded condition The removed condition is always true, since the endpoint descriptor is set prior to calling the enable endpoint. Signed-off-by: Ido Shayevitz Signed-off-by: Felipe Balbi --- drivers/usb/gadget/atmel_usba_udc.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/usb/gadget/atmel_usba_udc.c b/drivers/usb/gadget/atmel_usba_udc.c index e23bf7984aa..9a9bced813e 100644 --- a/drivers/usb/gadget/atmel_usba_udc.c +++ b/drivers/usb/gadget/atmel_usba_udc.c @@ -599,12 +599,6 @@ usba_ep_enable(struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) spin_lock_irqsave(&ep->udc->lock, flags); - if (ep->ep.desc) { - spin_unlock_irqrestore(&ep->udc->lock, flags); - DBG(DBG_ERR, "ep%d already enabled\n", ep->index); - return -EBUSY; - } - ep->ep.desc = desc; ep->ep.maxpacket = maxpacket; From fc065a09524991a5138835c6499e895d6be14049 Mon Sep 17 00:00:00 2001 From: Ido Shayevitz Date: Mon, 4 Jun 2012 13:35:20 +0300 Subject: [PATCH 124/475] usb: gadget: fsl_qe_udc: Remove unneeded condition The removed condition is always true, since the endpoint descriptor is set prior to calling the enable endpoint. Signed-off-by: Ido Shayevitz Signed-off-by: Felipe Balbi --- drivers/usb/gadget/fsl_qe_udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c index 51881f3bd07..b09452d6f33 100644 --- a/drivers/usb/gadget/fsl_qe_udc.c +++ b/drivers/usb/gadget/fsl_qe_udc.c @@ -1596,7 +1596,7 @@ static int qe_ep_enable(struct usb_ep *_ep, ep = container_of(_ep, struct qe_ep, ep); /* catch various bogus parameters */ - if (!_ep || !desc || ep->ep.desc || _ep->name == ep_name[0] || + if (!_ep || !desc || _ep->name == ep_name[0] || (desc->bDescriptorType != USB_DT_ENDPOINT)) return -EINVAL; From 1fa75972c7b9f165af102022aa0090fe0a3a6c16 Mon Sep 17 00:00:00 2001 From: Ido Shayevitz Date: Mon, 4 Jun 2012 13:35:21 +0300 Subject: [PATCH 125/475] usb: gadget: fsl_udc_core: Remove unneeded condition The removed condition is always true, since the endpoint descriptor is set prior to calling the enable endpoint. Signed-off-by: Ido Shayevitz Signed-off-by: Felipe Balbi --- drivers/usb/gadget/fsl_udc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/fsl_udc_core.c b/drivers/usb/gadget/fsl_udc_core.c index 28316858208..79984182198 100644 --- a/drivers/usb/gadget/fsl_udc_core.c +++ b/drivers/usb/gadget/fsl_udc_core.c @@ -567,7 +567,7 @@ static int fsl_ep_enable(struct usb_ep *_ep, ep = container_of(_ep, struct fsl_ep, ep); /* catch various bogus parameters */ - if (!_ep || !desc || ep->ep.desc + if (!_ep || !desc || (desc->bDescriptorType != USB_DT_ENDPOINT)) return -EINVAL; From aedaa44dd3214886d70fad92197b98a0ba286cb8 Mon Sep 17 00:00:00 2001 From: Ido Shayevitz Date: Mon, 4 Jun 2012 13:35:22 +0300 Subject: [PATCH 126/475] usb: gadget: goku_udc: Remove unneeded condition The removed condition is always true, since the endpoint descriptor is set prior to calling the enable endpoint. Signed-off-by: Ido Shayevitz Signed-off-by: Felipe Balbi --- drivers/usb/gadget/goku_udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c index b241e6c6a7f..3d28fb976c7 100644 --- a/drivers/usb/gadget/goku_udc.c +++ b/drivers/usb/gadget/goku_udc.c @@ -102,7 +102,7 @@ goku_ep_enable(struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) unsigned long flags; ep = container_of(_ep, struct goku_ep, ep); - if (!_ep || !desc || ep->ep.desc + if (!_ep || !desc || desc->bDescriptorType != USB_DT_ENDPOINT) return -EINVAL; dev = ep->dev; From e0f4f9d4c9138c40738104f80bd5008517fc712d Mon Sep 17 00:00:00 2001 From: Ido Shayevitz Date: Mon, 4 Jun 2012 13:35:24 +0300 Subject: [PATCH 127/475] usb: gadget: mv_udc_core: Remove unneeded condition The removed condition is always true, since the endpoint descriptor is set prior to calling the enable endpoint. Signed-off-by: Ido Shayevitz Acked-by: Yu Xu Signed-off-by: Felipe Balbi --- drivers/usb/gadget/mv_udc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/mv_udc_core.c b/drivers/usb/gadget/mv_udc_core.c index dbcd1329495..117a4bba1b8 100644 --- a/drivers/usb/gadget/mv_udc_core.c +++ b/drivers/usb/gadget/mv_udc_core.c @@ -464,7 +464,7 @@ static int mv_ep_enable(struct usb_ep *_ep, ep = container_of(_ep, struct mv_ep, ep); udc = ep->udc; - if (!_ep || !desc || ep->ep.desc + if (!_ep || !desc || desc->bDescriptorType != USB_DT_ENDPOINT) return -EINVAL; From 77964b3ce862262e4a22d4e5093c158f72439a0b Mon Sep 17 00:00:00 2001 From: Ido Shayevitz Date: Mon, 4 Jun 2012 13:35:25 +0300 Subject: [PATCH 128/475] usb: gadget: omap_udc: Remove unneeded condition The removed condition is always true, since the endpoint descriptor is set prior to calling the enable endpoint. Signed-off-by: Ido Shayevitz Signed-off-by: Felipe Balbi --- drivers/usb/gadget/omap_udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c index 7ba32469c5b..a460e8c204f 100644 --- a/drivers/usb/gadget/omap_udc.c +++ b/drivers/usb/gadget/omap_udc.c @@ -153,7 +153,7 @@ static int omap_ep_enable(struct usb_ep *_ep, u16 maxp; /* catch various bogus parameters */ - if (!_ep || !desc || ep->ep.desc + if (!_ep || !desc || desc->bDescriptorType != USB_DT_ENDPOINT || ep->bEndpointAddress != desc->bEndpointAddress || ep->maxpacket < usb_endpoint_maxp(desc)) { From 3ba0b31aed94167998defadfbf17d2196db05814 Mon Sep 17 00:00:00 2001 From: Ido Shayevitz Date: Mon, 4 Jun 2012 13:35:26 +0300 Subject: [PATCH 129/475] usb: gadget: pxa25x_udc: Remove unneeded condition The removed condition is always true, since the endpoint descriptor is set prior to calling the enable endpoint. Signed-off-by: Ido Shayevitz Signed-off-by: Felipe Balbi --- drivers/usb/gadget/pxa25x_udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c index d7c8cb3bf75..f7ff9e8e746 100644 --- a/drivers/usb/gadget/pxa25x_udc.c +++ b/drivers/usb/gadget/pxa25x_udc.c @@ -218,7 +218,7 @@ static int pxa25x_ep_enable (struct usb_ep *_ep, struct pxa25x_udc *dev; ep = container_of (_ep, struct pxa25x_ep, ep); - if (!_ep || !desc || ep->ep.desc || _ep->name == ep0name + if (!_ep || !desc || _ep->name == ep0name || desc->bDescriptorType != USB_DT_ENDPOINT || ep->bEndpointAddress != desc->bEndpointAddress || ep->fifo_size < usb_endpoint_maxp (desc)) { From 0561ed440fbab943e1c881cab626bd7019647385 Mon Sep 17 00:00:00 2001 From: Ido Shayevitz Date: Mon, 4 Jun 2012 13:35:27 +0300 Subject: [PATCH 130/475] usb: gadget: s3c2410_udc: Remove unneeded condition The removed condition is always true, since the endpoint descriptor is set prior to calling the enable endpoint. Signed-off-by: Ido Shayevitz Signed-off-by: Felipe Balbi --- drivers/usb/gadget/s3c2410_udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c index 3de71d37d75..f2e51f50e52 100644 --- a/drivers/usb/gadget/s3c2410_udc.c +++ b/drivers/usb/gadget/s3c2410_udc.c @@ -1062,7 +1062,7 @@ static int s3c2410_udc_ep_enable(struct usb_ep *_ep, ep = to_s3c2410_ep(_ep); - if (!_ep || !desc || ep->ep.desc + if (!_ep || !desc || _ep->name == ep0name || desc->bDescriptorType != USB_DT_ENDPOINT) return -EINVAL; From 109f0f718375e12bfdee53f025d85d94efed32a3 Mon Sep 17 00:00:00 2001 From: Ido Shayevitz Date: Mon, 4 Jun 2012 13:35:28 +0300 Subject: [PATCH 131/475] usb: gadget: s3c-hsudc.c: Remove unneeded condition The removed condition is always true, since the endpoint descriptor is set prior to calling the enable endpoint. Signed-off-by: Ido Shayevitz Acked-by: Heiko Stuebner Signed-off-by: Felipe Balbi --- drivers/usb/gadget/s3c-hsudc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/s3c-hsudc.c b/drivers/usb/gadget/s3c-hsudc.c index 36c6836eeb0..236b271871a 100644 --- a/drivers/usb/gadget/s3c-hsudc.c +++ b/drivers/usb/gadget/s3c-hsudc.c @@ -760,7 +760,7 @@ static int s3c_hsudc_ep_enable(struct usb_ep *_ep, u32 ecr = 0; hsep = our_ep(_ep); - if (!_ep || !desc || hsep->ep.desc || _ep->name == ep0name + if (!_ep || !desc || _ep->name == ep0name || desc->bDescriptorType != USB_DT_ENDPOINT || hsep->bEndpointAddress != desc->bEndpointAddress || ep_maxpacket(hsep) < usb_endpoint_maxp(desc)) From 80e91fd59bc5291c6fe322551f644a58cf2c5e98 Mon Sep 17 00:00:00 2001 From: Christoph Fritz Date: Mon, 4 Jun 2012 17:24:03 +0200 Subject: [PATCH 132/475] usb: gadget: regression fix - usage of usb_ep This patch removes redundant pointer to struct usb_endpoint_descriptor which were missed in commit 79149b8: usb: gadget: Update fsl_udc_core to use usb_endpoint_descriptor inside the struct usb_ep Due to clock framework regressions, this patch is only compile tested! Signed-off-by: Christoph Fritz Signed-off-by: Felipe Balbi --- drivers/usb/gadget/fsl_udc_core.c | 2 +- drivers/usb/gadget/fsl_usb2_udc.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/fsl_udc_core.c b/drivers/usb/gadget/fsl_udc_core.c index 79984182198..bc6f9bb9994 100644 --- a/drivers/usb/gadget/fsl_udc_core.c +++ b/drivers/usb/gadget/fsl_udc_core.c @@ -2575,7 +2575,7 @@ static int __init fsl_udc_probe(struct platform_device *pdev) /* for ep0: the desc defined here; * for other eps, gadget layer called ep_enable with defined desc */ - udc_controller->eps[0].desc = &fsl_ep0_desc; + udc_controller->eps[0].ep.desc = &fsl_ep0_desc; udc_controller->eps[0].ep.maxpacket = USB_MAX_CTRL_PAYLOAD; /* setup the udc->eps[] for non-control endpoints and link diff --git a/drivers/usb/gadget/fsl_usb2_udc.h b/drivers/usb/gadget/fsl_usb2_udc.h index 5cd7b7e7ddb..f61a967f708 100644 --- a/drivers/usb/gadget/fsl_usb2_udc.h +++ b/drivers/usb/gadget/fsl_usb2_udc.h @@ -568,10 +568,10 @@ static void dump_msg(const char *label, const u8 * buf, unsigned int length) /* * ### internal used help routines. */ -#define ep_index(EP) ((EP)->desc->bEndpointAddress&0xF) +#define ep_index(EP) ((EP)->ep.desc->bEndpointAddress&0xF) #define ep_maxpacket(EP) ((EP)->ep.maxpacket) #define ep_is_in(EP) ( (ep_index(EP) == 0) ? (EP->udc->ep0_dir == \ - USB_DIR_IN ):((EP)->desc->bEndpointAddress \ + USB_DIR_IN) : ((EP)->ep.desc->bEndpointAddress \ & USB_DIR_IN)==USB_DIR_IN) #define get_ep_by_pipe(udc, pipe) ((pipe == 1)? &udc->eps[0]: \ &udc->eps[pipe]) From 6594b2d7b1ef8260e6e36ddc96bd37a40e39ba80 Mon Sep 17 00:00:00 2001 From: Jon Povey Date: Fri, 25 May 2012 10:50:18 +0900 Subject: [PATCH 133/475] usb: musb: davinci: Fix build breakage This appears to have been broken by commit 5cfb19ac604a68c030b245561f575c2d1bac1d49 (ARM: davinci: streamline sysmod access) For now, fix by hardcoding USB_PHY_CTRL and DM355_DEEPSLEEP Tested on DM365 with defconfig changes. Signed-off-by: Jon Povey Acked-by: Sekhar Nori CC: Felipe Balbi Cc: # v3.4.x Signed-off-by: Felipe Balbi --- drivers/usb/musb/davinci.c | 1 + drivers/usb/musb/davinci.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/musb/davinci.c b/drivers/usb/musb/davinci.c index 768b4b55c81..9d63ba4d10d 100644 --- a/drivers/usb/musb/davinci.c +++ b/drivers/usb/musb/davinci.c @@ -34,6 +34,7 @@ #include #include +#include #include diff --git a/drivers/usb/musb/davinci.h b/drivers/usb/musb/davinci.h index 046c84433ca..371baa0ee50 100644 --- a/drivers/usb/musb/davinci.h +++ b/drivers/usb/musb/davinci.h @@ -15,7 +15,7 @@ */ /* Integrated highspeed/otg PHY */ -#define USBPHY_CTL_PADDR (DAVINCI_SYSTEM_MODULE_BASE + 0x34) +#define USBPHY_CTL_PADDR 0x01c40034 #define USBPHY_DATAPOL BIT(11) /* (dm355) switch D+/D- */ #define USBPHY_PHYCLKGD BIT(8) #define USBPHY_SESNDEN BIT(7) /* v(sess_end) comparator */ @@ -27,7 +27,7 @@ #define USBPHY_OTGPDWN BIT(1) #define USBPHY_PHYPDWN BIT(0) -#define DM355_DEEPSLEEP_PADDR (DAVINCI_SYSTEM_MODULE_BASE + 0x48) +#define DM355_DEEPSLEEP_PADDR 0x01c40048 #define DRVVBUS_FORCE BIT(2) #define DRVVBUS_OVERRIDE BIT(1) From 08f75bf14fadaa81fe362d5acda9b77b113dd0a2 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sat, 26 May 2012 00:21:33 +0300 Subject: [PATCH 134/475] usb: musb_gadget: fix crash caused by dangling pointer usb_ep_ops.disable must clear external copy of the endpoint descriptor, otherwise musb crashes after loading/unloading several gadget modules in a row: Unable to handle kernel paging request at virtual address bf013730 pgd = c0004000 [bf013730] *pgd=8f26d811, *pte=00000000, *ppte=00000000 Internal error: Oops: 7 [#1] Modules linked in: g_cdc [last unloaded: g_file_storage] CPU: 0 Not tainted (3.2.17 #647) PC is at musb_gadget_enable+0x4c/0x24c LR is at _raw_spin_lock_irqsave+0x4c/0x58 [] (musb_gadget_enable+0x4c/0x24c) from [] (gether_connect+0x3c/0x19c [g_cdc]) [] (gether_connect+0x3c/0x19c [g_cdc]) from [] (ecm_set_alt+0x15c/0x180 [g_cdc]) [] (ecm_set_alt+0x15c/0x180 [g_cdc]) from [] (composite_setup+0x85c/0xac4 [g_cdc]) [] (composite_setup+0x85c/0xac4 [g_cdc]) from [] (musb_g_ep0_irq+0x844/0x924) [] (musb_g_ep0_irq+0x844/0x924) from [] (musb_interrupt+0x79c/0x864) [] (musb_interrupt+0x79c/0x864) from [] (generic_interrupt+0x64/0x7c) [] (generic_interrupt+0x64/0x7c) from [] (handle_irq_event_percpu+0x28/0x178) ... Cc: stable@vger.kernel.org # v3.1+ Signed-off-by: Grazvydas Ignotas Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index f42c29b11f7..95918dacc99 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1232,6 +1232,7 @@ static int musb_gadget_disable(struct usb_ep *ep) } musb_ep->desc = NULL; + musb_ep->end_point.desc = NULL; /* abort all pending DMA and requests */ nuke(musb_ep, -ESHUTDOWN); From bec4596b4e6770c7037f21f6bd27567b152dc0d6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Jun 2012 00:18:19 +0000 Subject: [PATCH 135/475] drop_monitor: dont sleep in atomic context drop_monitor calls several sleeping functions while in atomic context. BUG: sleeping function called from invalid context at mm/slub.c:943 in_atomic(): 1, irqs_disabled(): 0, pid: 2103, name: kworker/0:2 Pid: 2103, comm: kworker/0:2 Not tainted 3.5.0-rc1+ #55 Call Trace: [] __might_sleep+0xca/0xf0 [] kmem_cache_alloc_node+0x1b3/0x1c0 [] ? queue_delayed_work_on+0x11c/0x130 [] __alloc_skb+0x4b/0x230 [] ? reset_per_cpu_data+0x160/0x160 [drop_monitor] [] reset_per_cpu_data+0x2f/0x160 [drop_monitor] [] send_dm_alert+0x4b/0xb0 [drop_monitor] [] process_one_work+0x130/0x4c0 [] worker_thread+0x159/0x360 [] ? manage_workers.isra.27+0x240/0x240 [] kthread+0x93/0xa0 [] kernel_thread_helper+0x4/0x10 [] ? kthread_freezable_should_stop+0x80/0x80 [] ? gs_change+0xb/0xb Rework the logic to call the sleeping functions in right context. Use standard timer/workqueue api to let system chose any cpu to perform the allocation and netlink send. Also avoid a loop if reset_per_cpu_data() cannot allocate memory : use mod_timer() to wait 1/10 second before next try. Signed-off-by: Eric Dumazet Cc: Neil Horman Reviewed-by: Neil Horman Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 102 +++++++++++++--------------------------- 1 file changed, 33 insertions(+), 69 deletions(-) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index ea5fb9fcc3f..d23b6682f4e 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -36,9 +36,6 @@ #define TRACE_ON 1 #define TRACE_OFF 0 -static void send_dm_alert(struct work_struct *unused); - - /* * Globals, our netlink socket pointer * and the work handle that will send up @@ -48,11 +45,10 @@ static int trace_state = TRACE_OFF; static DEFINE_MUTEX(trace_state_mutex); struct per_cpu_dm_data { - struct work_struct dm_alert_work; - struct sk_buff __rcu *skb; - atomic_t dm_hit_count; - struct timer_list send_timer; - int cpu; + spinlock_t lock; + struct sk_buff *skb; + struct work_struct dm_alert_work; + struct timer_list send_timer; }; struct dm_hw_stat_delta { @@ -78,13 +74,13 @@ static int dm_delay = 1; static unsigned long dm_hw_check_delta = 2*HZ; static LIST_HEAD(hw_stats_list); -static void reset_per_cpu_data(struct per_cpu_dm_data *data) +static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) { size_t al; struct net_dm_alert_msg *msg; struct nlattr *nla; struct sk_buff *skb; - struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1); + unsigned long flags; al = sizeof(struct net_dm_alert_msg); al += dm_hit_limit * sizeof(struct net_dm_drop_point); @@ -99,65 +95,40 @@ static void reset_per_cpu_data(struct per_cpu_dm_data *data) sizeof(struct net_dm_alert_msg)); msg = nla_data(nla); memset(msg, 0, al); - } else - schedule_work_on(data->cpu, &data->dm_alert_work); - - /* - * Don't need to lock this, since we are guaranteed to only - * run this on a single cpu at a time. - * Note also that we only update data->skb if the old and new skb - * pointers don't match. This ensures that we don't continually call - * synchornize_rcu if we repeatedly fail to alloc a new netlink message. - */ - if (skb != oskb) { - rcu_assign_pointer(data->skb, skb); - - synchronize_rcu(); - - atomic_set(&data->dm_hit_count, dm_hit_limit); + } else { + mod_timer(&data->send_timer, jiffies + HZ / 10); } + spin_lock_irqsave(&data->lock, flags); + swap(data->skb, skb); + spin_unlock_irqrestore(&data->lock, flags); + + return skb; } -static void send_dm_alert(struct work_struct *unused) +static void send_dm_alert(struct work_struct *work) { struct sk_buff *skb; - struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + struct per_cpu_dm_data *data; - WARN_ON_ONCE(data->cpu != smp_processor_id()); + data = container_of(work, struct per_cpu_dm_data, dm_alert_work); - /* - * Grab the skb we're about to send - */ - skb = rcu_dereference_protected(data->skb, 1); + skb = reset_per_cpu_data(data); - /* - * Replace it with a new one - */ - reset_per_cpu_data(data); - - /* - * Ship it! - */ if (skb) genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); - - put_cpu_var(dm_cpu_data); } /* * This is the timer function to delay the sending of an alert * in the event that more drops will arrive during the - * hysteresis period. Note that it operates under the timer interrupt - * so we don't need to disable preemption here + * hysteresis period. */ -static void sched_send_work(unsigned long unused) +static void sched_send_work(unsigned long _data) { - struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data; - schedule_work_on(smp_processor_id(), &data->dm_alert_work); - - put_cpu_var(dm_cpu_data); + schedule_work(&data->dm_alert_work); } static void trace_drop_common(struct sk_buff *skb, void *location) @@ -167,33 +138,28 @@ static void trace_drop_common(struct sk_buff *skb, void *location) struct nlattr *nla; int i; struct sk_buff *dskb; - struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + struct per_cpu_dm_data *data; + unsigned long flags; - - rcu_read_lock(); - dskb = rcu_dereference(data->skb); + local_irq_save(flags); + data = &__get_cpu_var(dm_cpu_data); + spin_lock(&data->lock); + dskb = data->skb; if (!dskb) goto out; - if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { - /* - * we're already at zero, discard this hit - */ - goto out; - } - nlh = (struct nlmsghdr *)dskb->data; nla = genlmsg_data(nlmsg_data(nlh)); msg = nla_data(nla); for (i = 0; i < msg->entries; i++) { if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { msg->points[i].count++; - atomic_inc(&data->dm_hit_count); goto out; } } - + if (msg->entries == dm_hit_limit) + goto out; /* * We need to create a new entry */ @@ -205,13 +171,11 @@ static void trace_drop_common(struct sk_buff *skb, void *location) if (!timer_pending(&data->send_timer)) { data->send_timer.expires = jiffies + dm_delay * HZ; - add_timer_on(&data->send_timer, smp_processor_id()); + add_timer(&data->send_timer); } out: - rcu_read_unlock(); - put_cpu_var(dm_cpu_data); - return; + spin_unlock_irqrestore(&data->lock, flags); } static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location) @@ -418,11 +382,11 @@ static int __init init_net_drop_monitor(void) for_each_possible_cpu(cpu) { data = &per_cpu(dm_cpu_data, cpu); - data->cpu = cpu; INIT_WORK(&data->dm_alert_work, send_dm_alert); init_timer(&data->send_timer); - data->send_timer.data = cpu; + data->send_timer.data = (unsigned long)data; data->send_timer.function = sched_send_work; + spin_lock_init(&data->lock); reset_per_cpu_data(data); } From 9ec0db71af04f4560e27a3c2f5a0411ba3155198 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 4 Jun 2012 05:51:18 +0000 Subject: [PATCH 136/475] net: icplus: fix interrupt mask This patch fixes the interrupt mask for IC101 A/G devices and now enables the link/speed/duplex interrupts. This is done by setting the "INTR pin used" bit and cleaning all the other bits in the Register 17. Reported-by: Stuart Menefy Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/phy/icplus.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c index 5ac46f5226f..47f8e893926 100644 --- a/drivers/net/phy/icplus.c +++ b/drivers/net/phy/icplus.c @@ -41,6 +41,8 @@ MODULE_LICENSE("GPL"); #define IP1001_APS_ON 11 /* IP1001 APS Mode bit */ #define IP101A_G_APS_ON 2 /* IP101A/G APS Mode bit */ #define IP101A_G_IRQ_CONF_STATUS 0x11 /* Conf Info IRQ & Status Reg */ +#define IP101A_G_IRQ_PIN_USED (1<<15) /* INTR pin used */ +#define IP101A_G_IRQ_DEFAULT IP101A_G_IRQ_PIN_USED static int ip175c_config_init(struct phy_device *phydev) { @@ -136,6 +138,11 @@ static int ip1001_config_init(struct phy_device *phydev) if (c < 0) return c; + /* INTR pin used: speed/link/duplex will cause an interrupt */ + c = phy_write(phydev, IP101A_G_IRQ_CONF_STATUS, IP101A_G_IRQ_DEFAULT); + if (c < 0) + return c; + if (phydev->interface == PHY_INTERFACE_MODE_RGMII) { /* Additional delay (2ns) used to adjust RX clock phase * at RGMII interface */ From dc5cd894cace7bda4a743487a9f87d59a3f0a095 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Mon, 4 Jun 2012 06:42:38 +0000 Subject: [PATCH 137/475] net/hyperv: Use wait_event on outstanding sends during device removal Change the busy-waiting/udelay to wait_event on outstanding sends. Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 1 + drivers/net/hyperv/netvsc.c | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 4ffcd57b011..2857ab078aa 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -478,6 +478,7 @@ struct netvsc_device { u32 nvsp_version; atomic_t num_outstanding_sends; + wait_queue_head_t wait_drain; bool start_remove; bool destroy; /* diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 8b919471472..0c569831db5 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -42,6 +42,7 @@ static struct netvsc_device *alloc_net_device(struct hv_device *device) if (!net_device) return NULL; + init_waitqueue_head(&net_device->wait_drain); net_device->start_remove = false; net_device->destroy = false; net_device->dev = device; @@ -387,12 +388,8 @@ int netvsc_device_remove(struct hv_device *device) spin_unlock_irqrestore(&device->channel->inbound_lock, flags); /* Wait for all send completions */ - while (atomic_read(&net_device->num_outstanding_sends)) { - dev_info(&device->device, - "waiting for %d requests to complete...\n", - atomic_read(&net_device->num_outstanding_sends)); - udelay(100); - } + wait_event(net_device->wait_drain, + atomic_read(&net_device->num_outstanding_sends) == 0); netvsc_disconnect_vsp(net_device); @@ -486,6 +483,9 @@ static void netvsc_send_completion(struct hv_device *device, num_outstanding_sends = atomic_dec_return(&net_device->num_outstanding_sends); + if (net_device->destroy && num_outstanding_sends == 0) + wake_up(&net_device->wait_drain); + if (netif_queue_stopped(ndev) && !net_device->start_remove && (hv_ringbuf_avail_percent(&device->channel->outbound) > RING_AVAIL_PERCENT_HIWATER || From 0640113be25d283e0ff77a9f041e1242182387f0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 4 Jun 2012 11:00:45 -0700 Subject: [PATCH 138/475] vfs: Fix /proc//fdinfo/ file handling Cyrill Gorcunov reports that I broke the fdinfo files with commit 30a08bf2d31d ("proc: move fd symlink i_mode calculations into tid_fd_revalidate()"), and he's quite right. The tid_fd_revalidate() function is not just used for the /fd symlinks, it's also used for the /fdinfo/ files, and the permission model for those are different. So do the dynamic symlink permission handling just for symlinks, making the fdinfo files once more appear as the proper regular files they are. Of course, Al Viro argued (probably correctly) that we shouldn't do the symlink permission games at all, and make the symlinks always just be the normal 'lrwxrwxrwx'. That would have avoided this issue too, but since somebody noticed that the permissions had changed (which was the reason for that original commit 30a08bf2d31d in the first place), people do apparently use this feature. [ Basically, you can use the symlink permission data as a cheap "fdinfo" replacement, since you see whether the file is open for reading and/or writing by just looking at st_mode of the symlink. So the feature does make sense, even if the pain it has caused means we probably shouldn't have done it to begin with. ] Reported-and-tested-by: Cyrill Gorcunov Signed-off-by: Linus Torvalds --- fs/proc/base.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 616f41a7cde..437195f204e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1803,7 +1803,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - unsigned i_mode, f_mode = file->f_mode; + unsigned f_mode = file->f_mode; rcu_read_unlock(); put_files_struct(files); @@ -1819,12 +1819,14 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) inode->i_gid = GLOBAL_ROOT_GID; } - i_mode = S_IFLNK; - if (f_mode & FMODE_READ) - i_mode |= S_IRUSR | S_IXUSR; - if (f_mode & FMODE_WRITE) - i_mode |= S_IWUSR | S_IXUSR; - inode->i_mode = i_mode; + if (S_ISLNK(inode->i_mode)) { + unsigned i_mode = S_IFLNK; + if (f_mode & FMODE_READ) + i_mode |= S_IRUSR | S_IXUSR; + if (f_mode & FMODE_WRITE) + i_mode |= S_IWUSR | S_IXUSR; + inode->i_mode = i_mode; + } security_task_to_inode(task, inode); put_task_struct(task); @@ -1859,6 +1861,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, ei = PROC_I(inode); ei->fd = fd; + inode->i_mode = S_IFLNK; inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; ei->op.proc_get_link = proc_fd_link; From 3eef8918ff440837f6af791942d8dd07e1a268ee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 Jun 2012 17:05:40 +0100 Subject: [PATCH 139/475] drm/i915: Mark the ringbuffers as being in the GTT domain By correctly describing the rinbuffers as being in the GTT domain, it appears that we are more careful with the management of the CPU cache upon resume and so prevent some coherency issue when submitting commands to the GPU later. A secondary effect is that the debug logs are then consistent with the actual usage (i.e. they no longer describe the ringbuffers as being in the CPU write domain when we are accessing them through an wc iomapping.) Reported-and-tested-by: Daniel Gnoutcheff Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=41092 Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 490df551e31..9fbad086cb4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -988,6 +988,10 @@ static int intel_init_ring_buffer(struct drm_device *dev, if (ret) goto err_unref; + ret = i915_gem_object_set_to_gtt_domain(obj, true); + if (ret) + goto err_unpin; + ring->virtual_start = ioremap_wc(dev->agp->base + obj->gtt_offset, ring->size); if (ring->virtual_start == NULL) { From b7884eb45ec98c0d34c7f49005ae9d4b4b4e38f6 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 4 Jun 2012 11:18:15 +0200 Subject: [PATCH 140/475] drm/i915: hold forcewake around ring hw init Empirical evidence suggests that we need to: On at least one ivb machine when running the hangman i-g-t test, the rings don't properly initialize properly - the RING_START registers seems to be stuck at all zeros. Holding forcewake around this register init sequences makes chip reset reliable again. Note that this is not the first such issue: commit f01db988ef6f6c70a6cc36ee71e4a98a68901229 Author: Sean Paul Date: Fri Mar 16 12:43:22 2012 -0400 drm/i915: Add wait_for in init_ring_common added delay loops to make RING_START and RING_CTL initialization reliable on the blt ring at boot-up. So I guess it won't hurt if we do this unconditionally for all force_wake needing gpus. To avoid copy&pasting of the HAS_FORCE_WAKE check I've added a new intel_info bit for that. v2: Fixup missing commas in static struct and properly handling the error case in init_ring_common, both noticed by Jani Nikula. Cc: stable@vger.kernel.org Reported-and-tested-by: Yang Guang Reviewed-by: Eugeni Dodonov Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=50522 Signed-Off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 13 +++++++++---- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/intel_ringbuffer.c | 16 +++++++++++++--- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 238a5216583..9fe9ebe52a7 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -233,6 +233,7 @@ static const struct intel_device_info intel_sandybridge_d_info = { .has_blt_ring = 1, .has_llc = 1, .has_pch_split = 1, + .has_force_wake = 1, }; static const struct intel_device_info intel_sandybridge_m_info = { @@ -243,6 +244,7 @@ static const struct intel_device_info intel_sandybridge_m_info = { .has_blt_ring = 1, .has_llc = 1, .has_pch_split = 1, + .has_force_wake = 1, }; static const struct intel_device_info intel_ivybridge_d_info = { @@ -252,6 +254,7 @@ static const struct intel_device_info intel_ivybridge_d_info = { .has_blt_ring = 1, .has_llc = 1, .has_pch_split = 1, + .has_force_wake = 1, }; static const struct intel_device_info intel_ivybridge_m_info = { @@ -262,6 +265,7 @@ static const struct intel_device_info intel_ivybridge_m_info = { .has_blt_ring = 1, .has_llc = 1, .has_pch_split = 1, + .has_force_wake = 1, }; static const struct intel_device_info intel_valleyview_m_info = { @@ -289,6 +293,7 @@ static const struct intel_device_info intel_haswell_d_info = { .has_blt_ring = 1, .has_llc = 1, .has_pch_split = 1, + .has_force_wake = 1, }; static const struct intel_device_info intel_haswell_m_info = { @@ -298,6 +303,7 @@ static const struct intel_device_info intel_haswell_m_info = { .has_blt_ring = 1, .has_llc = 1, .has_pch_split = 1, + .has_force_wake = 1, }; static const struct pci_device_id pciidlist[] = { /* aka */ @@ -1139,10 +1145,9 @@ MODULE_LICENSE("GPL and additional rights"); /* We give fast paths for the really cool registers */ #define NEEDS_FORCE_WAKE(dev_priv, reg) \ - (((dev_priv)->info->gen >= 6) && \ - ((reg) < 0x40000) && \ - ((reg) != FORCEWAKE)) && \ - (!IS_VALLEYVIEW((dev_priv)->dev)) + ((HAS_FORCE_WAKE((dev_priv)->dev)) && \ + ((reg) < 0x40000) && \ + ((reg) != FORCEWAKE)) #define __i915_read(x, y) \ u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 377c21f531e..cff63075765 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -285,6 +285,7 @@ struct intel_device_info { u8 is_ivybridge:1; u8 is_valleyview:1; u8 has_pch_split:1; + u8 has_force_wake:1; u8 is_haswell:1; u8 has_fbc:1; u8 has_pipe_cxsr:1; @@ -1098,6 +1099,8 @@ struct drm_i915_file_private { #define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT) #define HAS_PCH_IBX(dev) (INTEL_PCH_TYPE(dev) == PCH_IBX) +#define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake) + #include "i915_trace.h" /** diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9fbad086cb4..e5b84ff89ca 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -266,10 +266,15 @@ u32 intel_ring_get_active_head(struct intel_ring_buffer *ring) static int init_ring_common(struct intel_ring_buffer *ring) { - drm_i915_private_t *dev_priv = ring->dev->dev_private; + struct drm_device *dev = ring->dev; + drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj = ring->obj; + int ret = 0; u32 head; + if (HAS_FORCE_WAKE(dev)) + gen6_gt_force_wake_get(dev_priv); + /* Stop the ring if it's running. */ I915_WRITE_CTL(ring, 0); I915_WRITE_HEAD(ring, 0); @@ -317,7 +322,8 @@ static int init_ring_common(struct intel_ring_buffer *ring) I915_READ_HEAD(ring), I915_READ_TAIL(ring), I915_READ_START(ring)); - return -EIO; + ret = -EIO; + goto out; } if (!drm_core_check_feature(ring->dev, DRIVER_MODESET)) @@ -329,7 +335,11 @@ static int init_ring_common(struct intel_ring_buffer *ring) ring->last_retired_head = -1; } - return 0; +out: + if (HAS_FORCE_WAKE(dev)) + gen6_gt_force_wake_put(dev_priv); + + return ret; } static int From 925e64c3c512e9f4452eaa7d52fd4c1518b8fb11 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 16 May 2012 15:27:20 +0200 Subject: [PATCH 141/475] mac80211: run scan after finish connection monitoring commit 133d40f9a22bdfd2617a446f1e3209537c5415ec Author: Stanislaw Gruszka Date: Wed Mar 28 16:01:19 2012 +0200 mac80211: do not scan and monitor connection in parallel add bug, which make possible to start a scan and never finish it, so make every new scanning request finish with -EBUSY error. This can happen on code paths where we finish connection monitoring and clear IEEE80211_STA_*_POLL flags, but do not check if scan was deferred. This patch fixes those code paths. Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 04c30630898..d94627c2929 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1220,6 +1220,22 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, sdata->vif.bss_conf.qos = true; } +static void __ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata) +{ + lockdep_assert_held(&sdata->local->mtx); + + sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL | + IEEE80211_STA_BEACON_POLL); + ieee80211_run_deferred_scan(sdata->local); +} + +static void ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata) +{ + mutex_lock(&sdata->local->mtx); + __ieee80211_stop_poll(sdata); + mutex_unlock(&sdata->local->mtx); +} + static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, u16 capab, bool erp_valid, u8 erp) { @@ -1285,8 +1301,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.flags |= IEEE80211_STA_RESET_SIGNAL_AVE; /* just to be sure */ - sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL | - IEEE80211_STA_BEACON_POLL); + ieee80211_stop_poll(sdata); ieee80211_led_assoc(local, 1); @@ -1456,8 +1471,7 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) return; } - ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL | - IEEE80211_STA_BEACON_POLL); + __ieee80211_stop_poll(sdata); mutex_lock(&local->iflist_mtx); ieee80211_recalc_ps(local, -1); @@ -1477,7 +1491,6 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME)); out: - ieee80211_run_deferred_scan(local); mutex_unlock(&local->mtx); } @@ -2408,7 +2421,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, net_dbg_ratelimited("%s: cancelling probereq poll due to a received beacon\n", sdata->name); #endif + mutex_lock(&local->mtx); ifmgd->flags &= ~IEEE80211_STA_BEACON_POLL; + ieee80211_run_deferred_scan(local); + mutex_unlock(&local->mtx); + mutex_lock(&local->iflist_mtx); ieee80211_recalc_ps(local, -1); mutex_unlock(&local->iflist_mtx); @@ -2595,8 +2612,7 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[DEAUTH_DISASSOC_LEN]; - ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL | - IEEE80211_STA_BEACON_POLL); + ieee80211_stop_poll(sdata); ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason, false, frame_buf); @@ -2874,8 +2890,7 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) u32 flags; if (sdata->vif.type == NL80211_IFTYPE_STATION) { - sdata->u.mgd.flags &= ~(IEEE80211_STA_BEACON_POLL | - IEEE80211_STA_CONNECTION_POLL); + __ieee80211_stop_poll(sdata); /* let's probe the connection once */ flags = sdata->local->hw.flags; @@ -2944,7 +2959,10 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) if (test_and_clear_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running)) add_timer(&ifmgd->chswitch_timer); ieee80211_sta_reset_beacon_monitor(sdata); + + mutex_lock(&sdata->local->mtx); ieee80211_restart_sta_timer(sdata); + mutex_unlock(&sdata->local->mtx); } #endif From f0e3bd23418e1ec1aeac5fc36b74bf2f645b1244 Mon Sep 17 00:00:00 2001 From: Avinash Patil Date: Tue, 29 May 2012 15:39:04 -0700 Subject: [PATCH 142/475] mwifiex: invalidate bss config before setting channel for uAP Mark bss_config parameters as invalid before setting AP channel. This prevents from setting invalid parameters while setting AP channel to FW. Signed-off-by: Avinash Patil Signed-off-by: Kiran Divekar Signed-off-by: Bing Zhao Signed-off-by: John W. Linville --- drivers/net/wireless/mwifiex/uap_cmd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mwifiex/uap_cmd.c b/drivers/net/wireless/mwifiex/uap_cmd.c index 76dfbc42a73..e9482c97e77 100644 --- a/drivers/net/wireless/mwifiex/uap_cmd.c +++ b/drivers/net/wireless/mwifiex/uap_cmd.c @@ -416,6 +416,7 @@ int mwifiex_uap_set_channel(struct mwifiex_private *priv, int channel) if (!bss_cfg) return -ENOMEM; + mwifiex_set_sys_config_invalid_data(bss_cfg); bss_cfg->band_cfg = BAND_CONFIG_MANUAL; bss_cfg->channel = channel; From 7a1c99343c5305c47b5e078d1af2144f9026df40 Mon Sep 17 00:00:00 2001 From: Avinash Patil Date: Tue, 29 May 2012 15:39:05 -0700 Subject: [PATCH 143/475] mwifiex: support NL80211_HIDDEN_SSID_ZERO_LEN for uAP mwifiex uAP supports NL80211_HIDDEN_SSID_ZERO_LEN type of hidden SSID only. NL80211_HIDDEN_SSID_ZERO_CONTENTS is not supported. Signed-off-by: Avinash Patil Signed-off-by: Kiran Divekar Signed-off-by: Bing Zhao Signed-off-by: John W. Linville --- drivers/net/wireless/mwifiex/cfg80211.c | 13 +++++++++++++ drivers/net/wireless/mwifiex/fw.h | 6 ++++++ drivers/net/wireless/mwifiex/uap_cmd.c | 9 +++++++++ 3 files changed, 28 insertions(+) diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 87671446e24..015fec3371a 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -948,6 +948,19 @@ static int mwifiex_cfg80211_start_ap(struct wiphy *wiphy, bss_cfg->ssid.ssid_len = params->ssid_len; } + switch (params->hidden_ssid) { + case NL80211_HIDDEN_SSID_NOT_IN_USE: + bss_cfg->bcast_ssid_ctl = 1; + break; + case NL80211_HIDDEN_SSID_ZERO_LEN: + bss_cfg->bcast_ssid_ctl = 0; + break; + case NL80211_HIDDEN_SSID_ZERO_CONTENTS: + /* firmware doesn't support this type of hidden SSID */ + default: + return -EINVAL; + } + if (mwifiex_set_secure_params(priv, bss_cfg, params)) { kfree(bss_cfg); wiphy_err(wiphy, "Failed to parse secuirty parameters!\n"); diff --git a/drivers/net/wireless/mwifiex/fw.h b/drivers/net/wireless/mwifiex/fw.h index 9f674bbebe6..561452a5c81 100644 --- a/drivers/net/wireless/mwifiex/fw.h +++ b/drivers/net/wireless/mwifiex/fw.h @@ -122,6 +122,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define TLV_TYPE_CHANNELBANDLIST (PROPRIETARY_TLV_BASE_ID + 42) #define TLV_TYPE_UAP_BEACON_PERIOD (PROPRIETARY_TLV_BASE_ID + 44) #define TLV_TYPE_UAP_DTIM_PERIOD (PROPRIETARY_TLV_BASE_ID + 45) +#define TLV_TYPE_UAP_BCAST_SSID (PROPRIETARY_TLV_BASE_ID + 48) #define TLV_TYPE_UAP_RTS_THRESHOLD (PROPRIETARY_TLV_BASE_ID + 51) #define TLV_TYPE_UAP_WPA_PASSPHRASE (PROPRIETARY_TLV_BASE_ID + 60) #define TLV_TYPE_UAP_ENCRY_PROTOCOL (PROPRIETARY_TLV_BASE_ID + 64) @@ -1209,6 +1210,11 @@ struct host_cmd_tlv_ssid { u8 ssid[0]; } __packed; +struct host_cmd_tlv_bcast_ssid { + struct host_cmd_tlv tlv; + u8 bcast_ctl; +} __packed; + struct host_cmd_tlv_beacon_period { struct host_cmd_tlv tlv; __le16 period; diff --git a/drivers/net/wireless/mwifiex/uap_cmd.c b/drivers/net/wireless/mwifiex/uap_cmd.c index e9482c97e77..8173ab66066 100644 --- a/drivers/net/wireless/mwifiex/uap_cmd.c +++ b/drivers/net/wireless/mwifiex/uap_cmd.c @@ -132,6 +132,7 @@ mwifiex_uap_bss_param_prepare(u8 *tlv, void *cmd_buf, u16 *param_size) struct host_cmd_tlv_dtim_period *dtim_period; struct host_cmd_tlv_beacon_period *beacon_period; struct host_cmd_tlv_ssid *ssid; + struct host_cmd_tlv_bcast_ssid *bcast_ssid; struct host_cmd_tlv_channel_band *chan_band; struct host_cmd_tlv_frag_threshold *frag_threshold; struct host_cmd_tlv_rts_threshold *rts_threshold; @@ -153,6 +154,14 @@ mwifiex_uap_bss_param_prepare(u8 *tlv, void *cmd_buf, u16 *param_size) cmd_size += sizeof(struct host_cmd_tlv) + bss_cfg->ssid.ssid_len; tlv += sizeof(struct host_cmd_tlv) + bss_cfg->ssid.ssid_len; + + bcast_ssid = (struct host_cmd_tlv_bcast_ssid *)tlv; + bcast_ssid->tlv.type = cpu_to_le16(TLV_TYPE_UAP_BCAST_SSID); + bcast_ssid->tlv.len = + cpu_to_le16(sizeof(bcast_ssid->bcast_ctl)); + bcast_ssid->bcast_ctl = bss_cfg->bcast_ssid_ctl; + cmd_size += sizeof(struct host_cmd_tlv_bcast_ssid); + tlv += sizeof(struct host_cmd_tlv_bcast_ssid); } if (bss_cfg->channel && bss_cfg->channel <= MAX_CHANNEL_BAND_BG) { chan_band = (struct host_cmd_tlv_channel_band *)tlv; From 28f333666ea766fdfb25de3783ff56cd2d1c51f0 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 29 May 2012 15:39:06 -0700 Subject: [PATCH 144/475] cfg80211: use sme_state in ibss start/join path CFG80211_DEV_WARN_ON() at "net/wireless/ibss.c line 63" is unnecessarily triggered even after successful connection, when cfg80211_ibss_joined() is called by driver inside .join_ibss handler. This patch fixes the problem by changing 'sme_state' in ibss path and having WARN_ON() check for 'sme_state' similar to infra association. Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: John W. Linville --- net/wireless/ibss.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index d2a19b0ff71..89baa332841 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -42,6 +42,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid) cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); + wdev->sme_state = CFG80211_SME_CONNECTED; cfg80211_upload_connect_keys(wdev); nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, @@ -60,7 +61,7 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp) struct cfg80211_event *ev; unsigned long flags; - CFG80211_DEV_WARN_ON(!wdev->ssid_len); + CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTING); ev = kzalloc(sizeof(*ev), gfp); if (!ev) @@ -115,9 +116,11 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, #ifdef CONFIG_CFG80211_WEXT wdev->wext.ibss.channel = params->channel; #endif + wdev->sme_state = CFG80211_SME_CONNECTING; err = rdev->ops->join_ibss(&rdev->wiphy, dev, params); if (err) { wdev->connect_keys = NULL; + wdev->sme_state = CFG80211_SME_IDLE; return err; } @@ -169,6 +172,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) } wdev->current_bss = NULL; + wdev->sme_state = CFG80211_SME_IDLE; wdev->ssid_len = 0; #ifdef CONFIG_CFG80211_WEXT if (!nowext) From b8bacc187aa5b59af9b9fa19b3ce4df5ad1db112 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Wed, 30 May 2012 09:30:41 +0800 Subject: [PATCH 145/475] mac80211: Fix Unreachable Mesh Station Problem when joining to another MBSS Mesh station that joins an MBSS is reachable using mesh portal with 6 address frame by mesh stations from another MBSS if these two different MBSSes are bridged. However, if the mesh station later moves into the same MBSS of those mesh stations, it is unreachable by mesh stations in the MBSS due to the mpp_paths table is not deleted. A quick fix is to perform mesh_path_lookup, if it is available for the target destination, mpp_path_lookup is not performed. When the mesh station moves back to its original MBSS, the mesh_paths will be deleted once expired. So, it will be reachable using mpp_path_lookup again. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: John W. Linville --- net/mac80211/tx.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 847215bb2a6..e453212fa17 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1737,7 +1737,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, __le16 fc; struct ieee80211_hdr hdr; struct ieee80211s_hdr mesh_hdr __maybe_unused; - struct mesh_path __maybe_unused *mppath = NULL; + struct mesh_path __maybe_unused *mppath = NULL, *mpath = NULL; const u8 *encaps_data; int encaps_len, skip_header_bytes; int nh_pos, h_pos; @@ -1803,8 +1803,11 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, goto fail; } rcu_read_lock(); - if (!is_multicast_ether_addr(skb->data)) - mppath = mpp_path_lookup(skb->data, sdata); + if (!is_multicast_ether_addr(skb->data)) { + mpath = mesh_path_lookup(skb->data, sdata); + if (!mpath) + mppath = mpp_path_lookup(skb->data, sdata); + } /* * Use address extension if it is a packet from From a5fdde28b4f5fb756032e7ad2c6fcdcffde20958 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 30 May 2012 10:36:12 +0200 Subject: [PATCH 146/475] iwlwifi: fix TX power antenna access Since my commit iwlwifi: use valid TX/RX antenna from hw_params the config values are pure overrides, not the real values for all hardware. Therefore, the EEPROM TX power reading code checks the wrong values, it should check the hw_params values. Cc: stable@kernel.org [3.4] Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-eeprom.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-eeprom.c b/drivers/net/wireless/iwlwifi/iwl-eeprom.c index 50c58911e71..b8e2b223ac3 100644 --- a/drivers/net/wireless/iwlwifi/iwl-eeprom.c +++ b/drivers/net/wireless/iwlwifi/iwl-eeprom.c @@ -568,28 +568,28 @@ static int iwl_find_otp_image(struct iwl_trans *trans, * iwl_get_max_txpower_avg - get the highest tx power from all chains. * find the highest tx power from all chains for the channel */ -static s8 iwl_get_max_txpower_avg(const struct iwl_cfg *cfg, +static s8 iwl_get_max_txpower_avg(struct iwl_priv *priv, struct iwl_eeprom_enhanced_txpwr *enhanced_txpower, int element, s8 *max_txpower_in_half_dbm) { s8 max_txpower_avg = 0; /* (dBm) */ /* Take the highest tx power from any valid chains */ - if ((cfg->valid_tx_ant & ANT_A) && + if ((priv->hw_params.valid_tx_ant & ANT_A) && (enhanced_txpower[element].chain_a_max > max_txpower_avg)) max_txpower_avg = enhanced_txpower[element].chain_a_max; - if ((cfg->valid_tx_ant & ANT_B) && + if ((priv->hw_params.valid_tx_ant & ANT_B) && (enhanced_txpower[element].chain_b_max > max_txpower_avg)) max_txpower_avg = enhanced_txpower[element].chain_b_max; - if ((cfg->valid_tx_ant & ANT_C) && + if ((priv->hw_params.valid_tx_ant & ANT_C) && (enhanced_txpower[element].chain_c_max > max_txpower_avg)) max_txpower_avg = enhanced_txpower[element].chain_c_max; - if (((cfg->valid_tx_ant == ANT_AB) | - (cfg->valid_tx_ant == ANT_BC) | - (cfg->valid_tx_ant == ANT_AC)) && + if (((priv->hw_params.valid_tx_ant == ANT_AB) | + (priv->hw_params.valid_tx_ant == ANT_BC) | + (priv->hw_params.valid_tx_ant == ANT_AC)) && (enhanced_txpower[element].mimo2_max > max_txpower_avg)) max_txpower_avg = enhanced_txpower[element].mimo2_max; - if ((cfg->valid_tx_ant == ANT_ABC) && + if ((priv->hw_params.valid_tx_ant == ANT_ABC) && (enhanced_txpower[element].mimo3_max > max_txpower_avg)) max_txpower_avg = enhanced_txpower[element].mimo3_max; @@ -691,7 +691,7 @@ static void iwl_eeprom_enhanced_txpower(struct iwl_priv *priv) ((txp->delta_20_in_40 & 0xf0) >> 4), (txp->delta_20_in_40 & 0x0f)); - max_txp_avg = iwl_get_max_txpower_avg(priv->cfg, txp_array, idx, + max_txp_avg = iwl_get_max_txpower_avg(priv, txp_array, idx, &max_txp_avg_halfdbm); /* From bd34ab62a3297bd7685da11b0cbe05ae4cd8b02c Mon Sep 17 00:00:00 2001 From: Meenakshi Venkataraman Date: Wed, 30 May 2012 11:39:33 +0200 Subject: [PATCH 147/475] mac80211: fix error in station state transitions during reconfig As part of hardware reconfig mac80211 tries to restore the station state to its values before the hardware reconfig, but it only goes to the last-state - 1. Fix this off-by-one error. Cc: stable@kernel.org [3.4] Signed-off-by: Meenakshi Venkataraman Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index a44c6807df0..8dd4712620f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1271,7 +1271,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) enum ieee80211_sta_state state; for (state = IEEE80211_STA_NOTEXIST; - state < sta->sta_state - 1; state++) + state < sta->sta_state; state++) WARN_ON(drv_sta_state(local, sta->sdata, sta, state, state + 1)); } From 1ae2fc25a1289a84ca726541e3356ba5bcb7f7fe Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 30 May 2012 14:59:36 +0200 Subject: [PATCH 148/475] mac80211_hwsim: advertise interface combinations Enforcing interface combinations broke uses of hwsim with multiple virtual interfaces. Advertise that all combinations are possible to fix this. Reported-by: Nirav Shah Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/mac80211_hwsim.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index fb787df0166..4c9336cee81 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1721,6 +1721,24 @@ static void hwsim_exit_netlink(void) "unregister family %i\n", ret); } +static const struct ieee80211_iface_limit hwsim_if_limits[] = { + { .max = 1, .types = BIT(NL80211_IFTYPE_ADHOC) }, + { .max = 2048, .types = BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_P2P_CLIENT) | +#ifdef CONFIG_MAC80211_MESH + BIT(NL80211_IFTYPE_MESH_POINT) | +#endif + BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_P2P_GO) }, +}; + +static const struct ieee80211_iface_combination hwsim_if_comb = { + .limits = hwsim_if_limits, + .n_limits = ARRAY_SIZE(hwsim_if_limits), + .max_interfaces = 2048, + .num_different_channels = 1, +}; + static int __init init_mac80211_hwsim(void) { int i, err = 0; @@ -1782,6 +1800,9 @@ static int __init init_mac80211_hwsim(void) hw->wiphy->n_addresses = 2; hw->wiphy->addresses = data->addresses; + hw->wiphy->iface_combinations = &hwsim_if_comb; + hw->wiphy->n_iface_combinations = 1; + if (fake_hw_scan) { hw->wiphy->max_scan_ssids = 255; hw->wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN; From 71ecfa1893034eeb1c93e02e22ee2ad26d080858 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 31 May 2012 15:09:27 +0200 Subject: [PATCH 149/475] mac80211: clean up remain-on-channel on interface stop When any interface goes down, it could be the one that we were doing a remain-on-channel with. We therefore need to cancel the remain-on-channel and flush the related work structs so they don't run after the interface has been removed or even destroyed. It's also possible in this case that an off-channel SKB was never transmitted, so free it if this is the case. Note that this can also happen if the driver finishes the off-channel period without ever starting it. Cc: stable@kernel.org Reported-by: Nirav Shah Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/iface.c | 12 ++++++++++++ net/mac80211/offchannel.c | 16 ++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d4c19a7773d..8664111d056 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -637,6 +637,18 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_configure_filter(local); break; default: + mutex_lock(&local->mtx); + if (local->hw_roc_dev == sdata->dev && + local->hw_roc_channel) { + /* ignore return value since this is racy */ + drv_cancel_remain_on_channel(local); + ieee80211_queue_work(&local->hw, &local->hw_roc_done); + } + mutex_unlock(&local->mtx); + + flush_work(&local->hw_roc_start); + flush_work(&local->hw_roc_done); + flush_work(&sdata->work); /* * When we get here, the interface is marked down. diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index f054e94901a..935aa4b6dee 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -234,6 +234,22 @@ static void ieee80211_hw_roc_done(struct work_struct *work) return; } + /* was never transmitted */ + if (local->hw_roc_skb) { + u64 cookie; + + cookie = local->hw_roc_cookie ^ 2; + + cfg80211_mgmt_tx_status(local->hw_roc_dev, cookie, + local->hw_roc_skb->data, + local->hw_roc_skb->len, false, + GFP_KERNEL); + + kfree_skb(local->hw_roc_skb); + local->hw_roc_skb = NULL; + local->hw_roc_skb_for_status = NULL; + } + if (!local->hw_roc_for_tx) cfg80211_remain_on_channel_expired(local->hw_roc_dev, local->hw_roc_cookie, From d8c7aae64cd2db5eccc631c29fa978a24fb1feef Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 30 May 2012 15:32:24 +0200 Subject: [PATCH 150/475] mac80211: add missing rcu_read_lock/unlock in agg-rx session timer Fixes a lockdep warning: =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- net/mac80211/agg-rx.c:148 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 1 1 lock held by arecord/11226: #0: (&tid_agg_rx->session_timer){+.-...}, at: [] call_timer_fn+0x0/0x360 stack backtrace: Pid: 11226, comm: arecord Not tainted 3.1.0-kml #16 Call Trace: [] lockdep_rcu_dereference+0xa4/0xc0 [] sta_rx_agg_session_timer_expired+0xc9/0x110 [mac80211] [] ? ieee80211_process_addba_resp+0x220/0x220 [mac80211] [] call_timer_fn+0x8a/0x360 [] ? init_timer_deferrable_key+0x30/0x30 [] ? _raw_spin_unlock_irq+0x30/0x70 [] run_timer_softirq+0x139/0x310 [] ? put_lock_stats.isra.25+0xe/0x40 [] ? lock_release_holdtime.part.26+0xdc/0x160 [] ? ieee80211_process_addba_resp+0x220/0x220 [mac80211] [] __do_softirq+0xc8/0x3c0 [] ? tick_dev_program_event+0x48/0x110 [] ? tick_program_event+0x1f/0x30 [] ? putname+0x35/0x50 [] call_softirq+0x1c/0x30 [] do_softirq+0xa5/0xe0 [] irq_exit+0xae/0xe0 [] smp_apic_timer_interrupt+0x6b/0x98 [] apic_timer_interrupt+0x73/0x80 [] ? free_debug_processing+0x1a1/0x1d5 [] ? putname+0x35/0x50 [] __slab_free+0x31/0x2ca [] ? _raw_spin_unlock_irqrestore+0x4a/0x90 [] ? __debug_check_no_obj_freed+0x15f/0x210 [] ? lock_release_nested+0x84/0xc0 [] ? kmem_cache_free+0x105/0x250 [] ? putname+0x35/0x50 [] ? putname+0x35/0x50 [] kmem_cache_free+0x23f/0x250 [] putname+0x35/0x50 [] do_sys_open+0x16d/0x1d0 [] sys_open+0x20/0x30 [] system_call_fastpath+0x16/0x1b Reported-by: Johannes Berg Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 26ddb699d69..c649188314c 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -145,15 +145,20 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) struct tid_ampdu_rx *tid_rx; unsigned long timeout; + rcu_read_lock(); tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[*ptid]); - if (!tid_rx) + if (!tid_rx) { + rcu_read_unlock(); return; + } timeout = tid_rx->last_rx + TU_TO_JIFFIES(tid_rx->timeout); if (time_is_after_jiffies(timeout)) { mod_timer(&tid_rx->session_timer, timeout); + rcu_read_unlock(); return; } + rcu_read_unlock(); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); From 5204267d2fd5e98fc52b44fec01ad10352642b78 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 30 May 2012 13:25:54 -0700 Subject: [PATCH 151/475] mac80211: Fix likely misuse of | for & Using | with a constant is always true. Likely this should have be &. cc: Ben Greear Signed-off-by: Joe Perches Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 495831ee48f..e9cecca5c44 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -533,16 +533,16 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy, sinfo.filled = 0; sta_set_sinfo(sta, &sinfo); - if (sinfo.filled | STATION_INFO_TX_BITRATE) + if (sinfo.filled & STATION_INFO_TX_BITRATE) data[i] = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); i++; - if (sinfo.filled | STATION_INFO_RX_BITRATE) + if (sinfo.filled & STATION_INFO_RX_BITRATE) data[i] = 100000 * cfg80211_calculate_bitrate(&sinfo.rxrate); i++; - if (sinfo.filled | STATION_INFO_SIGNAL_AVG) + if (sinfo.filled & STATION_INFO_SIGNAL_AVG) data[i] = (u8)sinfo.signal_avg; i++; } else { From f304a993190965f48baa49792d5ab2849ae26bf9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 30 May 2012 13:25:56 -0700 Subject: [PATCH 152/475] brcmfmac: Fix likely misuse of | for & Using | with a constant is always true. Likely this should have be &. Signed-off-by: Joe Perches Acked-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c index e2480d19627..8e7e6928c93 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c @@ -89,9 +89,9 @@ int brcmf_sdio_intr_register(struct brcmf_sdio_dev *sdiodev) data |= 1 << SDIO_FUNC_1 | 1 << SDIO_FUNC_2 | 1; brcmf_sdio_regwb(sdiodev, SDIO_CCCR_IENx, data, &ret); - /* redirect, configure ane enable io for interrupt signal */ + /* redirect, configure and enable io for interrupt signal */ data = SDIO_SEPINT_MASK | SDIO_SEPINT_OE; - if (sdiodev->irq_flags | IRQF_TRIGGER_HIGH) + if (sdiodev->irq_flags & IRQF_TRIGGER_HIGH) data |= SDIO_SEPINT_ACT_HI; brcmf_sdio_regwb(sdiodev, SDIO_CCCR_BRCM_SEPINT, data, &ret); From e5851dac2c95af7159716832300b9f50c62c648e Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 1 Jun 2012 11:29:40 +0200 Subject: [PATCH 153/475] rt2x00: use atomic variable for seqno Remove spinlock as atomic_t can be used instead. Note we use only 16 lower bits, upper bits are changed but we impilcilty cast to u16. This fix possible deadlock on IBSS mode reproted by lockdep: ================================= [ INFO: inconsistent lock state ] 3.4.0-wl+ #4 Not tainted --------------------------------- inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. kworker/u:2/30374 [HC0[0]:SC0[0]:HE1:SE1] takes: (&(&intf->seqlock)->rlock){+.?...}, at: [] rt2x00queue_create_tx_descriptor+0x380/0x490 [rt2x00lib] {IN-SOFTIRQ-W} state was registered at: [] __lock_acquire+0x47b/0x1050 [] lock_acquire+0x84/0xf0 [] _raw_spin_lock+0x33/0x40 [] rt2x00queue_create_tx_descriptor+0x380/0x490 [rt2x00lib] [] rt2x00queue_write_tx_frame+0x1a/0x300 [rt2x00lib] [] rt2x00mac_tx+0x7f/0x380 [rt2x00lib] [] __ieee80211_tx+0x1b3/0x300 [mac80211] [] ieee80211_tx+0x105/0x130 [mac80211] [] ieee80211_xmit+0xad/0x100 [mac80211] [] ieee80211_subif_start_xmit+0x2d9/0x930 [mac80211] [] dev_hard_start_xmit+0x307/0x660 [] sch_direct_xmit+0xa1/0x1e0 [] dev_queue_xmit+0x183/0x730 [] neigh_resolve_output+0xfa/0x1e0 [] ip_finish_output+0x24a/0x460 [] ip_output+0xb7/0x100 [] ip_local_out+0x20/0x60 [] igmpv3_sendpack+0x4f/0x60 [] igmp_ifc_timer_expire+0x29f/0x330 [] run_timer_softirq+0x15c/0x2f0 [] __do_softirq+0xae/0x1e0 irq event stamp: 18380437 hardirqs last enabled at (18380437): [] __slab_alloc.clone.3+0x67/0x5f0 hardirqs last disabled at (18380436): [] __slab_alloc.clone.3+0x33/0x5f0 softirqs last enabled at (18377616): [] __do_softirq+0x123/0x1e0 softirqs last disabled at (18377611): [] do_softirq+0x9d/0xe0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&intf->seqlock)->rlock); lock(&(&intf->seqlock)->rlock); *** DEADLOCK *** 4 locks held by kworker/u:2/30374: #0: (wiphy_name(local->hw.wiphy)){++++.+}, at: [] process_one_work+0x109/0x3f0 #1: ((&sdata->work)){+.+.+.}, at: [] process_one_work+0x109/0x3f0 #2: (&ifibss->mtx){+.+.+.}, at: [] ieee80211_ibss_work+0x1b/0x470 [mac80211] #3: (&intf->beacon_skb_mutex){+.+...}, at: [] rt2x00queue_update_beacon+0x24/0x50 [rt2x00lib] stack backtrace: Pid: 30374, comm: kworker/u:2 Not tainted 3.4.0-wl+ #4 Call Trace: [] print_usage_bug+0x1f6/0x220 [] mark_lock+0x2c2/0x300 [] ? check_usage_forwards+0xc0/0xc0 [] __lock_acquire+0x4bc/0x1050 [] ? __kmalloc_track_caller+0x1c0/0x1d0 [] ? copy_skb_header+0x26/0x90 [] lock_acquire+0x84/0xf0 [] ? rt2x00queue_create_tx_descriptor+0x380/0x490 [rt2x00lib] [] _raw_spin_lock+0x33/0x40 [] ? rt2x00queue_create_tx_descriptor+0x380/0x490 [rt2x00lib] [] rt2x00queue_create_tx_descriptor+0x380/0x490 [rt2x00lib] [] rt2x00queue_update_beacon_locked+0x5f/0xb0 [rt2x00lib] [] rt2x00queue_update_beacon+0x2d/0x50 [rt2x00lib] [] rt2x00mac_bss_info_changed+0x1ca/0x200 [rt2x00lib] [] ? rt2x00mac_remove_interface+0x70/0x70 [rt2x00lib] [] ieee80211_bss_info_change_notify+0xe0/0x1d0 [mac80211] [] __ieee80211_sta_join_ibss+0x3b8/0x610 [mac80211] [] ? mark_held_locks+0x64/0xc0 [] ? virt_efi_query_capsule_caps+0x12/0x50 [] ieee80211_sta_join_ibss+0xf9/0x140 [mac80211] [] ieee80211_ibss_work+0x416/0x470 [mac80211] [] ? trace_hardirqs_on+0xb/0x10 [] ? skb_dequeue+0x4b/0x70 [] ieee80211_iface_work+0x13f/0x230 [mac80211] [] ? process_one_work+0x109/0x3f0 [] process_one_work+0x185/0x3f0 [] ? process_one_work+0x109/0x3f0 [] ? ieee80211_teardown_sdata+0xa0/0xa0 [mac80211] [] worker_thread+0x116/0x270 [] ? manage_workers+0x1e0/0x1e0 [] kthread+0x84/0x90 [] ? __init_kthread_worker+0x60/0x60 [] kernel_thread_helper+0x6/0x10 Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Acked-by: Helmut Schaa Acked-by: Gertjan van Wingerde Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2x00.h | 3 +-- drivers/net/wireless/rt2x00/rt2x00mac.c | 1 - drivers/net/wireless/rt2x00/rt2x00queue.c | 13 ++++++------- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index ca36cccaba3..8f754025b06 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -396,8 +396,7 @@ struct rt2x00_intf { * for hardware which doesn't support hardware * sequence counting. */ - spinlock_t seqlock; - u16 seqno; + atomic_t seqno; }; static inline struct rt2x00_intf* vif_to_intf(struct ieee80211_vif *vif) diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index b49773ef72f..dd24b2663b5 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -277,7 +277,6 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw, else rt2x00dev->intf_sta_count++; - spin_lock_init(&intf->seqlock); mutex_init(&intf->beacon_skb_mutex); intf->beacon = entry; diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c index 4c662eccf53..2fd83010341 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/rt2x00/rt2x00queue.c @@ -207,6 +207,7 @@ static void rt2x00queue_create_tx_descriptor_seq(struct rt2x00_dev *rt2x00dev, struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif); + u16 seqno; if (!(tx_info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ)) return; @@ -238,15 +239,13 @@ static void rt2x00queue_create_tx_descriptor_seq(struct rt2x00_dev *rt2x00dev, * sequence counting per-frame, since those will override the * sequence counter given by mac80211. */ - spin_lock(&intf->seqlock); - if (test_bit(ENTRY_TXD_FIRST_FRAGMENT, &txdesc->flags)) - intf->seqno += 0x10; + seqno = atomic_add_return(0x10, &intf->seqno); + else + seqno = atomic_read(&intf->seqno); + hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG); - hdr->seq_ctrl |= cpu_to_le16(intf->seqno); - - spin_unlock(&intf->seqlock); - + hdr->seq_ctrl |= cpu_to_le16(seqno); } static void rt2x00queue_create_tx_descriptor_plcp(struct rt2x00_dev *rt2x00dev, From 69aaedd3cfd23b2c732e3cf1227370a35f5c89d4 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Fri, 1 Jun 2012 09:13:17 -0500 Subject: [PATCH 154/475] bcma: add ext PA workaround for BCM4331 and BCM43431 MacBook Pro models with BCM4331 wireless have been found to have the ext PA lines disabled after resuming from S3 without external power attach. This causes them to be unable to transmit. Add a workaround to ensure that the ext PA lines are enabled on BCM4331. Also extend all handling of ext PA line muxing to BCM43431 as is done in the Broadcom SDK. BugLink: http://bugs.launchpad.net/bugs/925577 Cc: Arend van Spriel Cc: Hauke Mehrtens Cc: stable@vger.kernel.org Signed-off-by: Seth Forshee Signed-off-by: John W. Linville --- drivers/bcma/driver_chipcommon_pmu.c | 4 +++- drivers/bcma/sprom.c | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c index a058842f14f..61ce4054b3c 100644 --- a/drivers/bcma/driver_chipcommon_pmu.c +++ b/drivers/bcma/driver_chipcommon_pmu.c @@ -139,7 +139,9 @@ void bcma_pmu_workarounds(struct bcma_drv_cc *cc) bcma_chipco_chipctl_maskset(cc, 0, ~0, 0x7); break; case 0x4331: - /* BCM4331 workaround is SPROM-related, we put it in sprom.c */ + case 43431: + /* Ext PA lines must be enabled for tx on BCM4331 */ + bcma_chipco_bcm4331_ext_pa_lines_ctl(cc, true); break; case 43224: if (bus->chipinfo.rev == 0) { diff --git a/drivers/bcma/sprom.c b/drivers/bcma/sprom.c index c7f93359acb..f16f42d3607 100644 --- a/drivers/bcma/sprom.c +++ b/drivers/bcma/sprom.c @@ -579,13 +579,13 @@ int bcma_sprom_get(struct bcma_bus *bus) if (!sprom) return -ENOMEM; - if (bus->chipinfo.id == 0x4331) + if (bus->chipinfo.id == 0x4331 || bus->chipinfo.id == 43431) bcma_chipco_bcm4331_ext_pa_lines_ctl(&bus->drv_cc, false); pr_debug("SPROM offset 0x%x\n", offset); bcma_sprom_read(bus, offset, sprom); - if (bus->chipinfo.id == 0x4331) + if (bus->chipinfo.id == 0x4331 || bus->chipinfo.id == 43431) bcma_chipco_bcm4331_ext_pa_lines_ctl(&bus->drv_cc, true); err = bcma_sprom_valid(sprom); From 794454ce72a298de6f4536ade597bdcc7dcde7c7 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Sun, 3 Jun 2012 23:32:32 +0300 Subject: [PATCH 155/475] mac80211: fix non RCU-safe sta_list manipulation sta_info_cleanup locks the sta_list using rcu_read_lock however the delete operation isn't rcu safe. A race between sta_info_cleanup timer being called and a STA being removed can occur which leads to a panic while traversing sta_list. Fix this by switching to the RCU-safe versions. Cc: stable@vger.kernel.org Reported-by: Eyal Shapira Signed-off-by: Arik Nemtsov Signed-off-by: John W. Linville --- net/mac80211/sta_info.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f5b1638fbf8..de455f8bbb9 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -378,7 +378,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) /* make the station visible */ sta_info_hash_add(local, sta); - list_add(&sta->list, &local->sta_list); + list_add_rcu(&sta->list, &local->sta_list); set_sta_flag(sta, WLAN_STA_INSERTED); @@ -688,7 +688,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta) if (ret) return ret; - list_del(&sta->list); + list_del_rcu(&sta->list); mutex_lock(&local->key_mtx); for (i = 0; i < NUM_DEFAULT_KEYS; i++) From fcb6ff5e2cb83e1de10631f6621f45ca3401bf61 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 4 Jun 2012 13:43:11 +0200 Subject: [PATCH 156/475] iwlwifi: disable WoWLAN if !CONFIG_PM_SLEEP If CONFIG_PM_SLEEP is disabled, then iwlwifi doesn't support suspend/resume handlers and thus mac80211 (correctly) refuses advertising WoWLAN. Disable WoWLAN in the driver in this case. Cc: stable@kernel.org Reported-by: Sebastian Kemper Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-mac80211.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/iwl-mac80211.c b/drivers/net/wireless/iwlwifi/iwl-mac80211.c index ab2f4d7500a..5d158ca9d24 100644 --- a/drivers/net/wireless/iwlwifi/iwl-mac80211.c +++ b/drivers/net/wireless/iwlwifi/iwl-mac80211.c @@ -199,6 +199,7 @@ int iwlagn_mac_setup_register(struct iwl_priv *priv, WIPHY_FLAG_DISABLE_BEACON_HINTS | WIPHY_FLAG_IBSS_RSN; +#ifdef CONFIG_PM_SLEEP if (priv->fw->img[IWL_UCODE_WOWLAN].sec[0].len && priv->trans->ops->wowlan_suspend && device_can_wakeup(priv->trans->dev)) { @@ -217,6 +218,7 @@ int iwlagn_mac_setup_register(struct iwl_priv *priv, hw->wiphy->wowlan.pattern_max_len = IWLAGN_WOWLAN_MAX_PATTERN_LEN; } +#endif if (iwlwifi_mod_params.power_save) hw->wiphy->flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; From fad0c66c4bb836d57a5f125ecd38bed653ca863a Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 30 May 2012 10:54:57 -0700 Subject: [PATCH 157/475] timekeeping: Fix CLOCK_MONOTONIC inconsistency during leapsecond Commit 6b43ae8a61 (ntp: Fix leap-second hrtimer livelock) broke the leapsecond update of CLOCK_MONOTONIC. The missing leapsecond update to wall_to_monotonic causes discontinuities in CLOCK_MONOTONIC. Adjust wall_to_monotonic when NTP inserted a leapsecond. Reported-by: Richard Cochran Signed-off-by: John Stultz Tested-by: Richard Cochran Cc: stable@kernel.org Link: http://lkml.kernel.org/r/1338400497-12420-1-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 6e46cacf596..6f46a00a1e8 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -962,6 +962,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) timekeeper.xtime.tv_sec++; leap = second_overflow(timekeeper.xtime.tv_sec); timekeeper.xtime.tv_sec += leap; + timekeeper.wall_to_monotonic.tv_sec -= leap; } /* Accumulate raw time */ @@ -1077,6 +1078,7 @@ static void update_wall_time(void) timekeeper.xtime.tv_sec++; leap = second_overflow(timekeeper.xtime.tv_sec); timekeeper.xtime.tv_sec += leap; + timekeeper.wall_to_monotonic.tv_sec -= leap; } timekeeping_update(false); From ad1ed2937eaa30adf9996ef5f21e0c1895f906b0 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 4 Jun 2012 14:29:59 +1000 Subject: [PATCH 158/475] nommu: fix compilation of nommu.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling 3.5-rc1 for nommu targets gives: CC mm/nommu.o mm/nommu.c: In function ‘sys_mmap_pgoff’: mm/nommu.c:1489:2: error: ‘ret’ undeclared (first use in this function) mm/nommu.c:1489:2: note: each undeclared identifier is reported only once for each function it appears in It is trivially fixed by replacing 'ret' with the local variable that is already defined for the return value 'retval'. Signed-off-by: Greg Ungerer Signed-off-by: Al Viro --- mm/nommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/nommu.c b/mm/nommu.c index c4acfbc0997..d4b0c10872d 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1486,7 +1486,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - ret = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); + retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); if (file) fput(file); From 03240b279dbbda41f2fc55ff4424acf651e25bef Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 4 Jun 2012 17:47:34 -0400 Subject: [PATCH 159/475] fixups for signal breakage Obvious brainos spotted by Geert. Signed-off-by: Al Viro --- arch/avr32/kernel/signal.c | 2 +- arch/xtensa/include/asm/syscall.h | 4 ++-- arch/xtensa/kernel/signal.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c index c140f9b41dc..d552a854dac 100644 --- a/arch/avr32/kernel/signal.c +++ b/arch/avr32/kernel/signal.c @@ -300,7 +300,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti) if ((sysreg_read(SR) & MODE_MASK) == MODE_SUPERVISOR) syscall = 1; - if (ti->flags & _TIF_SIGPENDING)) + if (ti->flags & _TIF_SIGPENDING) do_signal(regs, syscall); if (ti->flags & _TIF_NOTIFY_RESUME) { diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index 0b9f2e13c78..c1dacca312f 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -31,5 +31,5 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize); - - +asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, + size_t sigsetsize); diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index b9f8e5850d3..efe4e854b3c 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -493,7 +493,7 @@ static void do_signal(struct pt_regs *regs) if (ret) return; - signal_delivered(signr, info, ka, regs, 0); + signal_delivered(signr, &info, &ka, regs, 0); if (current->ptrace & PT_SINGLESTEP) task_pt_regs(current)->icountlevel = 1; From 363b06aaa59fc20d0a9c5a5a9ce1fa2c45946700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 14 May 2012 11:08:51 +0900 Subject: [PATCH 160/475] drm/exynos: Use DRM_FORMAT_{NV12, YUV420} instead of DRM_FORMAT_{NV12M, YUV420M} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The NV12M/YUV420M formats are identical to the already existing standard NV12/YUV420 formats. The M variants will be removed, so convert the driver to use the standard names. Signed-off-by: Ville Syrjälä Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_fb.h | 4 ++-- drivers/gpu/drm/exynos/exynos_mixer.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.h b/drivers/gpu/drm/exynos/exynos_drm_fb.h index 3ecb30d9355..50823756cde 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.h +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.h @@ -31,10 +31,10 @@ static inline int exynos_drm_format_num_buffers(uint32_t format) { switch (format) { - case DRM_FORMAT_NV12M: + case DRM_FORMAT_NV12: case DRM_FORMAT_NV12MT: return 2; - case DRM_FORMAT_YUV420M: + case DRM_FORMAT_YUV420: return 3; default: return 1; diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 68ef0102837..5a46e583c5b 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -365,7 +365,7 @@ static void vp_video_buffer(struct mixer_context *ctx, int win) switch (win_data->pixel_format) { case DRM_FORMAT_NV12MT: tiled_mode = true; - case DRM_FORMAT_NV12M: + case DRM_FORMAT_NV12: crcb_mode = false; buf_num = 2; break; From 13b87b27421e12f82ebbaac018cea30f82e5c33e Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Mon, 14 May 2012 20:04:38 +0900 Subject: [PATCH 161/475] drm/exynos: fixed size type. size type of drm_exynos_gem_mmap struct is changed to uint64_t and it adds pad for the struct to be aligned as 64bit. Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- include/drm/exynos_drm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/drm/exynos_drm.h b/include/drm/exynos_drm.h index b6d7ce92ead..68733587e70 100644 --- a/include/drm/exynos_drm.h +++ b/include/drm/exynos_drm.h @@ -64,6 +64,7 @@ struct drm_exynos_gem_map_off { * A structure for mapping buffer. * * @handle: a handle to gem object created. + * @pad: just padding to be 64-bit aligned. * @size: memory size to be mapped. * @mapped: having user virtual address mmaped. * - this variable would be filled by exynos gem module @@ -72,7 +73,8 @@ struct drm_exynos_gem_map_off { */ struct drm_exynos_gem_mmap { unsigned int handle; - unsigned int size; + unsigned int pad; + uint64_t size; uint64_t mapped; }; From 293a1c128ecc523e9a74252ca64220d8081be759 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 16 May 2012 17:08:53 +0200 Subject: [PATCH 162/475] drm/exynos: DRIVER_BUS_PLATFORM is not a driver feature DRIVER_BUS_PLATFORM is a bus type used internally in the DRM core, not a flag for the drm_driver::driver_features field. Signed-off-by: Laurent Pinchart Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 420953197d0..d6de2e07fa0 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -244,8 +244,8 @@ static const struct file_operations exynos_drm_driver_fops = { }; static struct drm_driver exynos_drm_driver = { - .driver_features = DRIVER_HAVE_IRQ | DRIVER_BUS_PLATFORM | - DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME, + .driver_features = DRIVER_HAVE_IRQ | DRIVER_MODESET | + DRIVER_GEM | DRIVER_PRIME, .load = exynos_drm_load, .unload = exynos_drm_unload, .open = exynos_drm_open, From 6037bafa2e676162a86e4f4dee366e394565a0ee Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 16 May 2012 17:08:55 +0200 Subject: [PATCH 163/475] drm/exynos: Don't cast GEM object to Exynos GEM object when not needed The exynos_drm_gem_dumb_map_offset() doesn't need to access any Exynos-specific GEM object fields, don't cast the GEM object. Signed-off-by: Laurent Pinchart Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index fc91293c456..5c8b683029e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -689,7 +689,6 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv, struct drm_device *dev, uint32_t handle, uint64_t *offset) { - struct exynos_drm_gem_obj *exynos_gem_obj; struct drm_gem_object *obj; int ret = 0; @@ -710,15 +709,13 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv, goto unlock; } - exynos_gem_obj = to_exynos_gem_obj(obj); - - if (!exynos_gem_obj->base.map_list.map) { - ret = drm_gem_create_mmap_offset(&exynos_gem_obj->base); + if (!obj->map_list.map) { + ret = drm_gem_create_mmap_offset(obj); if (ret) goto out; } - *offset = (u64)exynos_gem_obj->base.map_list.hash.key << PAGE_SHIFT; + *offset = (u64)obj->map_list.hash.key << PAGE_SHIFT; DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset); out: From 07b6835f2c6bc3101ed7cf471f566d53319a6d50 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 16 May 2012 17:08:56 +0200 Subject: [PATCH 164/475] drm/exynos: Keep a reference to frame buffer GEM objects GEM objects used by frame buffers must be referenced for the whole life of the frame buffer. Release the references in the frame buffer destructor instead of its constructor. Signed-off-by: Laurent Pinchart Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_fb.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index f82a299553f..4ccfe4328fa 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -51,11 +51,22 @@ struct exynos_drm_fb { static void exynos_drm_fb_destroy(struct drm_framebuffer *fb) { struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb); + unsigned int i; DRM_DEBUG_KMS("%s\n", __FILE__); drm_framebuffer_cleanup(fb); + for (i = 0; i < ARRAY_SIZE(exynos_fb->exynos_gem_obj); i++) { + struct drm_gem_object *obj; + + if (exynos_fb->exynos_gem_obj[i] == NULL) + continue; + + obj = &exynos_fb->exynos_gem_obj[i]->base; + drm_gem_object_unreference_unlocked(obj); + } + kfree(exynos_fb); exynos_fb = NULL; } @@ -134,11 +145,11 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv, return ERR_PTR(-ENOENT); } - drm_gem_object_unreference_unlocked(obj); - fb = exynos_drm_framebuffer_init(dev, mode_cmd, obj); - if (IS_ERR(fb)) + if (IS_ERR(fb)) { + drm_gem_object_unreference_unlocked(obj); return fb; + } exynos_fb = to_exynos_fb(fb); nr = exynos_drm_format_num_buffers(fb->pixel_format); @@ -152,8 +163,6 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv, return ERR_PTR(-ENOENT); } - drm_gem_object_unreference_unlocked(obj); - exynos_fb->exynos_gem_obj[i] = to_exynos_gem_obj(obj); } From ddcd0f41471a1e0394c8840a119ec3986a78462c Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Thu, 31 May 2012 22:53:39 -0300 Subject: [PATCH 165/475] Bluetooth: Fix checking the wrong flag when accepting a socket Most probably a typo, the check should have been for BT_SK_DEFER_SETUP instead of BT_DEFER_SETUP (which right now only represents a socket option). Signed-off-by: Vinicius Costa Gomes Acked-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/af_bluetooth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 46e7f86acfc..3e18af4dadc 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -210,7 +210,7 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) } if (sk->sk_state == BT_CONNECTED || !newsock || - test_bit(BT_DEFER_SETUP, &bt_sk(parent)->flags)) { + test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags)) { bt_accept_unlink(sk); if (newsock) sock_graft(sk, newsock); From f56fdcef4d8991b0906461fec6494d7f9d401ef3 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 16 May 2012 17:08:54 +0200 Subject: [PATCH 166/475] drm/exynos: Remove dummy encoder get_crtc operation implementation The encoder get_crtc operation is called to retrieve a pointer to the CRTC the encoder is currenctly connected to, right after setting the encoder::crtc field to the new CRTC. The implementation of this operation returns the pointer to the new CRTC, which is then pointlessly compared to itself. As the operation is not mandatory, don't implement it. Signed-off-by: Laurent Pinchart Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_encoder.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_encoder.c b/drivers/gpu/drm/exynos/exynos_drm_encoder.c index 6e9ac7bd1dc..23d5ad379f8 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_encoder.c +++ b/drivers/gpu/drm/exynos/exynos_drm_encoder.c @@ -172,19 +172,12 @@ static void exynos_drm_encoder_commit(struct drm_encoder *encoder) manager_ops->commit(manager->dev); } -static struct drm_crtc * -exynos_drm_encoder_get_crtc(struct drm_encoder *encoder) -{ - return encoder->crtc; -} - static struct drm_encoder_helper_funcs exynos_encoder_helper_funcs = { .dpms = exynos_drm_encoder_dpms, .mode_fixup = exynos_drm_encoder_mode_fixup, .mode_set = exynos_drm_encoder_mode_set, .prepare = exynos_drm_encoder_prepare, .commit = exynos_drm_encoder_commit, - .get_crtc = exynos_drm_encoder_get_crtc, }; static void exynos_drm_encoder_destroy(struct drm_encoder *encoder) From 5736603bef2383b6bb07f88596ccc8c387d91121 Mon Sep 17 00:00:00 2001 From: Seung-Woo Kim Date: Tue, 15 May 2012 17:22:08 +0900 Subject: [PATCH 167/475] drm/exynos: fixed blending for hdmi graphic layer Blending for graphic layer 0 of hdmi mixer was not set so video layer cannot be showed if graphic layer 0 is enabled. This patch fixes blending values to support blending between graphic layer 0 and video layer. Signed-off-by: Seung-Woo Kim Signed-off-by: Kyungmin Park Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_mixer.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 5a46e583c5b..e2147a2ddce 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -601,18 +601,20 @@ static void mixer_win_reset(struct mixer_context *ctx) mixer_reg_write(res, MXR_BG_COLOR2, 0x008080); /* setting graphical layers */ - val = MXR_GRP_CFG_COLOR_KEY_DISABLE; /* no blank key */ val |= MXR_GRP_CFG_WIN_BLEND_EN; + val |= MXR_GRP_CFG_BLEND_PRE_MUL; + val |= MXR_GRP_CFG_PIXEL_BLEND_EN; val |= MXR_GRP_CFG_ALPHA_VAL(0xff); /* non-transparent alpha */ /* the same configuration for both layers */ mixer_reg_write(res, MXR_GRAPHIC_CFG(0), val); - - val |= MXR_GRP_CFG_BLEND_PRE_MUL; - val |= MXR_GRP_CFG_PIXEL_BLEND_EN; mixer_reg_write(res, MXR_GRAPHIC_CFG(1), val); + /* setting video layers */ + val = MXR_GRP_CFG_ALPHA_VAL(0); + mixer_reg_write(res, MXR_VIDEO_CFG, val); + /* configuration of Video Processor Registers */ vp_win_reset(ctx); vp_default_filter(res); From f1ea8b66e5bf85802ae59961981f5e0d61510b18 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 3 Jun 2012 01:49:32 -0700 Subject: [PATCH 168/475] [PARISC] fix missing TAINT_WARN problem Al viro broke us with commit edd63a2763bdae0daa4f0a4d4c5d61d1154352a5 Author: Al Viro Date: Fri Apr 27 13:42:45 2012 -0400 set_restore_sigmask() is never called without SIGPENDING (and never should be) Although it's pretty much our fault since parisc's asm/bug.h uses BUGWARN_TAINT but doesn't include the file that defines it. Fix that. Signed-off-by: James Bottomley --- arch/parisc/include/asm/bug.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index 72cfdb0cfdd..62a33338549 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -1,6 +1,8 @@ #ifndef _PARISC_BUG_H #define _PARISC_BUG_H +#include /* for BUGFLAG_TAINT */ + /* * Tell the user there is some problem. * The offending file and line are encoded in the __bug_table section. From 731455624fe5af906877aae80fb107daccaa9599 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 5 Jun 2012 13:53:04 +0900 Subject: [PATCH 169/475] [PARISC] fix compile break in use of lib/strncopy_from_user.c Linus broke us with commit 36126f8f2ed8168eb13aa0662b9b9585cba100a9 Author: Linus Torvalds Date: Sat May 26 10:43:17 2012 -0700 word-at-a-time: make the interfaces truly generic By moving functions defined in strncopy_from_user.c into the asm-geneic version word-at-a-time.h. Spark and OpenRisc were fixed to use this, but not parisc. Fix by adding to generic-y in asm/Kbuild Signed-off-by: James Bottomley --- arch/parisc/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 19a434f5505..4383707d980 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -1,3 +1,4 @@ include include/asm-generic/Kbuild.asm header-y += pdc.h +generic-y += word-at-a-time.h From 4c01acc01d77e8df5727247aa5ef5912c00256bf Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 5 Jun 2012 13:54:09 +0900 Subject: [PATCH 170/475] [PARISC] fix code to find libgcc Sam broke this with commit 1f2bfbd00e466ff3489b2ca5cc75b1cccd14c123 Author: Sam Ravnborg Date: Sat May 5 10:18:41 2012 +0200 kbuild: link of vmlinux moved to a script But we should be deriving the location of libgcc in the same way as all the other archs, so fix by adding a LIBGCC variable which is evaluated in the makefile Signed-off-by: James Bottomley --- arch/parisc/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index dbc3850b1d0..5707f1a6234 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -21,6 +21,7 @@ KBUILD_DEFCONFIG := default_defconfig NM = sh $(srctree)/arch/parisc/nm CHECKFLAGS += -D__hppa__=1 +LIBGCC = $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) MACHINE := $(shell uname -m) ifeq ($(MACHINE),parisc*) @@ -79,7 +80,7 @@ kernel-y := mm/ kernel/ math-emu/ kernel-$(CONFIG_HPUX) += hpux/ core-y += $(addprefix arch/parisc/, $(kernel-y)) -libs-y += arch/parisc/lib/ `$(CC) -print-libgcc-file-name` +libs-y += arch/parisc/lib/ $(LIBGCC) drivers-$(CONFIG_OPROFILE) += arch/parisc/oprofile/ From d4e30ef05c9e0fad9782de34f0acd039e238fd43 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 4 Jun 2012 17:18:51 -0400 Subject: [PATCH 171/475] drm/radeon: make audio_init consistent across asics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call it in the asic startup callback on all asics. Previously r600 and rv770 called it in the startup and resume callbacks while all the other asics called it in the startup callback. Signed-off-by: Alex Deucher Reviewed-by: RafaÅ‚ MiÅ‚ecki Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600.c | 15 ++++++--------- drivers/gpu/drm/radeon/rs600.c | 12 ++++++------ drivers/gpu/drm/radeon/rs690.c | 12 ++++++------ drivers/gpu/drm/radeon/rv770.c | 18 ++++++------------ 4 files changed, 24 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 45cfcea6350..f30dc95f83b 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2426,6 +2426,12 @@ int r600_startup(struct radeon_device *rdev) if (r) return r; + r = r600_audio_init(rdev); + if (r) { + DRM_ERROR("radeon: audio init failed\n"); + return r; + } + return 0; } @@ -2462,12 +2468,6 @@ int r600_resume(struct radeon_device *rdev) return r; } - r = r600_audio_init(rdev); - if (r) { - DRM_ERROR("radeon: audio resume failed\n"); - return r; - } - return r; } @@ -2577,9 +2577,6 @@ int r600_init(struct radeon_device *rdev) rdev->accel_working = false; } - r = r600_audio_init(rdev); - if (r) - return r; /* TODO error handling */ return 0; } diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 25f9eef12c4..e95c5e61d4e 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -908,12 +908,6 @@ static int rs600_startup(struct radeon_device *rdev) return r; } - r = r600_audio_init(rdev); - if (r) { - dev_err(rdev->dev, "failed initializing audio\n"); - return r; - } - r = radeon_ib_pool_start(rdev); if (r) return r; @@ -922,6 +916,12 @@ static int rs600_startup(struct radeon_device *rdev) if (r) return r; + r = r600_audio_init(rdev); + if (r) { + dev_err(rdev->dev, "failed initializing audio\n"); + return r; + } + return 0; } diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index 3277ddecfe9..159b6a43fda 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -637,12 +637,6 @@ static int rs690_startup(struct radeon_device *rdev) return r; } - r = r600_audio_init(rdev); - if (r) { - dev_err(rdev->dev, "failed initializing audio\n"); - return r; - } - r = radeon_ib_pool_start(rdev); if (r) return r; @@ -651,6 +645,12 @@ static int rs690_startup(struct radeon_device *rdev) if (r) return r; + r = r600_audio_init(rdev); + if (r) { + dev_err(rdev->dev, "failed initializing audio\n"); + return r; + } + return 0; } diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 04ddc365a90..4ad0281fdc3 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -956,6 +956,12 @@ static int rv770_startup(struct radeon_device *rdev) if (r) return r; + r = r600_audio_init(rdev); + if (r) { + DRM_ERROR("radeon: audio init failed\n"); + return r; + } + return 0; } @@ -978,12 +984,6 @@ int rv770_resume(struct radeon_device *rdev) return r; } - r = r600_audio_init(rdev); - if (r) { - dev_err(rdev->dev, "radeon: audio init failed\n"); - return r; - } - return r; } @@ -1092,12 +1092,6 @@ int rv770_init(struct radeon_device *rdev) rdev->accel_working = false; } - r = r600_audio_init(rdev); - if (r) { - dev_err(rdev->dev, "radeon: audio init failed\n"); - return r; - } - return 0; } From 0aecb5a4ba1ea4167f31d1790eec027f1e658f2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 4 Jun 2012 18:36:58 +0200 Subject: [PATCH 172/475] drm/radeon/audio: don't hardcode CRTC id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is based on info released by AMD, should allow using audio in much more cases. Signed-off-by: RafaÅ‚ MiÅ‚ecki Reviewed-by: Alex Deucher Cc: Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600_audio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600_audio.c b/drivers/gpu/drm/radeon/r600_audio.c index 7c4fa77f018..7479a5c503e 100644 --- a/drivers/gpu/drm/radeon/r600_audio.c +++ b/drivers/gpu/drm/radeon/r600_audio.c @@ -192,6 +192,7 @@ void r600_audio_set_clock(struct drm_encoder *encoder, int clock) struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc); int base_rate = 48000; switch (radeon_encoder->encoder_id) { @@ -217,8 +218,8 @@ void r600_audio_set_clock(struct drm_encoder *encoder, int clock) WREG32(EVERGREEN_AUDIO_PLL1_DIV, clock * 10); WREG32(EVERGREEN_AUDIO_PLL1_UNK, 0x00000071); - /* Some magic trigger or src sel? */ - WREG32_P(0x5ac, 0x01, ~0x77); + /* Select DTO source */ + WREG32(0x5ac, radeon_crtc->crtc_id); } else { switch (dig->dig_encoder) { case 0: From 7838e05a0d29f27f4509290d866e5fc14e642c4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 4 Jun 2012 20:18:03 +0200 Subject: [PATCH 173/475] drm/radeon/hdmi: don't set SEND_MAX_PACKETS bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many TVs and A/V receivers don't work with this bit set. Problem was confirmed using: Onkyo TX-SR605, Sony BRAVIA KDL-52X3500, Sony BRAVIA KDL-40S40xx. In theory this bit shouldn't affect audio engine when feeding it with data, however it seems it does. Driver fglrx doesn't set that bit in any of the above cases. This fixes a regression introduced by 3.5-rc1. Signed-off-by: RafaÅ‚ MiÅ‚ecki Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600_hdmi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 226379e00ac..969c27529df 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -348,7 +348,6 @@ void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mod WREG32(HDMI0_AUDIO_PACKET_CONTROL + offset, HDMI0_AUDIO_SAMPLE_SEND | /* send audio packets */ HDMI0_AUDIO_DELAY_EN(1) | /* default audio delay */ - HDMI0_AUDIO_SEND_MAX_PACKETS | /* send NULL packets if no audio is available */ HDMI0_AUDIO_PACKETS_PER_LINE(3) | /* should be suffient for all audio modes and small enough for all hblanks */ HDMI0_60958_CS_UPDATE); /* allow 60958 channel status fields to be updated */ } From 1a8ca7502c22f7a3802f9a207bf80db5439b11c2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 1 Jun 2012 18:58:22 -0400 Subject: [PATCH 174/475] drm/radeon: fix gpu_init on si - Properly set up the RBs - Properly set up the SPI - Properly set up gb_addr_config This should fix rendering issues on certain cards. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon.h | 5 +- drivers/gpu/drm/radeon/radeon_kms.c | 2 +- drivers/gpu/drm/radeon/si.c | 481 ++++++++++------------------ drivers/gpu/drm/radeon/sid.h | 19 ++ 4 files changed, 186 insertions(+), 321 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 85dac33e3cc..fefcca55c1e 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1374,9 +1374,9 @@ struct cayman_asic { struct si_asic { unsigned max_shader_engines; - unsigned max_pipes_per_simd; unsigned max_tile_pipes; - unsigned max_simds_per_se; + unsigned max_cu_per_sh; + unsigned max_sh_per_se; unsigned max_backends_per_se; unsigned max_texture_channel_caches; unsigned max_gprs; @@ -1387,7 +1387,6 @@ struct si_asic { unsigned sc_hiz_tile_fifo_size; unsigned sc_earlyz_tile_fifo_size; - unsigned num_shader_engines; unsigned num_tile_pipes; unsigned num_backends_per_se; unsigned backend_disable_mask_per_asic; diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index f1016a5820d..5c58d7d90cb 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -273,7 +273,7 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) break; case RADEON_INFO_MAX_PIPES: if (rdev->family >= CHIP_TAHITI) - value = rdev->config.si.max_pipes_per_simd; + value = rdev->config.si.max_cu_per_sh; else if (rdev->family >= CHIP_CAYMAN) value = rdev->config.cayman.max_pipes_per_simd; else if (rdev->family >= CHIP_CEDAR) diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 549732e56ca..c7b61f16ecf 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -867,200 +867,6 @@ void dce6_bandwidth_update(struct radeon_device *rdev) /* * Core functions */ -static u32 si_get_tile_pipe_to_backend_map(struct radeon_device *rdev, - u32 num_tile_pipes, - u32 num_backends_per_asic, - u32 *backend_disable_mask_per_asic, - u32 num_shader_engines) -{ - u32 backend_map = 0; - u32 enabled_backends_mask = 0; - u32 enabled_backends_count = 0; - u32 num_backends_per_se; - u32 cur_pipe; - u32 swizzle_pipe[SI_MAX_PIPES]; - u32 cur_backend = 0; - u32 i; - bool force_no_swizzle; - - /* force legal values */ - if (num_tile_pipes < 1) - num_tile_pipes = 1; - if (num_tile_pipes > rdev->config.si.max_tile_pipes) - num_tile_pipes = rdev->config.si.max_tile_pipes; - if (num_shader_engines < 1) - num_shader_engines = 1; - if (num_shader_engines > rdev->config.si.max_shader_engines) - num_shader_engines = rdev->config.si.max_shader_engines; - if (num_backends_per_asic < num_shader_engines) - num_backends_per_asic = num_shader_engines; - if (num_backends_per_asic > (rdev->config.si.max_backends_per_se * num_shader_engines)) - num_backends_per_asic = rdev->config.si.max_backends_per_se * num_shader_engines; - - /* make sure we have the same number of backends per se */ - num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines); - /* set up the number of backends per se */ - num_backends_per_se = num_backends_per_asic / num_shader_engines; - if (num_backends_per_se > rdev->config.si.max_backends_per_se) { - num_backends_per_se = rdev->config.si.max_backends_per_se; - num_backends_per_asic = num_backends_per_se * num_shader_engines; - } - - /* create enable mask and count for enabled backends */ - for (i = 0; i < SI_MAX_BACKENDS; ++i) { - if (((*backend_disable_mask_per_asic >> i) & 1) == 0) { - enabled_backends_mask |= (1 << i); - ++enabled_backends_count; - } - if (enabled_backends_count == num_backends_per_asic) - break; - } - - /* force the backends mask to match the current number of backends */ - if (enabled_backends_count != num_backends_per_asic) { - u32 this_backend_enabled; - u32 shader_engine; - u32 backend_per_se; - - enabled_backends_mask = 0; - enabled_backends_count = 0; - *backend_disable_mask_per_asic = SI_MAX_BACKENDS_MASK; - for (i = 0; i < SI_MAX_BACKENDS; ++i) { - /* calc the current se */ - shader_engine = i / rdev->config.si.max_backends_per_se; - /* calc the backend per se */ - backend_per_se = i % rdev->config.si.max_backends_per_se; - /* default to not enabled */ - this_backend_enabled = 0; - if ((shader_engine < num_shader_engines) && - (backend_per_se < num_backends_per_se)) - this_backend_enabled = 1; - if (this_backend_enabled) { - enabled_backends_mask |= (1 << i); - *backend_disable_mask_per_asic &= ~(1 << i); - ++enabled_backends_count; - } - } - } - - - memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * SI_MAX_PIPES); - switch (rdev->family) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - force_no_swizzle = true; - break; - default: - force_no_swizzle = false; - break; - } - if (force_no_swizzle) { - bool last_backend_enabled = false; - - force_no_swizzle = false; - for (i = 0; i < SI_MAX_BACKENDS; ++i) { - if (((enabled_backends_mask >> i) & 1) == 1) { - if (last_backend_enabled) - force_no_swizzle = true; - last_backend_enabled = true; - } else - last_backend_enabled = false; - } - } - - switch (num_tile_pipes) { - case 1: - case 3: - case 5: - case 7: - DRM_ERROR("odd number of pipes!\n"); - break; - case 2: - swizzle_pipe[0] = 0; - swizzle_pipe[1] = 1; - break; - case 4: - if (force_no_swizzle) { - swizzle_pipe[0] = 0; - swizzle_pipe[1] = 1; - swizzle_pipe[2] = 2; - swizzle_pipe[3] = 3; - } else { - swizzle_pipe[0] = 0; - swizzle_pipe[1] = 2; - swizzle_pipe[2] = 1; - swizzle_pipe[3] = 3; - } - break; - case 6: - if (force_no_swizzle) { - swizzle_pipe[0] = 0; - swizzle_pipe[1] = 1; - swizzle_pipe[2] = 2; - swizzle_pipe[3] = 3; - swizzle_pipe[4] = 4; - swizzle_pipe[5] = 5; - } else { - swizzle_pipe[0] = 0; - swizzle_pipe[1] = 2; - swizzle_pipe[2] = 4; - swizzle_pipe[3] = 1; - swizzle_pipe[4] = 3; - swizzle_pipe[5] = 5; - } - break; - case 8: - if (force_no_swizzle) { - swizzle_pipe[0] = 0; - swizzle_pipe[1] = 1; - swizzle_pipe[2] = 2; - swizzle_pipe[3] = 3; - swizzle_pipe[4] = 4; - swizzle_pipe[5] = 5; - swizzle_pipe[6] = 6; - swizzle_pipe[7] = 7; - } else { - swizzle_pipe[0] = 0; - swizzle_pipe[1] = 2; - swizzle_pipe[2] = 4; - swizzle_pipe[3] = 6; - swizzle_pipe[4] = 1; - swizzle_pipe[5] = 3; - swizzle_pipe[6] = 5; - swizzle_pipe[7] = 7; - } - break; - } - - for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { - while (((1 << cur_backend) & enabled_backends_mask) == 0) - cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS; - - backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4))); - - cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS; - } - - return backend_map; -} - -static u32 si_get_disable_mask_per_asic(struct radeon_device *rdev, - u32 disable_mask_per_se, - u32 max_disable_mask_per_se, - u32 num_shader_engines) -{ - u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se); - u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se; - - if (num_shader_engines == 1) - return disable_mask_per_asic; - else if (num_shader_engines == 2) - return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se); - else - return 0xffffffff; -} - static void si_tiling_mode_table_init(struct radeon_device *rdev) { const u32 num_tile_mode_states = 32; @@ -1562,18 +1368,151 @@ static void si_tiling_mode_table_init(struct radeon_device *rdev) DRM_ERROR("unknown asic: 0x%x\n", rdev->family); } +static void si_select_se_sh(struct radeon_device *rdev, + u32 se_num, u32 sh_num) +{ + u32 data = INSTANCE_BROADCAST_WRITES; + + if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) + data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES; + else if (se_num == 0xffffffff) + data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num); + else if (sh_num == 0xffffffff) + data |= SH_BROADCAST_WRITES | SE_INDEX(se_num); + else + data |= SH_INDEX(sh_num) | SE_INDEX(se_num); + WREG32(GRBM_GFX_INDEX, data); +} + +static u32 si_create_bitmask(u32 bit_width) +{ + u32 i, mask = 0; + + for (i = 0; i < bit_width; i++) { + mask <<= 1; + mask |= 1; + } + return mask; +} + +static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh) +{ + u32 data, mask; + + data = RREG32(CC_GC_SHADER_ARRAY_CONFIG); + if (data & 1) + data &= INACTIVE_CUS_MASK; + else + data = 0; + data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG); + + data >>= INACTIVE_CUS_SHIFT; + + mask = si_create_bitmask(cu_per_sh); + + return ~data & mask; +} + +static void si_setup_spi(struct radeon_device *rdev, + u32 se_num, u32 sh_per_se, + u32 cu_per_sh) +{ + int i, j, k; + u32 data, mask, active_cu; + + for (i = 0; i < se_num; i++) { + for (j = 0; j < sh_per_se; j++) { + si_select_se_sh(rdev, i, j); + data = RREG32(SPI_STATIC_THREAD_MGMT_3); + active_cu = si_get_cu_enabled(rdev, cu_per_sh); + + mask = 1; + for (k = 0; k < 16; k++) { + mask <<= k; + if (active_cu & mask) { + data &= ~mask; + WREG32(SPI_STATIC_THREAD_MGMT_3, data); + break; + } + } + } + } + si_select_se_sh(rdev, 0xffffffff, 0xffffffff); +} + +static u32 si_get_rb_disabled(struct radeon_device *rdev, + u32 max_rb_num, u32 se_num, + u32 sh_per_se) +{ + u32 data, mask; + + data = RREG32(CC_RB_BACKEND_DISABLE); + if (data & 1) + data &= BACKEND_DISABLE_MASK; + else + data = 0; + data |= RREG32(GC_USER_RB_BACKEND_DISABLE); + + data >>= BACKEND_DISABLE_SHIFT; + + mask = si_create_bitmask(max_rb_num / se_num / sh_per_se); + + return data & mask; +} + +static void si_setup_rb(struct radeon_device *rdev, + u32 se_num, u32 sh_per_se, + u32 max_rb_num) +{ + int i, j; + u32 data, mask; + u32 disabled_rbs = 0; + u32 enabled_rbs = 0; + + for (i = 0; i < se_num; i++) { + for (j = 0; j < sh_per_se; j++) { + si_select_se_sh(rdev, i, j); + data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se); + disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH); + } + } + si_select_se_sh(rdev, 0xffffffff, 0xffffffff); + + mask = 1; + for (i = 0; i < max_rb_num; i++) { + if (!(disabled_rbs & mask)) + enabled_rbs |= mask; + mask <<= 1; + } + + for (i = 0; i < se_num; i++) { + si_select_se_sh(rdev, i, 0xffffffff); + data = 0; + for (j = 0; j < sh_per_se; j++) { + switch (enabled_rbs & 3) { + case 1: + data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2); + break; + case 2: + data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2); + break; + case 3: + default: + data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2); + break; + } + enabled_rbs >>= 2; + } + WREG32(PA_SC_RASTER_CONFIG, data); + } + si_select_se_sh(rdev, 0xffffffff, 0xffffffff); +} + static void si_gpu_init(struct radeon_device *rdev) { - u32 cc_rb_backend_disable = 0; - u32 cc_gc_shader_array_config; u32 gb_addr_config = 0; u32 mc_shared_chmap, mc_arb_ramcfg; - u32 gb_backend_map; - u32 cgts_tcc_disable; u32 sx_debug_1; - u32 gc_user_shader_array_config; - u32 gc_user_rb_backend_disable; - u32 cgts_user_tcc_disable; u32 hdp_host_path_cntl; u32 tmp; int i, j; @@ -1581,9 +1520,9 @@ static void si_gpu_init(struct radeon_device *rdev) switch (rdev->family) { case CHIP_TAHITI: rdev->config.si.max_shader_engines = 2; - rdev->config.si.max_pipes_per_simd = 4; rdev->config.si.max_tile_pipes = 12; - rdev->config.si.max_simds_per_se = 8; + rdev->config.si.max_cu_per_sh = 8; + rdev->config.si.max_sh_per_se = 2; rdev->config.si.max_backends_per_se = 4; rdev->config.si.max_texture_channel_caches = 12; rdev->config.si.max_gprs = 256; @@ -1594,12 +1533,13 @@ static void si_gpu_init(struct radeon_device *rdev) rdev->config.si.sc_prim_fifo_size_backend = 0x100; rdev->config.si.sc_hiz_tile_fifo_size = 0x30; rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_PITCAIRN: rdev->config.si.max_shader_engines = 2; - rdev->config.si.max_pipes_per_simd = 4; rdev->config.si.max_tile_pipes = 8; - rdev->config.si.max_simds_per_se = 5; + rdev->config.si.max_cu_per_sh = 5; + rdev->config.si.max_sh_per_se = 2; rdev->config.si.max_backends_per_se = 4; rdev->config.si.max_texture_channel_caches = 8; rdev->config.si.max_gprs = 256; @@ -1610,13 +1550,14 @@ static void si_gpu_init(struct radeon_device *rdev) rdev->config.si.sc_prim_fifo_size_backend = 0x100; rdev->config.si.sc_hiz_tile_fifo_size = 0x30; rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_VERDE: default: rdev->config.si.max_shader_engines = 1; - rdev->config.si.max_pipes_per_simd = 4; rdev->config.si.max_tile_pipes = 4; - rdev->config.si.max_simds_per_se = 2; + rdev->config.si.max_cu_per_sh = 2; + rdev->config.si.max_sh_per_se = 2; rdev->config.si.max_backends_per_se = 4; rdev->config.si.max_texture_channel_caches = 4; rdev->config.si.max_gprs = 256; @@ -1627,6 +1568,7 @@ static void si_gpu_init(struct radeon_device *rdev) rdev->config.si.sc_prim_fifo_size_backend = 0x40; rdev->config.si.sc_hiz_tile_fifo_size = 0x30; rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN; break; } @@ -1648,31 +1590,7 @@ static void si_gpu_init(struct radeon_device *rdev) mc_shared_chmap = RREG32(MC_SHARED_CHMAP); mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); - cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE); - cc_gc_shader_array_config = RREG32(CC_GC_SHADER_ARRAY_CONFIG); - cgts_tcc_disable = 0xffff0000; - for (i = 0; i < rdev->config.si.max_texture_channel_caches; i++) - cgts_tcc_disable &= ~(1 << (16 + i)); - gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE); - gc_user_shader_array_config = RREG32(GC_USER_SHADER_ARRAY_CONFIG); - cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE); - - rdev->config.si.num_shader_engines = rdev->config.si.max_shader_engines; rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes; - tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT; - rdev->config.si.num_backends_per_se = r600_count_pipe_bits(tmp); - tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT; - rdev->config.si.backend_disable_mask_per_asic = - si_get_disable_mask_per_asic(rdev, tmp, SI_MAX_BACKENDS_PER_SE_MASK, - rdev->config.si.num_shader_engines); - rdev->config.si.backend_map = - si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes, - rdev->config.si.num_backends_per_se * - rdev->config.si.num_shader_engines, - &rdev->config.si.backend_disable_mask_per_asic, - rdev->config.si.num_shader_engines); - tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT; - rdev->config.si.num_texture_channel_caches = r600_count_pipe_bits(tmp); rdev->config.si.mem_max_burst_length_bytes = 256; tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT; rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; @@ -1683,55 +1601,8 @@ static void si_gpu_init(struct radeon_device *rdev) rdev->config.si.num_gpus = 1; rdev->config.si.multi_gpu_tile_size = 64; - gb_addr_config = 0; - switch (rdev->config.si.num_tile_pipes) { - case 1: - gb_addr_config |= NUM_PIPES(0); - break; - case 2: - gb_addr_config |= NUM_PIPES(1); - break; - case 4: - gb_addr_config |= NUM_PIPES(2); - break; - case 8: - default: - gb_addr_config |= NUM_PIPES(3); - break; - } - - tmp = (rdev->config.si.mem_max_burst_length_bytes / 256) - 1; - gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp); - gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.si.num_shader_engines - 1); - tmp = (rdev->config.si.shader_engine_tile_size / 16) - 1; - gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp); - switch (rdev->config.si.num_gpus) { - case 1: - default: - gb_addr_config |= NUM_GPUS(0); - break; - case 2: - gb_addr_config |= NUM_GPUS(1); - break; - case 4: - gb_addr_config |= NUM_GPUS(2); - break; - } - switch (rdev->config.si.multi_gpu_tile_size) { - case 16: - gb_addr_config |= MULTI_GPU_TILE_SIZE(0); - break; - case 32: - default: - gb_addr_config |= MULTI_GPU_TILE_SIZE(1); - break; - case 64: - gb_addr_config |= MULTI_GPU_TILE_SIZE(2); - break; - case 128: - gb_addr_config |= MULTI_GPU_TILE_SIZE(3); - break; - } + /* fix up row size */ + gb_addr_config &= ~ROW_SIZE_MASK; switch (rdev->config.si.mem_row_size_in_kb) { case 1: default: @@ -1745,26 +1616,6 @@ static void si_gpu_init(struct radeon_device *rdev) break; } - tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT; - rdev->config.si.num_tile_pipes = (1 << tmp); - tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT; - rdev->config.si.mem_max_burst_length_bytes = (tmp + 1) * 256; - tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT; - rdev->config.si.num_shader_engines = tmp + 1; - tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT; - rdev->config.si.num_gpus = tmp + 1; - tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT; - rdev->config.si.multi_gpu_tile_size = 1 << tmp; - tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT; - rdev->config.si.mem_row_size_in_kb = 1 << tmp; - - gb_backend_map = - si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes, - rdev->config.si.num_backends_per_se * - rdev->config.si.num_shader_engines, - &rdev->config.si.backend_disable_mask_per_asic, - rdev->config.si.num_shader_engines); - /* setup tiling info dword. gb_addr_config is not adequate since it does * not have bank info, so create a custom tiling dword. * bits 3:0 num_pipes @@ -1789,34 +1640,30 @@ static void si_gpu_init(struct radeon_device *rdev) rdev->config.si.tile_config |= (3 << 0); break; } - rdev->config.si.tile_config |= - ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4; + if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) + rdev->config.si.tile_config |= 1 << 4; + else + rdev->config.si.tile_config |= 0 << 4; rdev->config.si.tile_config |= ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; rdev->config.si.tile_config |= ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12; - rdev->config.si.backend_map = gb_backend_map; WREG32(GB_ADDR_CONFIG, gb_addr_config); WREG32(DMIF_ADDR_CONFIG, gb_addr_config); WREG32(HDP_ADDR_CONFIG, gb_addr_config); - /* primary versions */ - WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); - WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); - WREG32(CC_GC_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config); - - WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable); - - /* user versions */ - WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable); - WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); - WREG32(GC_USER_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config); - - WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable); - si_tiling_mode_table_init(rdev); + si_setup_rb(rdev, rdev->config.si.max_shader_engines, + rdev->config.si.max_sh_per_se, + rdev->config.si.max_backends_per_se); + + si_setup_spi(rdev, rdev->config.si.max_shader_engines, + rdev->config.si.max_sh_per_se, + rdev->config.si.max_cu_per_sh); + + /* set HW defaults for 3D engine */ WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | ROQ_IB2_START(0x2b))); diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 53ea2c42dbd..db406796286 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -24,6 +24,11 @@ #ifndef SI_H #define SI_H +#define TAHITI_RB_BITMAP_WIDTH_PER_SH 2 + +#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003 +#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002 + #define CG_MULT_THERMAL_STATUS 0x714 #define ASIC_MAX_TEMP(x) ((x) << 0) #define ASIC_MAX_TEMP_MASK 0x000001ff @@ -408,6 +413,12 @@ #define SOFT_RESET_IA (1 << 15) #define GRBM_GFX_INDEX 0x802C +#define INSTANCE_INDEX(x) ((x) << 0) +#define SH_INDEX(x) ((x) << 8) +#define SE_INDEX(x) ((x) << 16) +#define SH_BROADCAST_WRITES (1 << 29) +#define INSTANCE_BROADCAST_WRITES (1 << 30) +#define SE_BROADCAST_WRITES (1 << 31) #define GRBM_INT_CNTL 0x8060 # define RDERR_INT_ENABLE (1 << 0) @@ -480,6 +491,8 @@ #define VGT_TF_MEMORY_BASE 0x89B8 #define CC_GC_SHADER_ARRAY_CONFIG 0x89bc +#define INACTIVE_CUS_MASK 0xFFFF0000 +#define INACTIVE_CUS_SHIFT 16 #define GC_USER_SHADER_ARRAY_CONFIG 0x89c0 #define PA_CL_ENHANCE 0x8A14 @@ -688,6 +701,12 @@ #define RLC_MC_CNTL 0xC344 #define RLC_UCODE_CNTL 0xC348 +#define PA_SC_RASTER_CONFIG 0x28350 +# define RASTER_CONFIG_RB_MAP_0 0 +# define RASTER_CONFIG_RB_MAP_1 1 +# define RASTER_CONFIG_RB_MAP_2 2 +# define RASTER_CONFIG_RB_MAP_3 3 + #define VGT_EVENT_INITIATOR 0x28a90 # define SAMPLE_STREAMOUTSTATS1 (1 << 0) # define SAMPLE_STREAMOUTSTATS2 (2 << 0) From bb4091558228ff4a3e02328c931e683fc7f08722 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sun, 3 Jun 2012 16:09:43 +0200 Subject: [PATCH 175/475] drm/radeon: fix vm deadlocks on cayman MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Locking mutex in different orders just screams for deadlocks, and some testing showed that it is actually quite easy to trigger them. Signed-off-by: Christian König Reviewed-by: Jerome Glisse Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_gart.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 79db56e6c2a..59d44937dd9 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -476,12 +476,18 @@ int radeon_vm_bo_add(struct radeon_device *rdev, mutex_lock(&vm->mutex); if (last_pfn > vm->last_pfn) { - /* grow va space 32M by 32M */ - unsigned align = ((32 << 20) >> 12) - 1; + /* release mutex and lock in right order */ + mutex_unlock(&vm->mutex); radeon_mutex_lock(&rdev->cs_mutex); - radeon_vm_unbind_locked(rdev, vm); + mutex_lock(&vm->mutex); + /* and check again */ + if (last_pfn > vm->last_pfn) { + /* grow va space 32M by 32M */ + unsigned align = ((32 << 20) >> 12) - 1; + radeon_vm_unbind_locked(rdev, vm); + vm->last_pfn = (last_pfn + align) & ~align; + } radeon_mutex_unlock(&rdev->cs_mutex); - vm->last_pfn = (last_pfn + align) & ~align; } head = &vm->va; last_offset = 0; @@ -595,8 +601,8 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, if (bo_va == NULL) return 0; - mutex_lock(&vm->mutex); radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&vm->mutex); radeon_vm_bo_update_pte(rdev, vm, bo, NULL); radeon_mutex_unlock(&rdev->cs_mutex); list_del(&bo_va->vm_list); @@ -641,9 +647,8 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) struct radeon_bo_va *bo_va, *tmp; int r; - mutex_lock(&vm->mutex); - radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&vm->mutex); radeon_vm_unbind_locked(rdev, vm); radeon_mutex_unlock(&rdev->cs_mutex); From 61663171bea0ccb499f0818234c5be7ea6c4c791 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Tue, 5 Jun 2012 18:10:10 +0800 Subject: [PATCH 176/475] regulator: max8649: fix missing regmap in rdev In probe(), rdev->regmap must be assigned. Signed-off-by: Haojian Zhuang Signed-off-by: Mark Brown --- drivers/regulator/max8649.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/max8649.c b/drivers/regulator/max8649.c index 1f4bb80457b..9d540cd02da 100644 --- a/drivers/regulator/max8649.c +++ b/drivers/regulator/max8649.c @@ -259,6 +259,7 @@ static int __devinit max8649_regulator_probe(struct i2c_client *client, config.dev = &client->dev; config.init_data = pdata->regulator; config.driver_data = info; + config.regmap = info->regmap; info->regulator = regulator_register(&dcdc_desc, &config); if (IS_ERR(info->regulator)) { From c8fdc1b56611faa7b38eab6b99da5e20113661ff Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Jun 2012 12:25:19 +0100 Subject: [PATCH 177/475] ASoC: wm8994: Ensure all AIFnCLK events are run from the _late variants Ensure that all the actions get taken at appropriate times by calling the _PRE and _POST events for the aifNclk_ev functions explicitly. Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm8994.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index 993639d694c..6d3a8654654 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -1190,17 +1190,19 @@ static int late_enable_ev(struct snd_soc_dapm_widget *w, switch (event) { case SND_SOC_DAPM_PRE_PMU: if (wm8994->aif1clk_enable) { - aif1clk_ev(w, kcontrol, event); + aif1clk_ev(w, kcontrol, SND_SOC_DAPM_PRE_PMU); snd_soc_update_bits(codec, WM8994_AIF1_CLOCKING_1, WM8994_AIF1CLK_ENA_MASK, WM8994_AIF1CLK_ENA); + aif1clk_ev(w, kcontrol, SND_SOC_DAPM_POST_PMU); wm8994->aif1clk_enable = 0; } if (wm8994->aif2clk_enable) { - aif2clk_ev(w, kcontrol, event); + aif2clk_ev(w, kcontrol, SND_SOC_DAPM_PRE_PMU); snd_soc_update_bits(codec, WM8994_AIF2_CLOCKING_1, WM8994_AIF2CLK_ENA_MASK, WM8994_AIF2CLK_ENA); + aif2clk_ev(w, kcontrol, SND_SOC_DAPM_POST_PMU); wm8994->aif2clk_enable = 0; } break; @@ -1221,15 +1223,17 @@ static int late_disable_ev(struct snd_soc_dapm_widget *w, switch (event) { case SND_SOC_DAPM_POST_PMD: if (wm8994->aif1clk_disable) { + aif1clk_ev(w, kcontrol, SND_SOC_DAPM_PRE_PMD); snd_soc_update_bits(codec, WM8994_AIF1_CLOCKING_1, WM8994_AIF1CLK_ENA_MASK, 0); - aif1clk_ev(w, kcontrol, event); + aif1clk_ev(w, kcontrol, SND_SOC_DAPM_POST_PMD); wm8994->aif1clk_disable = 0; } if (wm8994->aif2clk_disable) { + aif2clk_ev(w, kcontrol, SND_SOC_DAPM_PRE_PMD); snd_soc_update_bits(codec, WM8994_AIF2_CLOCKING_1, WM8994_AIF2CLK_ENA_MASK, 0); - aif2clk_ev(w, kcontrol, event); + aif2clk_ev(w, kcontrol, SND_SOC_DAPM_POST_PMD); wm8994->aif2clk_disable = 0; } break; From bfd37bb5f681961e255fd2f346c20fdae2ef3f27 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Jun 2012 12:31:32 +0100 Subject: [PATCH 178/475] ASoC: wm8994: Apply volume updates with clocks enabled Volume updates may not be acted upon if there is no clock applied when the volume update is written. Ensure this doesn't happen by writing out registers with volume updates after we enable each of the clocks. There are more registers updated than before as previously we were relying on wm_hubs to set those for controls it manages. Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm8994.c | 93 ++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 35 deletions(-) diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index 6d3a8654654..aa8c98b628d 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -46,6 +46,39 @@ #define WM8994_NUM_DRC 3 #define WM8994_NUM_EQ 3 +static struct { + unsigned int reg; + unsigned int mask; +} wm8994_vu_bits[] = { + { WM8994_LEFT_LINE_INPUT_1_2_VOLUME, WM8994_IN1_VU }, + { WM8994_RIGHT_LINE_INPUT_1_2_VOLUME, WM8994_IN1_VU }, + { WM8994_LEFT_LINE_INPUT_3_4_VOLUME, WM8994_IN2_VU }, + { WM8994_RIGHT_LINE_INPUT_3_4_VOLUME, WM8994_IN2_VU }, + { WM8994_SPEAKER_VOLUME_LEFT, WM8994_SPKOUT_VU }, + { WM8994_SPEAKER_VOLUME_RIGHT, WM8994_SPKOUT_VU }, + { WM8994_LEFT_OUTPUT_VOLUME, WM8994_HPOUT1_VU }, + { WM8994_RIGHT_OUTPUT_VOLUME, WM8994_HPOUT1_VU }, + { WM8994_LEFT_OPGA_VOLUME, WM8994_MIXOUT_VU }, + { WM8994_RIGHT_OPGA_VOLUME, WM8994_MIXOUT_VU }, + + { WM8994_AIF1_DAC1_LEFT_VOLUME, WM8994_AIF1DAC1_VU }, + { WM8994_AIF1_DAC1_RIGHT_VOLUME, WM8994_AIF1DAC1_VU }, + { WM8994_AIF1_DAC2_LEFT_VOLUME, WM8994_AIF1DAC2_VU }, + { WM8994_AIF1_DAC2_RIGHT_VOLUME, WM8994_AIF1DAC2_VU }, + { WM8994_AIF2_DAC_LEFT_VOLUME, WM8994_AIF2DAC_VU }, + { WM8994_AIF2_DAC_RIGHT_VOLUME, WM8994_AIF2DAC_VU }, + { WM8994_AIF1_ADC1_LEFT_VOLUME, WM8994_AIF1ADC1_VU }, + { WM8994_AIF1_ADC1_RIGHT_VOLUME, WM8994_AIF1ADC1_VU }, + { WM8994_AIF1_ADC2_LEFT_VOLUME, WM8994_AIF1ADC2_VU }, + { WM8994_AIF1_ADC2_RIGHT_VOLUME, WM8994_AIF1ADC2_VU }, + { WM8994_AIF2_ADC_LEFT_VOLUME, WM8994_AIF2ADC_VU }, + { WM8994_AIF2_ADC_RIGHT_VOLUME, WM8994_AIF1ADC2_VU }, + { WM8994_DAC1_LEFT_VOLUME, WM8994_DAC1_VU }, + { WM8994_DAC1_RIGHT_VOLUME, WM8994_DAC1_VU }, + { WM8994_DAC2_LEFT_VOLUME, WM8994_DAC2_VU }, + { WM8994_DAC2_RIGHT_VOLUME, WM8994_DAC2_VU }, +}; + static int wm8994_drc_base[] = { WM8994_AIF1_DRC1_1, WM8994_AIF1_DRC2_1, @@ -989,6 +1022,7 @@ static int aif1clk_ev(struct snd_soc_dapm_widget *w, struct snd_soc_codec *codec = w->codec; struct wm8994 *control = codec->control_data; int mask = WM8994_AIF1DAC1L_ENA | WM8994_AIF1DAC1R_ENA; + int i; int dac; int adc; int val; @@ -1047,6 +1081,13 @@ static int aif1clk_ev(struct snd_soc_dapm_widget *w, WM8994_AIF1DAC2L_ENA); break; + case SND_SOC_DAPM_POST_PMU: + for (i = 0; i < ARRAY_SIZE(wm8994_vu_bits); i++) + snd_soc_write(codec, wm8994_vu_bits[i].reg, + snd_soc_read(codec, + wm8994_vu_bits[i].reg)); + break; + case SND_SOC_DAPM_PRE_PMD: case SND_SOC_DAPM_POST_PMD: snd_soc_update_bits(codec, WM8994_POWER_MANAGEMENT_5, @@ -1072,6 +1113,7 @@ static int aif2clk_ev(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { struct snd_soc_codec *codec = w->codec; + int i; int dac; int adc; int val; @@ -1122,6 +1164,13 @@ static int aif2clk_ev(struct snd_soc_dapm_widget *w, WM8994_AIF2DACR_ENA); break; + case SND_SOC_DAPM_POST_PMU: + for (i = 0; i < ARRAY_SIZE(wm8994_vu_bits); i++) + snd_soc_write(codec, wm8994_vu_bits[i].reg, + snd_soc_read(codec, + wm8994_vu_bits[i].reg)); + break; + case SND_SOC_DAPM_PRE_PMD: case SND_SOC_DAPM_POST_PMD: snd_soc_update_bits(codec, WM8994_POWER_MANAGEMENT_5, @@ -1531,9 +1580,11 @@ SND_SOC_DAPM_POST("Late Disable PGA", late_disable_ev) static const struct snd_soc_dapm_widget wm8994_lateclk_widgets[] = { SND_SOC_DAPM_SUPPLY("AIF1CLK", WM8994_AIF1_CLOCKING_1, 0, 0, aif1clk_ev, - SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_PRE_PMD), + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU | + SND_SOC_DAPM_PRE_PMD), SND_SOC_DAPM_SUPPLY("AIF2CLK", WM8994_AIF2_CLOCKING_1, 0, 0, aif2clk_ev, - SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_PRE_PMD), + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU | + SND_SOC_DAPM_PRE_PMD), SND_SOC_DAPM_PGA("Direct Voice", SND_SOC_NOPM, 0, 0, NULL, 0), SND_SOC_DAPM_MIXER("SPKL", WM8994_POWER_MANAGEMENT_3, 8, 0, left_speaker_mixer, ARRAY_SIZE(left_speaker_mixer)), @@ -3883,39 +3934,11 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec) pm_runtime_put(codec->dev); - /* Latch volume updates (right only; we always do left then right). */ - snd_soc_update_bits(codec, WM8994_AIF1_DAC1_LEFT_VOLUME, - WM8994_AIF1DAC1_VU, WM8994_AIF1DAC1_VU); - snd_soc_update_bits(codec, WM8994_AIF1_DAC1_RIGHT_VOLUME, - WM8994_AIF1DAC1_VU, WM8994_AIF1DAC1_VU); - snd_soc_update_bits(codec, WM8994_AIF1_DAC2_LEFT_VOLUME, - WM8994_AIF1DAC2_VU, WM8994_AIF1DAC2_VU); - snd_soc_update_bits(codec, WM8994_AIF1_DAC2_RIGHT_VOLUME, - WM8994_AIF1DAC2_VU, WM8994_AIF1DAC2_VU); - snd_soc_update_bits(codec, WM8994_AIF2_DAC_LEFT_VOLUME, - WM8994_AIF2DAC_VU, WM8994_AIF2DAC_VU); - snd_soc_update_bits(codec, WM8994_AIF2_DAC_RIGHT_VOLUME, - WM8994_AIF2DAC_VU, WM8994_AIF2DAC_VU); - snd_soc_update_bits(codec, WM8994_AIF1_ADC1_LEFT_VOLUME, - WM8994_AIF1ADC1_VU, WM8994_AIF1ADC1_VU); - snd_soc_update_bits(codec, WM8994_AIF1_ADC1_RIGHT_VOLUME, - WM8994_AIF1ADC1_VU, WM8994_AIF1ADC1_VU); - snd_soc_update_bits(codec, WM8994_AIF1_ADC2_LEFT_VOLUME, - WM8994_AIF1ADC2_VU, WM8994_AIF1ADC2_VU); - snd_soc_update_bits(codec, WM8994_AIF1_ADC2_RIGHT_VOLUME, - WM8994_AIF1ADC2_VU, WM8994_AIF1ADC2_VU); - snd_soc_update_bits(codec, WM8994_AIF2_ADC_LEFT_VOLUME, - WM8994_AIF2ADC_VU, WM8994_AIF1ADC2_VU); - snd_soc_update_bits(codec, WM8994_AIF2_ADC_RIGHT_VOLUME, - WM8994_AIF2ADC_VU, WM8994_AIF1ADC2_VU); - snd_soc_update_bits(codec, WM8994_DAC1_LEFT_VOLUME, - WM8994_DAC1_VU, WM8994_DAC1_VU); - snd_soc_update_bits(codec, WM8994_DAC1_RIGHT_VOLUME, - WM8994_DAC1_VU, WM8994_DAC1_VU); - snd_soc_update_bits(codec, WM8994_DAC2_LEFT_VOLUME, - WM8994_DAC2_VU, WM8994_DAC2_VU); - snd_soc_update_bits(codec, WM8994_DAC2_RIGHT_VOLUME, - WM8994_DAC2_VU, WM8994_DAC2_VU); + /* Latch volume update bits */ + for (i = 0; i < ARRAY_SIZE(wm8994_vu_bits); i++) + snd_soc_update_bits(codec, wm8994_vu_bits[i].reg, + wm8994_vu_bits[i].mask, + wm8994_vu_bits[i].mask); /* Set the low bit of the 3D stereo depth so TLV matches */ snd_soc_update_bits(codec, WM8994_AIF1_DAC1_FILTERS_2, From bfd6185ddecc6e6f6bd654c053c307c9e49ca391 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Jun 2012 14:29:36 +0100 Subject: [PATCH 179/475] regmap: Don't try to map non-existant IRQs If the driver supplied an empty entry in the array of IRQs then return an error rather than trying to do the mapping. This is intended for use with handling chip variants and similar situations. Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 4fac4b9be88..b74e14c4dff 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -346,6 +346,10 @@ EXPORT_SYMBOL_GPL(regmap_irq_chip_get_base); */ int regmap_irq_get_virq(struct regmap_irq_chip_data *data, int irq) { + /* Handle holes in the IRQ list */ + if (!data->chip->irqs[irq].mask) + return -EINVAL; + return irq_create_mapping(data->domain, irq); } EXPORT_SYMBOL_GPL(regmap_irq_get_virq); From a43fd50dc99a5f65505f174eca5a421707d73b4c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Jun 2012 14:34:03 +0100 Subject: [PATCH 180/475] regmap: Implement support for wake IRQs Allow chips to provide a bank of registers for controlling the wake state in a similar fashion to the masks and propagate the wake count to the parent interrupt controller. Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 47 ++++++++++++++++++++++++++++++++ include/linux/regmap.h | 2 ++ 2 files changed, 49 insertions(+) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index b74e14c4dff..b480b529f02 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -29,9 +29,13 @@ struct regmap_irq_chip_data { int irq_base; struct irq_domain *domain; + int irq; + int wake_count; + unsigned int *status_buf; unsigned int *mask_buf; unsigned int *mask_buf_def; + unsigned int *wake_buf; unsigned int irq_reg_stride; }; @@ -71,6 +75,16 @@ static void regmap_irq_sync_unlock(struct irq_data *data) d->chip->mask_base + (i * map->reg_stride)); } + /* If we've changed our wakeup count propagate it to the parent */ + if (d->wake_count < 0) + for (i = d->wake_count; i < 0; i++) + irq_set_irq_wake(d->irq, 0); + else if (d->wake_count > 0) + for (i = 0; i < d->wake_count; i++) + irq_set_irq_wake(d->irq, 1); + + d->wake_count = 0; + mutex_unlock(&d->lock); } @@ -92,12 +106,35 @@ static void regmap_irq_disable(struct irq_data *data) d->mask_buf[irq_data->reg_offset / map->reg_stride] |= irq_data->mask; } +static int regmap_irq_set_wake(struct irq_data *data, unsigned int on) +{ + struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data); + struct regmap *map = d->map; + const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->hwirq); + + if (!d->chip->wake_base) + return -EINVAL; + + if (on) { + d->wake_buf[irq_data->reg_offset / map->reg_stride] + &= ~irq_data->mask; + d->wake_count++; + } else { + d->wake_buf[irq_data->reg_offset / map->reg_stride] + |= irq_data->mask; + d->wake_count--; + } + + return 0; +} + static struct irq_chip regmap_irq_chip = { .name = "regmap", .irq_bus_lock = regmap_irq_lock, .irq_bus_sync_unlock = regmap_irq_sync_unlock, .irq_disable = regmap_irq_disable, .irq_enable = regmap_irq_enable, + .irq_set_wake = regmap_irq_set_wake, }; static irqreturn_t regmap_irq_thread(int irq, void *d) @@ -240,6 +277,14 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, if (!d->mask_buf_def) goto err_alloc; + if (chip->wake_base) { + d->wake_buf = kzalloc(sizeof(unsigned int) * chip->num_regs, + GFP_KERNEL); + if (!d->wake_buf) + goto err_alloc; + } + + d->irq = irq; d->map = map; d->chip = chip; d->irq_base = irq_base; @@ -294,6 +339,7 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, err_domain: /* Should really dispose of the domain but... */ err_alloc: + kfree(d->wake_buf); kfree(d->mask_buf_def); kfree(d->mask_buf); kfree(d->status_buf); @@ -315,6 +361,7 @@ void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d) free_irq(irq, d); /* We should unmap the domain but... */ + kfree(d->wake_buf); kfree(d->mask_buf_def); kfree(d->mask_buf); kfree(d->status_buf); diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 56af22ec9ab..58ec0cba0ae 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -219,6 +219,7 @@ struct regmap_irq { * @status_base: Base status register address. * @mask_base: Base mask register address. * @ack_base: Base ack address. If zero then the chip is clear on read. + * @wake_base: Base address for wake enables. If zero unsupported. * @irq_reg_stride: Stride to use for chips where registers are not contiguous. * * @num_regs: Number of registers in each control bank. @@ -232,6 +233,7 @@ struct regmap_irq_chip { unsigned int status_base; unsigned int mask_base; unsigned int ack_base; + unsigned int wake_base; unsigned int irq_reg_stride; int num_regs; From f2ebd422f71cda9c791f76f85d2ca102ae34a1ed Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 22 Apr 2012 17:02:11 +0300 Subject: [PATCH 181/475] KVM: Fix buffer overflow in kvm_set_irq() kvm_set_irq() has an internal buffer of three irq routing entries, allowing connecting a GSI to three IRQ chips or on MSI. However setup_routing_entry() does not properly enforce this, allowing three irqchip routes followed by an MSI route to overflow the buffer. Fix by ensuring that an MSI entry is added to an empty list. Signed-off-by: Avi Kivity --- virt/kvm/irq_comm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a6a0365475e..5afb4311402 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -332,6 +332,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, */ hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link) if (ei->type == KVM_IRQ_ROUTING_MSI || + ue->type == KVM_IRQ_ROUTING_MSI || ue->u.irqchip.irqchip == ei->irqchip.irqchip) return r; From 1549210fcc17e9ae20c09ac8cd4c48a8dfd431bd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 09:16:47 -0400 Subject: [PATCH 182/475] NFSv4: Fix an Oops in the open recovery code The open recovery code does not need to request a new value for the mdsthreshold, and so does not allocate a struct nfs4_threshold. The problem is that encode_getfattr_open() will still request an mdsthreshold, and so we end up Oopsing in decode_attr_mdsthreshold. This patch fixes encode_getfattr_open so that it doesn't request an mdsthreshold when the caller isn't asking for one. It also fixes decode_attr_mdsthreshold so that it errors if the server returns an mdsthreshold that we didn't ask for (instead of Oopsing). Signed-off-by: Trond Myklebust Cc: Andy Adamson --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 22 +++++++++++++++++++++- fs/nfs/nfs4xdr.c | 12 ++++++++---- include/linux/nfs_xdr.h | 1 + 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c6827f93ab5..cc5900ac61b 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -295,7 +295,7 @@ is_ds_client(struct nfs_client *clp) extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; -extern const u32 nfs4_fattr_bitmap[2]; +extern const u32 nfs4_fattr_bitmap[3]; extern const u32 nfs4_statfs_bitmap[2]; extern const u32 nfs4_pathconf_bitmap[2]; extern const u32 nfs4_fsinfo_bitmap[3]; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d48dbefa0e7..ad1515521a6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -116,7 +116,7 @@ static int nfs4_map_errors(int err) /* * This is our standard bitmap for GETATTR requests. */ -const u32 nfs4_fattr_bitmap[2] = { +const u32 nfs4_fattr_bitmap[3] = { FATTR4_WORD0_TYPE | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE @@ -133,6 +133,24 @@ const u32 nfs4_fattr_bitmap[2] = { | FATTR4_WORD1_TIME_MODIFY }; +static const u32 nfs4_pnfs_open_bitmap[3] = { + FATTR4_WORD0_TYPE + | FATTR4_WORD0_CHANGE + | FATTR4_WORD0_SIZE + | FATTR4_WORD0_FSID + | FATTR4_WORD0_FILEID, + FATTR4_WORD1_MODE + | FATTR4_WORD1_NUMLINKS + | FATTR4_WORD1_OWNER + | FATTR4_WORD1_OWNER_GROUP + | FATTR4_WORD1_RAWDEV + | FATTR4_WORD1_SPACE_USED + | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_METADATA + | FATTR4_WORD1_TIME_MODIFY, + FATTR4_WORD2_MDSTHRESHOLD +}; + const u32 nfs4_statfs_bitmap[2] = { FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE @@ -844,6 +862,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.name = &dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; + p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; if (attrs != NULL && attrs->ia_valid != 0) { __be32 verf[2]; @@ -1820,6 +1839,7 @@ static int _nfs4_do_open(struct inode *dir, opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); if (!opendata->f_attr.mdsthreshold) goto err_opendata_put; + opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; } if (dentry->d_inode != NULL) opendata->state = nfs4_get_open_state(dentry->d_inode, sp); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ee4a74db95d..9ca1428da9d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1198,12 +1198,13 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c } static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, + const u32 *open_bitmap, struct compound_hdr *hdr) { encode_getattr_three(xdr, - bitmask[0] & nfs4_fattr_bitmap[0], - bitmask[1] & nfs4_fattr_bitmap[1], - bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD, + bitmask[0] & open_bitmap[0], + bitmask[1] & open_bitmap[1], + bitmask[2] & open_bitmap[2], hdr); } @@ -2221,7 +2222,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_open(xdr, args, &hdr); encode_getfh(xdr, &hdr); - encode_getfattr_open(xdr, args->bitmask, &hdr); + encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); encode_nops(&hdr); } @@ -4360,6 +4361,9 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr, if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U))) return -EIO; if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) { + /* Did the server return an unrequested attribute? */ + if (unlikely(res == NULL)) + return -EREMOTEIO; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d1a7bf51c32..7519baef025 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -348,6 +348,7 @@ struct nfs_openargs { const struct qstr * name; const struct nfs_server *server; /* Needed for ID mapping */ const u32 * bitmask; + const u32 * open_bitmap; __u32 claim; struct nfs4_sequence_args seq_args; }; From 029c53473727f21c1dd73237e8d630a6f007a2fe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 09:35:44 -0400 Subject: [PATCH 183/475] NFSv4: Fix up decode_attr_mdsthreshold Fix an incorrect use of 'likely()'. The FATTR4_WORD2_MDSTHRESHOLD bit is only expected in NFSv4.1 OPEN calls, and so is actually rather _unlikely_. decode_attr_mdsthreshold needs to clear FATTR4_WORD2_MDSTHRESHOLD from the attribute bitmap after it has decoded the data. Signed-off-by: Trond Myklebust Cc: Andy Adamson --- fs/nfs/nfs4xdr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 9ca1428da9d..18fae29b030 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4360,7 +4360,7 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr, if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U))) return -EIO; - if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) { + if (bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD) { /* Did the server return an unrequested attribute? */ if (unlikely(res == NULL)) return -EREMOTEIO; @@ -4376,6 +4376,7 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr, __func__); status = decode_first_threshold_item4(xdr, res); + bitmap[2] &= ~FATTR4_WORD2_MDSTHRESHOLD; } return status; out_overflow: From 08106ac7c892cad769c5026cb9dd877a39aed603 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 10:08:24 -0400 Subject: [PATCH 184/475] NFSv4.1: Convert a trivial printk into a dprintk There is no need to bug the user about the server returning an error on destroy_session. The error will be handled by the state manager, without any need for further input from anyone else. So convert that printk into a debugging dprintk. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ad1515521a6..7c218fd1ec2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5766,8 +5766,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (status) - printk(KERN_WARNING - "NFS: Got error %d from the server on DESTROY_SESSION. " + dprintk("NFS: Got error %d from the server on DESTROY_SESSION. " "Session has been destroyed regardless...\n", status); dprintk("<-- nfs4_proc_destroy_session\n"); From d430f7dbf7bd6aaaa40c0660b3204df8cf07b22b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 5 Jun 2012 09:50:28 -0400 Subject: [PATCH 185/475] drm/radeon/kms: add new Trinity PCI ids Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/ni.c | 21 +++++++++++++++++---- include/drm/drm_pciids.h | 8 ++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 3df4efa1194..3186522a445 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -460,15 +460,28 @@ static void cayman_gpu_init(struct radeon_device *rdev) rdev->config.cayman.max_pipes_per_simd = 4; rdev->config.cayman.max_tile_pipes = 2; if ((rdev->pdev->device == 0x9900) || - (rdev->pdev->device == 0x9901)) { + (rdev->pdev->device == 0x9901) || + (rdev->pdev->device == 0x9905) || + (rdev->pdev->device == 0x9906) || + (rdev->pdev->device == 0x9907) || + (rdev->pdev->device == 0x9908) || + (rdev->pdev->device == 0x9909) || + (rdev->pdev->device == 0x9910) || + (rdev->pdev->device == 0x9917)) { rdev->config.cayman.max_simds_per_se = 6; rdev->config.cayman.max_backends_per_se = 2; } else if ((rdev->pdev->device == 0x9903) || - (rdev->pdev->device == 0x9904)) { + (rdev->pdev->device == 0x9904) || + (rdev->pdev->device == 0x990A) || + (rdev->pdev->device == 0x9913) || + (rdev->pdev->device == 0x9918)) { rdev->config.cayman.max_simds_per_se = 4; rdev->config.cayman.max_backends_per_se = 2; - } else if ((rdev->pdev->device == 0x9990) || - (rdev->pdev->device == 0x9991)) { + } else if ((rdev->pdev->device == 0x9919) || + (rdev->pdev->device == 0x9990) || + (rdev->pdev->device == 0x9991) || + (rdev->pdev->device == 0x9994) || + (rdev->pdev->device == 0x99A0)) { rdev->config.cayman.max_simds_per_se = 3; rdev->config.cayman.max_backends_per_se = 1; } else { diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 58d0bdab68d..961dae0d26e 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -561,11 +561,19 @@ {0x1002, 0x9909, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x990A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x990F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9910, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9913, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9917, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9918, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9919, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9990, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9991, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9992, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9993, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9994, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x99A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x99A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x99A4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0, 0, 0} #define r128_PCI_IDS \ From 4a6991cc1fad514745b79181df3ace72d561e7aa Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 5 Jun 2012 09:50:29 -0400 Subject: [PATCH 186/475] drm/radeon/kms: add new Palm, Sumo PCI ids Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- include/drm/drm_pciids.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 961dae0d26e..c5b0d8cd056 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -531,6 +531,7 @@ {0x1002, 0x9645, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9647, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\ {0x1002, 0x9648, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\ + {0x1002, 0x9649, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\ {0x1002, 0x964a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x964b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x964c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ @@ -550,6 +551,7 @@ {0x1002, 0x9807, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9808, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9809, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x980A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ From a2bef8ce826dd1e787fd8ad9b6e0566ba59dab43 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 5 Jun 2012 09:50:30 -0400 Subject: [PATCH 187/475] drm/radeon/kms: add new BTC PCI ids Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- include/drm/drm_pciids.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index c5b0d8cd056..86c4cf91639 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -181,6 +181,7 @@ {0x1002, 0x6747, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6748, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6749, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x674A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6750, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6751, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6758, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ @@ -198,6 +199,7 @@ {0x1002, 0x6767, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6768, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6770, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6771, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6772, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6778, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6779, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ From 7aaa61b3476462b69f1ac7669fcca8d608ce3cb5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 5 Jun 2012 09:50:31 -0400 Subject: [PATCH 188/475] drm/radeon/kms: add new SI PCI ids Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- include/drm/drm_pciids.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 86c4cf91639..81368ab6c61 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -231,10 +231,11 @@ {0x1002, 0x6827, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6829, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x682B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x682D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x682F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ - {0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ - {0x1002, 0x6831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ From c3a21fc79b6bc097d8b0e47498903a649a111127 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 5 Jun 2012 13:17:32 +0300 Subject: [PATCH 189/475] OMAPDSS: fix registration of DPI and SDI devices The omapdss arch initialization code registers all the output devices as omap_devices. However, DPI and SDI are not proper omap_devices, as they do not have any corresponding HWMOD. This leads to crashes or problems when the platform code tries to use omap_device functions for DPI and SDI devices. One such crash was reported by John Stultz : [ 18.756835] Unable to handle kernel NULL pointer dereference at virtual addr8 [ 18.765319] pgd = ea6b8000 [ 18.768188] [00000018] *pgd=aa942831, *pte=00000000, *ppte=00000000 [ 18.774749] Internal error: Oops: 17 [#1] SMP ARM [ 18.779663] Modules linked in: [ 18.782836] CPU: 0 Not tainted (3.5.0-rc1-dirty #456) [ 18.788482] PC is at _od_resume_noirq+0x1c/0x78 [ 18.793212] LR is at _od_resume_noirq+0x6c/0x78 [ 18.797943] pc : [] lr : [] psr: 20000113 [ 18.797943] sp : ec3abe80 ip : ec3abdb8 fp : 00000006 [ 18.809936] r10: ec1148b8 r9 : c08a48f0 r8 : c00307d0 [ 18.815368] r7 : 00000000 r6 : 00000000 r5 : ec114800 r4 : ec114808 [ 18.822174] r3 : 00000000 r2 : 00000000 r1 : ec154fe8 r0 : 00000006 [ 18.829010] Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user [ 18.836456] Control: 10c5387d Table: aa6b804a DAC: 00000015 [ 18.842437] Process sh (pid: 1139, stack limit = 0xec3aa2f0) [ 18.848358] Stack: (0xec3abe80 to 0xec3ac000) DPI and SDI can be plain platform_devices. This patch changes the registration from omap_device_register() to platform_device_add(). Signed-off-by: Tomi Valkeinen Reported-by: John Stultz Tested-by: Jean Pihet --- arch/arm/mach-omap2/display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 54d49ddb9b8..5fb47a14f4b 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -271,9 +271,9 @@ static struct platform_device *create_simple_dss_pdev(const char *pdev_name, goto err; } - r = omap_device_register(pdev); + r = platform_device_add(pdev); if (r) { - pr_err("Could not register omap_device for %s\n", pdev_name); + pr_err("Could not register platform_device for %s\n", pdev_name); goto err; } From bda197f5d71d56b95a84c0ccbcb8ce2691106cca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 10:22:14 -0400 Subject: [PATCH 190/475] NFSv4.1: Ensure we clear session state flags after a session creation Both nfs4_reset_session and nfs41_init_clientid need to clear all the session related state flags on success. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c679b9ecef6..f38300e9f17 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -244,6 +244,16 @@ static int nfs4_begin_drain_session(struct nfs_client *clp) return nfs4_wait_on_slot_tbl(&ses->fc_slot_table); } +static void nfs41_finish_session_reset(struct nfs_client *clp) +{ + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + /* create_session negotiated new slot table */ + clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); + clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); + nfs41_setup_state_renewal(clp); +} + int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) { int status; @@ -259,8 +269,7 @@ do_confirm: status = nfs4_proc_create_session(clp, cred); if (status != 0) goto out; - clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); - nfs41_setup_state_renewal(clp); + nfs41_finish_session_reset(clp); nfs_mark_client_ready(clp, NFS_CS_READY); out: return status; @@ -1772,16 +1781,9 @@ static int nfs4_reset_session(struct nfs_client *clp) status = nfs4_handle_reclaim_lease_error(clp, status); goto out; } - clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); - /* create_session negotiated new slot table */ - clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); - clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); + nfs41_finish_session_reset(clp); dprintk("%s: session reset was successful for server %s!\n", __func__, clp->cl_hostname); - - /* Let the state manager reestablish state */ - if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) - nfs41_setup_state_renewal(clp); out: if (cred) put_rpccred(cred); From c2238f10e0c34a85a2a555c8a197316d1ca3fb7e Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Tue, 5 Jun 2012 10:35:02 +0800 Subject: [PATCH 191/475] x86/mce: Fix the MCE poll timer logic In commit 82f7af09 (x86/mce: Cleanup timer mess), Thomas just forgot the "/ 2" there while cleaning up. Signed-off-by: Chen Gong Acked-by: Thomas Gleixner Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 98003bfc555..d6b18a4d0b9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1266,7 +1266,7 @@ static void mce_timer_fn(unsigned long data) */ iv = __this_cpu_read(mce_next_interval); if (mce_notify_irq()) - iv = max(iv, (unsigned long) HZ/100); + iv = max(iv / 2, (unsigned long) HZ/100); else iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); __this_cpu_write(mce_next_interval, iv); From fffaee365fded09f9ebf2db19066065fa54323c3 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 5 Jun 2012 21:36:33 +0400 Subject: [PATCH 192/475] radix-tree: fix contiguous iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes bug in macro radix_tree_for_each_contig(). If radix_tree_next_slot() sees NULL in next slot it returns NULL, but following radix_tree_next_chunk() switches iterating into next chunk. As result iterating becomes non-contiguous and breaks vfs "splice" and all its users. Signed-off-by: Konstantin Khlebnikov Reported-and-bisected-by: Hans de Bruin Reported-and-bisected-by: Ondrej Zary Reported-bisected-and-tested-by: Toralf Förster Link: https://lkml.org/lkml/2012/6/5/64 Cc: stable # 3.4.x Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 5 ++++- lib/radix-tree.c | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 0d04cd69ab9..ffc444c38b0 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -368,8 +368,11 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) iter->index++; if (likely(*slot)) return slot; - if (flags & RADIX_TREE_ITER_CONTIG) + if (flags & RADIX_TREE_ITER_CONTIG) { + /* forbid switching to the next chunk */ + iter->next_index = 0; break; + } } } return NULL; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d7c878cc006..e7964296fd5 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -686,6 +686,9 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, * during iterating; it can be zero only at the beginning. * And we cannot overflow iter->next_index in a single step, * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. + * + * This condition also used by radix_tree_next_slot() to stop + * contiguous iterating, and forbid swithing to the next chunk. */ index = iter->next_index; if (!index && iter->index) From f69a23b795d6ee3673583146ed7efcbaaa5add18 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Jun 2012 19:56:06 +0200 Subject: [PATCH 193/475] iwlwifi: fix double free/complete in firmware loading Linus reported that due to mac80211 failing to register the device (due to WoWLAN) his machine crashed etc. as we double-freed the vmalloc() firmware area. His patch to fix it was very similar to this one but I noticed that there's another bug in the area: we complete the completion before starting, so since we're running in a work struct context stop() could be called while in the middle of start() which will almost certainly lead to issues. Make a modification similar to his to avoid the double- free but also move the completion to another spot so it is only done after start() either finished or failed so that stop() can have a consistent state. Reported-by: Linus Torvalds Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-drv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-drv.c b/drivers/net/wireless/iwlwifi/iwl-drv.c index d742900969e..fac67a526a3 100644 --- a/drivers/net/wireless/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/iwlwifi/iwl-drv.c @@ -861,13 +861,18 @@ static void iwl_ucode_callback(const struct firmware *ucode_raw, void *context) /* We have our copies now, allow OS release its copies */ release_firmware(ucode_raw); - complete(&drv->request_firmware_complete); drv->op_mode = iwl_dvm_ops.start(drv->trans, drv->cfg, &drv->fw); if (!drv->op_mode) - goto out_free_fw; + goto out_unbind; + /* + * Complete the firmware request last so that + * a driver unbind (stop) doesn't run while we + * are doing the start() above. + */ + complete(&drv->request_firmware_complete); return; try_again: From 0e1fa7ef25004b9c1a14147bce61c15c2f1c6744 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Jun 2012 09:38:35 +0200 Subject: [PATCH 194/475] iwlwifi: unregister LEDs if mac80211 registration fails Otherwise the LEDs stick around and cause issues the next time around since they're still there but not really hooked up. Cc: stable@kernel.org Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-mac80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/iwlwifi/iwl-mac80211.c b/drivers/net/wireless/iwlwifi/iwl-mac80211.c index 5d158ca9d24..3ee23134c02 100644 --- a/drivers/net/wireless/iwlwifi/iwl-mac80211.c +++ b/drivers/net/wireless/iwlwifi/iwl-mac80211.c @@ -251,6 +251,7 @@ int iwlagn_mac_setup_register(struct iwl_priv *priv, ret = ieee80211_register_hw(priv->hw); if (ret) { IWL_ERR(priv, "Failed to register hw (error %d)\n", ret); + iwl_leds_exit(priv); return ret; } priv->mac80211_registered = 1; From 463454b5dbd8dbab6e2fc6c557329e5b811b9c32 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Jun 2012 12:16:50 +0200 Subject: [PATCH 195/475] cfg80211: fix interface combinations check If a given interface combination doesn't contain a required interface type then we missed checking that and erroneously allowed it even though iface type wasn't there at all. Add a check that makes sure that all interface types are accounted for. Cc: stable@kernel.org Reported-by: Mohammed Shafi Shajakhan Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/util.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 55d99466bab..8f2d68fc3a4 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -935,6 +935,7 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { struct wireless_dev *wdev_iter; + u32 used_iftypes = BIT(iftype); int num[NUM_NL80211_IFTYPES]; int total = 1; int i, j; @@ -961,6 +962,7 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, num[wdev_iter->iftype]++; total++; + used_iftypes |= BIT(wdev_iter->iftype); } mutex_unlock(&rdev->devlist_mtx); @@ -970,6 +972,7 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { const struct ieee80211_iface_combination *c; struct ieee80211_iface_limit *limits; + u32 all_iftypes = 0; c = &rdev->wiphy.iface_combinations[i]; @@ -984,6 +987,7 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, if (rdev->wiphy.software_iftypes & BIT(iftype)) continue; for (j = 0; j < c->n_limits; j++) { + all_iftypes |= limits[j].types; if (!(limits[j].types & BIT(iftype))) continue; if (limits[j].max < num[iftype]) @@ -991,7 +995,20 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, limits[j].max -= num[iftype]; } } - /* yay, it fits */ + + /* + * Finally check that all iftypes that we're currently + * using are actually part of this combination. If they + * aren't then we can't use this combination and have + * to continue to the next. + */ + if ((all_iftypes & used_iftypes) != used_iftypes) + goto cont; + + /* + * This combination covered all interface types and + * supported the requested numbers, so we're good. + */ kfree(limits); return 0; cont: From 256cccbeca6e53c833c98a3651d7bac38ced4cec Mon Sep 17 00:00:00 2001 From: Karel Zak Date: Fri, 1 Jun 2012 10:13:09 +0200 Subject: [PATCH 196/475] MAINTAINERS: update util-linux info Signed-off-by: Karel Zak Signed-off-by: Linus Torvalds --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6a52bb4a4fc..f55cf8f084d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7298,11 +7298,11 @@ F: Documentation/DocBook/uio-howto.tmpl F: drivers/uio/ F: include/linux/uio*.h -UTIL-LINUX-NG PACKAGE +UTIL-LINUX PACKAGE M: Karel Zak -L: util-linux-ng@vger.kernel.org -W: http://kernel.org/~kzak/util-linux-ng/ -T: git git://git.kernel.org/pub/scm/utils/util-linux-ng/util-linux-ng.git +L: util-linux@vger.kernel.org +W: http://en.wikipedia.org/wiki/Util-linux +T: git git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git S: Maintained UVESAFB DRIVER From cb05d8dedefa3066bf5d74ef88c6ca6cf4bd1c87 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 23 May 2012 14:02:00 +0200 Subject: [PATCH 197/475] drm/i915: fix up ivb plane 3 pageflips Or at least plug another gapping hole. Apparrently hw desingers only moved the bit field, but did not bother ot re-enumerate the planes when adding support for a 3rd pipe. Discovered by i-g-t/flip_test. This may or may not fix the reference bugzilla, because that one smells like we have still larger fish to fry. v2: Fixup the impossible case to catch programming errors, noticed by Chris Wilson. References: https://bugs.freedesktop.org/show_bug.cgi?id=50069 Acked-by: Chris Wilson Tested-by: Eugeni Dodonov Eugeni Dodonov Cc: stable@vger.kernel.org Signed-Off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 8 ++++++++ drivers/gpu/drm/i915/intel_display.c | 19 ++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2d49b9507ed..76bc2756d7c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -210,6 +210,14 @@ #define MI_DISPLAY_FLIP MI_INSTR(0x14, 2) #define MI_DISPLAY_FLIP_I915 MI_INSTR(0x14, 1) #define MI_DISPLAY_FLIP_PLANE(n) ((n) << 20) +/* IVB has funny definitions for which plane to flip. */ +#define MI_DISPLAY_FLIP_IVB_PLANE_A (0 << 19) +#define MI_DISPLAY_FLIP_IVB_PLANE_B (1 << 19) +#define MI_DISPLAY_FLIP_IVB_SPRITE_A (2 << 19) +#define MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19) +#define MI_DISPLAY_FLIP_IVB_PLANE_C (4 << 19) +#define MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19) + #define MI_SET_CONTEXT MI_INSTR(0x18, 0) #define MI_MM_SPACE_GTT (1<<8) #define MI_MM_SPACE_PHYSICAL (0<<8) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 91478942090..e0aa064def3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6158,17 +6158,34 @@ static int intel_gen7_queue_flip(struct drm_device *dev, struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_ring_buffer *ring = &dev_priv->ring[BCS]; + uint32_t plane_bit = 0; int ret; ret = intel_pin_and_fence_fb_obj(dev, obj, ring); if (ret) goto err; + switch(intel_crtc->plane) { + case PLANE_A: + plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_A; + break; + case PLANE_B: + plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_B; + break; + case PLANE_C: + plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_C; + break; + default: + WARN_ONCE(1, "unknown plane in flip command\n"); + ret = -ENODEV; + goto err; + } + ret = intel_ring_begin(ring, 4); if (ret) goto err_unpin; - intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | (intel_crtc->plane << 19)); + intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit); intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode)); intel_ring_emit(ring, (obj->gtt_offset)); intel_ring_emit(ring, (MI_NOOP)); From cdf66442fab82916fe38f928b4f91815195a294c Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 5 Jun 2012 14:59:54 -0400 Subject: [PATCH 198/475] NFS4: Set parsed mount data version to 4 This patch only affects mounting through "-t nfs4" since it doesn't set up an nfs version to use in the mount data. The nfs client was trying to mount using NFS v0, causing either a BUG() or a protocol not supported message. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ff656c02268..bdd673141e9 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2637,6 +2637,8 @@ static int nfs4_validate_mount_data(void *options, if (data == NULL) goto out_no_data; + args->version = 4; + switch (data->version) { case 1: if (data->host_addrlen > sizeof(args->nfs_server.address)) From aa69cb8c1e72b027548f9751e6377a7a7e8bb8fd Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Thu, 31 May 2012 19:51:37 +0800 Subject: [PATCH 199/475] MAINTAINERS: add linux-leds mail list address and git URL Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index f55cf8f084d..f935a0cef40 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4103,6 +4103,8 @@ F: drivers/scsi/53c700* LED SUBSYSTEM M: Bryan Wu M: Richard Purdie +L: linux-leds@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/cooloney/linux-leds.git S: Maintained F: drivers/leds/ F: include/linux/leds.h From 5c888aa43ee9872efe2a09e8c9f03db84f60dd28 Mon Sep 17 00:00:00 2001 From: Emil Goode Date: Tue, 29 May 2012 18:57:00 +0200 Subject: [PATCH 200/475] video: bfin_adv7393fb: Convert to kstrtouint_from_user This patch removes a call to the deprecated simple_strtoul function and simplifies the code by replacing two function calls with one call to kstrtouint_from_user. -Simplify the adv7393_write_proc function by replacing the simple_strtoul and copy_from_user calls with one call to kstrtouint_from_user. -Change the count parameter from unsigned long to size_t as this is the type that the kstrtouint_from_user function expects. (size_t is what will be passed to the adv7393_write_proc function by the proc write handler function proc_file_write anyway) Signed-off-by: Emil Goode Signed-off-by: Florian Tobias Schandinat --- drivers/video/bfin_adv7393fb.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/bfin_adv7393fb.c b/drivers/video/bfin_adv7393fb.c index 33ea874c87d..9bdd4b0c18c 100644 --- a/drivers/video/bfin_adv7393fb.c +++ b/drivers/video/bfin_adv7393fb.c @@ -353,18 +353,16 @@ adv7393_read_proc(char *page, char **start, off_t off, static int adv7393_write_proc(struct file *file, const char __user * buffer, - unsigned long count, void *data) + size_t count, void *data) { struct adv7393fb_device *fbdev = data; - char line[8]; unsigned int val; int ret; - ret = copy_from_user(line, buffer, count); + ret = kstrtouint_from_user(buffer, count, 0, &val); if (ret) return -EFAULT; - val = simple_strtoul(line, NULL, 0); adv7393_write(fbdev->client, val >> 8, val & 0xff); return count; From 9bce008bae8b57bc7b007bcc2071d1247a527120 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 18:32:03 -0400 Subject: [PATCH 201/475] NFS: Fix a commit bug The new commit code fails to copy the verifier into the wb_verf field of _all_ the nfs_page structures; it only copies it into the first entry. The consequence is that most requests end up failing to match in nfs_commit_release. Fix is to copy the verifier into the req->wb_verf field in nfs_write_completion. Signed-off-by: Trond Myklebust Cc: Fred Isaman --- fs/nfs/direct.c | 4 ++-- fs/nfs/write.c | 7 ++++--- include/linux/nfs_xdr.h | 2 ++ 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 23d170bc44f..b5385a7efd5 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -710,12 +710,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) bit = NFS_IOHDR_NEED_RESCHED; else if (dreq->flags == 0) { - memcpy(&dreq->verf, &req->wb_verf, + memcpy(&dreq->verf, hdr->verf, sizeof(dreq->verf)); bit = NFS_IOHDR_NEED_COMMIT; dreq->flags = NFS_ODIRECT_DO_COMMIT; } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { - if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) { + if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) { dreq->flags = NFS_ODIRECT_RESCHED_WRITES; bit = NFS_IOHDR_NEED_RESCHED; } else diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e6fe3d69d14..4d6861c0dc1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -80,6 +80,7 @@ struct nfs_write_header *nfs_writehdr_alloc(void) INIT_LIST_HEAD(&hdr->rpc_list); spin_lock_init(&hdr->lock); atomic_set(&hdr->refcnt, 0); + hdr->verf = &p->verf; } return p; } @@ -619,6 +620,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) goto next; } if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + memcpy(&req->wb_verf, hdr->verf, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; } @@ -1255,15 +1257,14 @@ static void nfs_writeback_release_common(void *calldata) struct nfs_write_data *data = calldata; struct nfs_pgio_header *hdr = data->header; int status = data->task.tk_status; - struct nfs_page *req = hdr->req; if ((status >= 0) && nfs_write_need_commit(data)) { spin_lock(&hdr->lock); if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) ; /* Do nothing */ else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) - memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) + memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf)); + else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf))) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7519baef025..8aadd90b808 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1237,6 +1237,7 @@ struct nfs_pgio_header { struct list_head rpc_list; atomic_t refcnt; struct nfs_page *req; + struct nfs_writeverf *verf; struct pnfs_layout_segment *lseg; loff_t io_start; const struct rpc_call_ops *mds_ops; @@ -1274,6 +1275,7 @@ struct nfs_write_data { struct nfs_write_header { struct nfs_pgio_header header; struct nfs_write_data rpc_data; + struct nfs_writeverf verf; }; struct nfs_mds_commit_info { From a2c658505bf5c75516ee0a79287223e86a2474af Mon Sep 17 00:00:00 2001 From: "nagalakshmi.nandigama@lsi.com" Date: Tue, 17 Apr 2012 11:25:04 +0530 Subject: [PATCH 202/475] [SCSI] mpt2sas: Fix unsafe using smp_processor_id() in preemptible When CONFIG_DEBUG_PREEMPT is enabled, bug is observed in the smp_processor_id(). This is because smp_processor_id() is not called in preempt safe condition. To fix this issue, use raw_smp_processor_id instead of smp_processor_id. Signed-off-by: Nagalakshmi Nandigama CC: stable@vger.kernel.org Signed-off-by: James Bottomley --- drivers/scsi/mpt2sas/mpt2sas_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index 6102ef2cb2d..9d46fcbe775 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -1792,7 +1792,7 @@ static inline void _base_writeq(__u64 b, volatile void __iomem *addr, static inline u8 _base_get_msix_index(struct MPT2SAS_ADAPTER *ioc) { - return ioc->cpu_msix_table[smp_processor_id()]; + return ioc->cpu_msix_table[raw_smp_processor_id()]; } /** From 470a54207ccf7045a59df727573bd9d148988582 Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Sat, 26 May 2012 06:08:48 +0000 Subject: [PATCH 203/475] e1000e: test for valid check_reset_block function pointer commit 44abd5c12767a8c567dc4e45fd9aec3b13ca85e0 introduced NULL pointer dereferences when attempting to access the check_reset_block function pointer on 8257x and 80003es2lan non-copper devices. This fix should be applied back through 3.4. Signed-off-by: Bruce Allan Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/ethtool.c | 6 ++++-- drivers/net/ethernet/intel/e1000e/mac.c | 2 +- drivers/net/ethernet/intel/e1000e/netdev.c | 4 ++-- drivers/net/ethernet/intel/e1000e/phy.c | 8 +++++--- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index d863075df7a..905e2147d91 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -258,7 +258,8 @@ static int e1000_set_settings(struct net_device *netdev, * When SoL/IDER sessions are active, autoneg/speed/duplex * cannot be changed */ - if (hw->phy.ops.check_reset_block(hw)) { + if (hw->phy.ops.check_reset_block && + hw->phy.ops.check_reset_block(hw)) { e_err("Cannot change link characteristics when SoL/IDER is active.\n"); return -EINVAL; } @@ -1615,7 +1616,8 @@ static int e1000_loopback_test(struct e1000_adapter *adapter, u64 *data) * PHY loopback cannot be performed if SoL/IDER * sessions are active */ - if (hw->phy.ops.check_reset_block(hw)) { + if (hw->phy.ops.check_reset_block && + hw->phy.ops.check_reset_block(hw)) { e_err("Cannot do PHY loopback test when SoL/IDER is active.\n"); *data = 0; goto out; diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index 026e8b3ab52..a1343992848 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -709,7 +709,7 @@ s32 e1000e_setup_link_generic(struct e1000_hw *hw) * In the case of the phy reset being blocked, we already have a link. * We do not need to set it up again. */ - if (hw->phy.ops.check_reset_block(hw)) + if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw)) return 0; /* diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index a4b0435b00d..31d37a2b5ba 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6237,7 +6237,7 @@ static int __devinit e1000_probe(struct pci_dev *pdev, adapter->hw.phy.ms_type = e1000_ms_hw_default; } - if (hw->phy.ops.check_reset_block(hw)) + if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw)) e_info("PHY reset is blocked due to SOL/IDER session.\n"); /* Set initial default active device features */ @@ -6404,7 +6404,7 @@ err_register: if (!(adapter->flags & FLAG_HAS_AMT)) e1000e_release_hw_control(adapter); err_eeprom: - if (!hw->phy.ops.check_reset_block(hw)) + if (hw->phy.ops.check_reset_block && !hw->phy.ops.check_reset_block(hw)) e1000_phy_hw_reset(&adapter->hw); err_hw_init: kfree(adapter->tx_ring); diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index 0334d013bc3..b860d4f7ea2 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -2155,9 +2155,11 @@ s32 e1000e_phy_hw_reset_generic(struct e1000_hw *hw) s32 ret_val; u32 ctrl; - ret_val = phy->ops.check_reset_block(hw); - if (ret_val) - return 0; + if (phy->ops.check_reset_block) { + ret_val = phy->ops.check_reset_block(hw); + if (ret_val) + return 0; + } ret_val = phy->ops.acquire(hw); if (ret_val) From 146d4cc98d660cbdeae52ae35a4d038d0dab6da5 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 15 May 2012 05:59:26 +0000 Subject: [PATCH 204/475] ixgbe: fix_features rxvlan is independent of DCB and needs to be set DCB can be used independent of if RX VLAN stripping is enabled or disabled so remove erroneous check. Also enable or disable VLAN stripping when features are applied so hardware and feature flags are in sync. CC: Alexander Duyck Signed-off-by: John Fastabend Tested-by: Marcus Dennis Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index bf20457ea23..b8b208720fb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3607,10 +3607,6 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter) if (hw->mac.type == ixgbe_mac_82598EB) netif_set_gso_max_size(adapter->netdev, 32768); - - /* Enable VLAN tag insert/strip */ - adapter->netdev->features |= NETIF_F_HW_VLAN_RX; - hw->mac.ops.set_vfta(&adapter->hw, 0, 0, true); #ifdef IXGBE_FCOE @@ -6701,11 +6697,6 @@ static netdev_features_t ixgbe_fix_features(struct net_device *netdev, { struct ixgbe_adapter *adapter = netdev_priv(netdev); -#ifdef CONFIG_DCB - if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) - features &= ~NETIF_F_HW_VLAN_RX; -#endif - /* return error if RXHASH is being enabled when RSS is not supported */ if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) features &= ~NETIF_F_RXHASH; @@ -6718,7 +6709,6 @@ static netdev_features_t ixgbe_fix_features(struct net_device *netdev, if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)) features &= ~NETIF_F_LRO; - return features; } @@ -6766,6 +6756,11 @@ static int ixgbe_set_features(struct net_device *netdev, need_reset = true; } + if (features & NETIF_F_HW_VLAN_RX) + ixgbe_vlan_strip_enable(adapter); + else + ixgbe_vlan_strip_disable(adapter); + if (changed & NETIF_F_RXALL) need_reset = true; From 43e95f11ac0b06fec2c58431b092a60934d730e2 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 15 May 2012 06:12:17 +0000 Subject: [PATCH 205/475] ixgbe: IXGBE_RXD_STAT_VP set even with Rx stripping enabled The hardware bit IXGBE_RXD_STAT_VP appears to be set even when Rx stripping is disabled. This results in passing frames up the stack which do not have the 802.1Q tag stripped but have the tci bits set as if it was. Working around this with a check for the feature flag bit. I would welcome any better ideas or a pointer to exactly which bits in the hardware register need to be cleared to get the IXGBE_RXD_STAT_VP bit to be set per data sheet. Signed-off-by: John Fastabend Acked-by: Alexander Duyck Tested-by: Marcus Dennis Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index b8b208720fb..17ad6a3c1be 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1390,6 +1390,8 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb) { + struct net_device *dev = rx_ring->netdev; + ixgbe_update_rsc_stats(rx_ring, skb); ixgbe_rx_hash(rx_ring, rx_desc, skb); @@ -1401,14 +1403,15 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, ixgbe_ptp_rx_hwtstamp(rx_ring->q_vector, skb); #endif - if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { + if ((dev->features & NETIF_F_HW_VLAN_RX) && + ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan); __vlan_hwaccel_put_tag(skb, vid); } skb_record_rx_queue(skb, rx_ring->queue_index); - skb->protocol = eth_type_trans(skb, rx_ring->netdev); + skb->protocol = eth_type_trans(skb, dev); } static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector, From 958fb3c51295764599d6abce87e1a01ace897a3e Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Tue, 5 Jun 2012 10:35:02 +0800 Subject: [PATCH 206/475] x86/mce: Fix the MCE poll timer logic In commit 82f7af09 ("x86/mce: Cleanup timer mess), Thomas just forgot the "/ 2" there while cleaning up. Signed-off-by: Chen Gong Acked-by: Thomas Gleixner Cc: bp@amd64.org Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/1338863702-9245-1-git-send-email-gong.chen@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0a687fd185e..a97f3c4a394 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1274,7 +1274,7 @@ static void mce_timer_fn(unsigned long data) */ iv = __this_cpu_read(mce_next_interval); if (mce_notify_irq()) - iv = max(iv, (unsigned long) HZ/100); + iv = max(iv / 2, (unsigned long) HZ/100); else iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); __this_cpu_write(mce_next_interval, iv); From 436d03faf6961b30e13b2d0967aea9d772d6cf44 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jun 2012 00:09:11 +0900 Subject: [PATCH 207/475] x86/decoder: Fix bsr/bsf/jmpe decoding with operand-size prefix Fix the x86 instruction decoder to decode bsr/bsf/jmpe with operand-size prefix (66h). This fixes the test case failure reported by Linus, attached below. bsf/bsr/jmpe have a special encoding. Opcode map in Intel Software Developers Manual vol2 says they have TZCNT/LZCNT variants if it has F3h prefix. However, there is no information if it has other 66h or F2h prefixes. Current instruction decoder supposes that those are bad instructions, but it actually accepts at least operand-size prefixes. H. Peter Anvin further explains: " TZCNT/LZCNT are F3 + BSF/BSR exactly because the F2 and F3 prefixes have historically been no-ops with most instructions. This allows software to unconditionally use the prefixed versions and get TZCNT/LZCNT on the processors that have them if they don't care about the difference. " This fixes errors reported by test_get_len: Warning: arch/x86/tools/test_get_len found difference at :ffffffff81036d87 Warning: ffffffff81036de5: 66 0f bc c2 bsf %dx,%ax Warning: objdump says 4 bytes, but insn_get_length() says 3 Warning: arch/x86/tools/test_get_len found difference at :ffffffff81036ea6 Warning: ffffffff81036f04: 66 0f bd c2 bsr %dx,%ax Warning: objdump says 4 bytes, but insn_get_length() says 3 Warning: decoded and checked 13298882 instructions with 2 warnings Reported-by: Linus Torvalds Reported-by: Pekka Enberg Signed-off-by: Masami Hiramatsu Cc: "H. Peter Anvin" Cc: Link: http://lkml.kernel.org/r/20120604150911.22338.43296.stgit@localhost.localdomain Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 8 ++++---- arch/x86/tools/gen-insn-attr-x86.awk | 14 +++++++++----- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 81913790442..5d7e51f3fd2 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -28,7 +28,7 @@ # - (66): the last prefix is 0x66 # - (F3): the last prefix is 0xF3 # - (F2): the last prefix is 0xF2 -# +# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) Table: one byte opcode Referrer: @@ -515,12 +515,12 @@ b4: LFS Gv,Mp b5: LGS Gv,Mp b6: MOVZX Gv,Eb b7: MOVZX Gv,Ew -b8: JMPE | POPCNT Gv,Ev (F3) +b8: JMPE (!F3) | POPCNT Gv,Ev (F3) b9: Grp10 (1A) ba: Grp8 Ev,Ib (1A) bb: BTC Ev,Gv -bc: BSF Gv,Ev | TZCNT Gv,Ev (F3) -bd: BSR Gv,Ev | LZCNT Gv,Ev (F3) +bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) +bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) be: MOVSX Gv,Eb bf: MOVSX Gv,Ew # 0x0f 0xc0-0xcf diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 5f6a5b6c3a1..ddcf39b1a18 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -66,9 +66,10 @@ BEGIN { rex_expr = "^REX(\\.[XRWB]+)*" fpu_expr = "^ESC" # TODO - lprefix1_expr = "\\(66\\)" + lprefix1_expr = "\\((66|!F3)\\)" lprefix2_expr = "\\(F3\\)" - lprefix3_expr = "\\(F2\\)" + lprefix3_expr = "\\((F2|!F3)\\)" + lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 # All opcodes starting with lower-case 'v' or with (v1) superscript @@ -333,13 +334,16 @@ function convert_operands(count,opnd, i,j,imm,mod) if (match(ext, lprefix1_expr)) { lptable1[idx] = add_flags(lptable1[idx],flags) variant = "INAT_VARIANT" - } else if (match(ext, lprefix2_expr)) { + } + if (match(ext, lprefix2_expr)) { lptable2[idx] = add_flags(lptable2[idx],flags) variant = "INAT_VARIANT" - } else if (match(ext, lprefix3_expr)) { + } + if (match(ext, lprefix3_expr)) { lptable3[idx] = add_flags(lptable3[idx],flags) variant = "INAT_VARIANT" - } else { + } + if (!match(ext, lprefix_expr)){ table[idx] = add_flags(table[idx],flags) } } From 1a87fc1ec7b05b9bc60df9dc52297d4c225d7f1a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Jun 2012 11:33:21 +0200 Subject: [PATCH 208/475] x86: mce: Add the dropped timer interval init back commit 82f7af09 ("x86/mce: Cleanup timer mess) dropped the initialization of the per cpu timer interval. Duh :( Restore the previous behaviour. Reported-by: Chen Gong Cc: bp@amd64.org Cc: tony.luck@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a97f3c4a394..da27c5d2168 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1557,7 +1557,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) static void __mcheck_cpu_init_timer(void) { struct timer_list *t = &__get_cpu_var(mce_timer); - unsigned long iv = __this_cpu_read(mce_next_interval); + unsigned long iv = check_interval * HZ; setup_timer(t, mce_timer_fn, smp_processor_id()); From 8a173b1476d126674104c7c5c6cef0bcd824b001 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 29 May 2012 11:18:44 +0200 Subject: [PATCH 209/475] spinlock: Indicate that a lockup is only suspected On an over-committed KVM system we got a: "BUG: spinlock lockup on CPU#2, swapper/2/0" message on the heavily contended virtio blk spinlock. While we might want to reconsider the locking of virtio-blk (lock is held while switching to the host) this patch tries to make the message clearer: the lockup is only suspected. Signed-off-by: Christian Borntraeger Cc: Rusty Russell Cc: Michael S. Tsirkin Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1338283124-7063-1-git-send-email-borntraeger@de.ibm.com Signed-off-by: Ingo Molnar --- lib/spinlock_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index d0ec4f3d159..e91fbc23fff 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -118,7 +118,7 @@ static void __spin_lock_debug(raw_spinlock_t *lock) /* lockup suspected: */ if (print_once) { print_once = 0; - spin_dump(lock, "lockup"); + spin_dump(lock, "lockup suspected"); #ifdef CONFIG_SMP trigger_all_cpu_backtrace(); #endif From aff5a62d52ff03956ff6992b9fe4b561fd855804 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Tue, 5 Jun 2012 15:00:31 -0400 Subject: [PATCH 210/475] x86/gart: Fix kmemleak warning aperture_64.c now is using memblock, the previous kmemleak_ignore() for alloc_bootmem() should be removed then. Otherwise, with kmemleak enabled, kernel will throw warnings like: [ 0.000000] kmemleak: Trying to color unknown object at 0xffff8800c4000000 as Black [ 0.000000] Pid: 0, comm: swapper/0 Not tainted 3.5.0-rc1-next-20120605+ #130 [ 0.000000] Call Trace: [ 0.000000] [] paint_ptr+0x66/0xc0 [ 0.000000] [] kmemleak_ignore+0x2b/0x60 [ 0.000000] [] kmemleak_init+0x217/0x2c1 [ 0.000000] [] start_kernel+0x32d/0x3eb [ 0.000000] [] ? repair_env_string+0x5a/0x5a [ 0.000000] [] x86_64_start_reservations+0x131/0x135 [ 0.000000] [] ? early_idt_handlers+0x120/0x120 [ 0.000000] [] x86_64_start_kernel+0x102/0x111 [ 0.000000] kmemleak: Early log backtrace: [ 0.000000] [] kmemleak_ignore+0x4b/0x60 [ 0.000000] [] gart_iommu_hole_init+0x3e7/0x547 [ 0.000000] [] pci_iommu_alloc+0x44/0x6f [ 0.000000] [] mem_init+0x19/0xec [ 0.000000] [] start_kernel+0x1ea/0x3eb [ 0.000000] [] x86_64_start_reservations+0x131/0x135 [ 0.000000] [] x86_64_start_kernel+0x102/0x111 [ 0.000000] [] 0xffffffffffffffff Signed-off-by: Xiaotian Feng Cc: Xiaotian Feng Cc: Tejun Heo Link: http://lkml.kernel.org/r/1338922831-2847-1-git-send-email-xtfeng@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/aperture_64.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 6e76c191a83..d5fd66f0d4c 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -95,11 +94,6 @@ static u32 __init allocate_aperture(void) return 0; } memblock_reserve(addr, aper_size); - /* - * Kmemleak should not scan this block as it may not be mapped via the - * kernel direct mapping. - */ - kmemleak_ignore(phys_to_virt(addr)); printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", aper_size >> 10, addr); insert_aperture_resource((u32)addr, aper_size); From 4af463d28f1a026e25c0b879fac2a0d2b7bff599 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Mon, 4 Jun 2012 11:42:32 +0900 Subject: [PATCH 211/475] x86/numa: Set numa_nodes_parsed at acpi_numa_memory_affinity_init() When hot-adding a CPU, the system outputs following messages since node_to_cpumask_map[2] was not allocated memory. Booting Node 2 Processor 32 APIC 0xc0 node_to_cpumask_map[2] NULL Pid: 0, comm: swapper/32 Tainted: G A 3.3.5-acd #21 Call Trace: [] debug_cpumask_set_cpu+0x155/0x160 [] ? add_timer_on+0xaa/0x120 [] numa_add_cpu+0x1e/0x22 [] identify_cpu+0x1df/0x1e4 [] identify_econdary_cpu+0x16/0x1d [] smp_store_cpu_info+0x3c/0x3e [] smp_callin+0x139/0x1be [] start_secondary+0x13/0xeb The reason is that the bit of node 2 was not set at numa_nodes_parsed. numa_nodes_parsed is set by only acpi_numa_processor_affinity_init / acpi_numa_x2apic_affinity_init. Thus even if hot-added memory which is same PXM as hot-added CPU is written in ACPI SRAT Table, if the hot-added CPU is not written in ACPI SRAT table, numa_nodes_parsed is not set. But according to ACPI Spec Rev 5.0, it says about ACPI SRAT table as follows: This optional table provides information that allows OSPM to associate processors and memory ranges, including ranges of memory provided by hot-added memory devices, with system localities / proximity domains and clock domains. It means that ACPI SRAT table only provides information for CPUs present at boot time and for memory including hot-added memory. So hot-added memory is written in ACPI SRAT table, but hot-added CPU is not written in it. Thus numa_nodes_parsed should be set by not only acpi_numa_processor_affinity_init / acpi_numa_x2apic_affinity_init but also acpi_numa_memory_affinity_init for the case. Additionally, if system has cpuless memory node, acpi_numa_processor_affinity_init / acpi_numa_x2apic_affinity_init cannot set numa_nodes_parseds since these functions cannot find cpu description for the node. In this case, numa_nodes_parsed needs to be set by acpi_numa_memory_affinity_init. Signed-off-by: Yasuaki Ishimatsu Acked-by: David Rientjes Acked-by: KOSAKI Motohiro Cc: liuj97@gmail.com Cc: kosaki.motohiro@gmail.com Link: http://lkml.kernel.org/r/4FCC2098.4030007@jp.fujitsu.com [ merged it ] Signed-off-by: Ingo Molnar --- arch/x86/mm/srat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 732af3a9618..4599c3e8bcb 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -176,6 +176,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) return; } + node_set(node, numa_nodes_parsed); + printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", node, pxm, (unsigned long long) start, (unsigned long long) end - 1); From 7071f6b2889bb41bea61891d8a3e6e70517ef5e6 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 31 May 2012 23:20:25 +0200 Subject: [PATCH 212/475] x86/intel/moorestown: Change intel_scu_devices_create() to __devinit The allmodconfig hits: WARNING: vmlinux.o(.text+0x6553d): Section mismatch in reference from the function intel_scu_devices_create() to the function .devinit.text: spi_register_board_info() [...] This patch marks intel_scu_devices_create() as devinit because it only calls a devinit function, spi_register_board_info(). Signed-off-by: Sebastian Andrzej Siewior Cc: Alan Cox Cc: Kirill A. Shutemov Cc: Mika Westerberg Cc: Samuel Ortiz Cc: Feng Tang Link: http://lkml.kernel.org/r/20120531212025.GA8519@breakpoint.cc Signed-off-by: Ingo Molnar --- arch/x86/platform/mrst/mrst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index e31bcd8f2ee..fd41a9262d6 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -782,7 +782,7 @@ BLOCKING_NOTIFIER_HEAD(intel_scu_notifier); EXPORT_SYMBOL_GPL(intel_scu_notifier); /* Called by IPC driver */ -void intel_scu_devices_create(void) +void __devinit intel_scu_devices_create(void) { int i; From 55c844a4dd16a4d1fdc0cf2a283ec631a02ec448 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 30 May 2012 23:15:41 +0800 Subject: [PATCH 213/475] x86/reboot: Fix a warning message triggered by stop_other_cpus() When rebooting our 24 CPU Westmere servers with 3.4-rc6, we always see this warning msg: Restarting system. machine restart ------------[ cut here ]------------ WARNING: at arch/x86/kernel/smp.c:125 native_smp_send_reschedule+0x74/0xa7() Hardware name: X8DTN Modules linked in: igb [last unloaded: scsi_wait_scan] Pid: 1, comm: systemd-shutdow Not tainted 3.4.0-rc6+ #22 Call Trace: [] warn_slowpath_common+0x7e/0x96 [] warn_slowpath_null+0x15/0x17 [] native_smp_send_reschedule+0x74/0xa7 [] trigger_load_balance+0x279/0x2a6 [] scheduler_tick+0xe0/0xe9 [] update_process_times+0x60/0x70 [] tick_sched_timer+0x68/0x92 [] __run_hrtimer+0xb3/0x13c [] ? tick_nohz_handler+0xd0/0xd0 [] hrtimer_interrupt+0xdb/0x198 [] smp_apic_timer_interrupt+0x81/0x94 [] apic_timer_interrupt+0x67/0x70 [] ? default_send_IPI_mask_allbutself_phys+0xb4/0xc4 [] physflat_send_IPI_allbutself+0x12/0x14 [] native_nmi_stop_other_cpus+0x8a/0xd6 [] native_machine_shutdown+0x50/0x67 [] machine_shutdown+0xa/0xc [] native_machine_restart+0x20/0x32 [] machine_restart+0xa/0xc [] kernel_restart+0x47/0x4c [] sys_reboot+0x13e/0x17c [] ? _raw_spin_unlock_bh+0x10/0x12 [] ? bdi_queue_work+0xcf/0xd8 [] ? __bdi_start_writeback+0xae/0xb7 [] ? iterate_supers+0xa3/0xb7 [] system_call_fastpath+0x16/0x1b ---[ end trace 320af5cb1cb60c5b ]--- The root cause seems to be the default_send_IPI_mask_allbutself_phys() takes quite some time (I measured it could be several ms) to complete sending NMIs to all the other 23 CPUs, and for HZ=250/1000 system, the time is long enough for a timer interrupt to happen, which will in turn trigger to kick load balance to a stopped CPU and cause this warning in native_smp_send_reschedule(). So disabling the local irq before stop_other_cpu() can fix this problem (tested 25 times reboot ok), and it is fine as there should be nobody caring the timer interrupt in such reboot stage. The latest 3.4 kernel slightly changes this behavior by sending REBOOT_VECTOR first and only send NMI_VECTOR if the REBOOT_VCTOR fails, and this patch is still needed to prevent the problem. Signed-off-by: Feng Tang Acked-by: Don Zickus Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120530231541.4c13433a@feng-i7 Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 79c45af8160..25b48edb847 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -639,9 +639,11 @@ void native_machine_shutdown(void) set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); /* - * O.K Now that I'm on the appropriate processor, - * stop all of the others. + * O.K Now that I'm on the appropriate processor, stop all of the + * others. Also disable the local irq to not receive the per-cpu + * timer interrupt which may trigger scheduler's load balance. */ + local_irq_disable(); stop_other_cpus(); #endif From f6175f5bfb4c9f2ed32758c95f765b529b1a7f15 Mon Sep 17 00:00:00 2001 From: Tomoki Sekiyama Date: Mon, 28 May 2012 18:09:18 +0900 Subject: [PATCH 214/475] x86/ioapic: Fix NULL pointer dereference on CPU hotplug after disabling irqs In current Linux, percpu variable `vector_irq' is not cleared on offlined cpus while disabling devices' irqs. If the cpu that has the disabled irqs in vector_irq is hotplugged, __setup_vector_irq() hits invalid irq vector and may crash. This bug can be reproduced as following; # echo 0 > /sys/devices/system/cpu/cpu7/online # modprobe -r some_driver_using_interrupts # vector_irq@cpu7 uncleared # echo 1 > /sys/devices/system/cpu/cpu7/online # kernel may crash This patch fixes this bug by clearing vector_irq in __clear_irq_vector() even if the cpu is offlined. Signed-off-by: Tomoki Sekiyama Acked-by: Thomas Gleixner Cc: yrl.pp-manager.tt@hitachi.com Cc: ltc-kernel@ml.yrl.intra.hitachi.co.jp Cc: Suresh Siddha Cc: Yinghai Lu Cc: Alexander Gordeev Link: http://lkml.kernel.org/r/4FC340BE.7080101@hitachi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ac96561d1a9..5f0ff597437 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1195,7 +1195,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) BUG_ON(!cfg->vector); vector = cfg->vector; - for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) + for_each_cpu(cpu, cfg->domain) per_cpu(vector_irq, cpu)[vector] = -1; cfg->vector = 0; @@ -1203,7 +1203,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) if (likely(!cfg->move_in_progress)) return; - for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { + for_each_cpu(cpu, cfg->old_domain) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != irq) From d8ce7263e1bc3b6b2b906fec0c5037bc27d21d6a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 29 May 2012 23:30:08 +0200 Subject: [PATCH 215/475] m68k: Use generic strncpy_from_user(), strlen_user(), and strnlen_user() Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer --- arch/m68k/Kconfig | 2 + arch/m68k/include/asm/Kbuild | 2 + arch/m68k/include/asm/uaccess_mm.h | 11 +++-- arch/m68k/lib/uaccess.c | 74 ------------------------------ 4 files changed, 11 insertions(+), 78 deletions(-) diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index cac5b6be572..14712012826 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -7,6 +7,8 @@ config M68K select GENERIC_IRQ_SHOW select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS select GENERIC_CPU_DEVICES + select GENERIC_STRNCPY_FROM_USER if MMU + select GENERIC_STRNLEN_USER if MMU select FPU if MMU select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 1a922fad76f..eafa2539a8e 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -1,2 +1,4 @@ include include/asm-generic/Kbuild.asm header-y += cachectl.h + +generic-y += word-at-a-time.h diff --git a/arch/m68k/include/asm/uaccess_mm.h b/arch/m68k/include/asm/uaccess_mm.h index 9c80cd515b2..472c891a4ae 100644 --- a/arch/m68k/include/asm/uaccess_mm.h +++ b/arch/m68k/include/asm/uaccess_mm.h @@ -379,12 +379,15 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n) #define copy_from_user(to, from, n) __copy_from_user(to, from, n) #define copy_to_user(to, from, n) __copy_to_user(to, from, n) -long strncpy_from_user(char *dst, const char __user *src, long count); -long strnlen_user(const char __user *src, long n); +#define user_addr_max() \ + (segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL) + +extern long strncpy_from_user(char *dst, const char __user *src, long count); +extern __must_check long strlen_user(const char __user *str); +extern __must_check long strnlen_user(const char __user *str, long n); + unsigned long __clear_user(void __user *to, unsigned long n); #define clear_user __clear_user -#define strlen_user(str) strnlen_user(str, 32767) - #endif /* _M68K_UACCESS_H */ diff --git a/arch/m68k/lib/uaccess.c b/arch/m68k/lib/uaccess.c index 5664386338d..5e97f2ee7c1 100644 --- a/arch/m68k/lib/uaccess.c +++ b/arch/m68k/lib/uaccess.c @@ -103,80 +103,6 @@ unsigned long __generic_copy_to_user(void __user *to, const void *from, } EXPORT_SYMBOL(__generic_copy_to_user); -/* - * Copy a null terminated string from userspace. - */ -long strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - char c; - - if (count <= 0) - return count; - - asm volatile ("\n" - "1: "MOVES".b (%2)+,%4\n" - " move.b %4,(%1)+\n" - " jeq 2f\n" - " subq.l #1,%3\n" - " jne 1b\n" - "2: sub.l %3,%0\n" - "3:\n" - " .section .fixup,\"ax\"\n" - " .even\n" - "10: move.l %5,%0\n" - " jra 3b\n" - " .previous\n" - "\n" - " .section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b,10b\n" - " .previous" - : "=d" (res), "+a" (dst), "+a" (src), "+r" (count), "=&d" (c) - : "i" (-EFAULT), "0" (count)); - - return res; -} -EXPORT_SYMBOL(strncpy_from_user); - -/* - * Return the size of a string (including the ending 0) - * - * Return 0 on exception, a value greater than N if too long - */ -long strnlen_user(const char __user *src, long n) -{ - char c; - long res; - - asm volatile ("\n" - "1: subq.l #1,%1\n" - " jmi 3f\n" - "2: "MOVES".b (%0)+,%2\n" - " tst.b %2\n" - " jne 1b\n" - " jra 4f\n" - "\n" - "3: addq.l #1,%0\n" - "4: sub.l %4,%0\n" - "5:\n" - " .section .fixup,\"ax\"\n" - " .even\n" - "20: sub.l %0,%0\n" - " jra 5b\n" - " .previous\n" - "\n" - " .section __ex_table,\"a\"\n" - " .align 4\n" - " .long 2b,20b\n" - " .previous\n" - : "=&a" (res), "+d" (n), "=&d" (c) - : "0" (src), "r" (src)); - - return res; -} -EXPORT_SYMBOL(strnlen_user); - /* * Zero Userspace */ From ceb1cbac8eda66cf0f889def226b4e82f8ff857b Mon Sep 17 00:00:00 2001 From: Kamalesh Babulal Date: Thu, 31 May 2012 13:07:38 +0530 Subject: [PATCH 216/475] sched/x86: Calculate booted cores after construction of sibling_mask Commit 316ad248307fb ("sched/x86: Rewrite set_cpu_sibling_map()") broke the booted_cores accounting. The problem is that the booted_cores accounting needs all the sibling links set up. So restore the second loop and add a comment as to why its needed. On qemu booted with -smp sockets=1,cores=2,threads=2; Before: $ grep cores /proc/cpuinfo cpu cores : 2 cpu cores : 1 cpu cores : 4 cpu cores : 3 With the patch: $ grep cores /proc/cpuinfo cpu cores : 2 cpu cores : 2 cpu cores : 2 cpu cores : 2 Reported-by: Prarit Bhargava Reported-by: Borislav Petkov Signed-off-by: Kamalesh Babulal Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120531073738.GH7511@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fd019d78b1f..3fab55bea29 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -382,6 +382,15 @@ void __cpuinit set_cpu_sibling_map(int cpu) if ((i == cpu) || (has_mc && match_llc(c, o))) link_mask(llc_shared, cpu, i); + } + + /* + * This needs a separate iteration over the cpus because we rely on all + * cpu_sibling_mask links to be set-up. + */ + for_each_cpu(i, cpu_sibling_setup_mask) { + o = &cpu_data(i); + if ((i == cpu) || (has_mc && match_mc(c, o))) { link_mask(core, cpu, i); From 10717dcde10d09f9fcee53a12a4236af1a82b484 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Wed, 6 Jun 2012 14:52:51 +0800 Subject: [PATCH 217/475] sched/numa: Load balance between remote nodes Commit cb83b629b ("sched/numa: Rewrite the CONFIG_NUMA sched domain support") removed the NODE sched domain and started checking if the node distance in SLIT table is farther than REMOTE_DISTANCE, if so, it will lose the load balance chance at exec/fork/wake_affine points. But actually, even the node distance is farther than REMOTE_DISTANCE. Modern CPUs also has QPI like connections, which ensures that memory access is not too slow between nodes. So the above change in behavior on NUMA machine causes a performance regression on various benchmarks: hackbench, tbench, netperf, oltp, etc. This patch will recover the scheduler behavior to old mode on all my Intel platforms: NHM EP/EX, WSM EP, SNB EP/EP4S, and thus fixes the perfromance regressions. (all of them just have 2 kinds distance, 10, 21) Signed-off-by: Alex Shi Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1338965571-9812-1-git-send-email-alex.shi@intel.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c46958e2612..6546083af3e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6321,7 +6321,7 @@ static int sched_domains_curr_level; static inline int sd_local_flags(int level) { - if (sched_domains_numa_distance[level] > REMOTE_DISTANCE) + if (sched_domains_numa_distance[level] > RECLAIM_DISTANCE) return 0; return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE; From 7f1b43936f0ecad14770634c021cf4a929aec74d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 May 2012 21:19:46 +0200 Subject: [PATCH 218/475] sched/rt: Fix lockdep annotation within find_lock_lowest_rq() Roland Dreier reported spurious, hard to trigger lockdep warnings within the scheduler - without any real lockup. This bit gives us the right clue: > [89945.640512] [] double_lock_balance+0x5a/0x90 > [89945.640568] [] push_rt_task+0xc6/0x290 if you look at that code you'll find the double_lock_balance() in question is the one in find_lock_lowest_rq() [yay for inlining]. Now find_lock_lowest_rq() has a bug.. it fails to use double_unlock_balance() in one exit path, if this results in a retry in push_rt_task() we'll call double_lock_balance() again, at which point we'll run into said lockdep confusion. Reported-by: Roland Dreier Acked-by: Steven Rostedt Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1337282386.4281.77.camel@twins Signed-off-by: Ingo Molnar --- kernel/sched/rt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 2a4e8dffbd6..573e1ca0110 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1562,7 +1562,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) task_running(rq, task) || !task->on_rq)) { - raw_spin_unlock(&lowest_rq->lock); + double_unlock_balance(rq, lowest_rq); lowest_rq = NULL; break; } From c1174876874dcf8986806e4dad3d7d07af20b439 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 May 2012 14:47:33 +0200 Subject: [PATCH 219/475] sched: Fix domain iteration Weird topologies can lead to asymmetric domain setups. This needs further consideration since these setups are typically non-minimal too. For now, make it work by adding an extra mask selecting which CPUs are allowed to iterate up. The topology that triggered it is the one from David Rientjes: 10 20 20 30 20 10 20 20 20 20 10 20 30 20 20 10 resulting in boxes that wouldn't even boot. Reported-by: David Rientjes Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-3p86l9cuaqnxz7uxsojmz5rm@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 11 ++++++++ kernel/sched/core.c | 64 +++++++++++++++++++++++++++++++++++++------ kernel/sched/fair.c | 5 ++-- kernel/sched/sched.h | 2 ++ 4 files changed, 72 insertions(+), 10 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 6029d8c5447..ac321d75347 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -876,6 +876,8 @@ struct sched_group_power { * Number of busy cpus in this group. */ atomic_t nr_busy_cpus; + + unsigned long cpumask[0]; /* iteration mask */ }; struct sched_group { @@ -900,6 +902,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) return to_cpumask(sg->cpumask); } +/* + * cpumask masking which cpus in the group are allowed to iterate up the domain + * tree. + */ +static inline struct cpumask *sched_group_mask(struct sched_group *sg) +{ + return to_cpumask(sg->sgp->cpumask); +} + /** * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. * @group: The group whose first cpu is to be returned. diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6546083af3e..781acb91a50 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5994,6 +5994,44 @@ struct sched_domain_topology_level { struct sd_data data; }; +/* + * Build an iteration mask that can exclude certain CPUs from the upwards + * domain traversal. + * + * Asymmetric node setups can result in situations where the domain tree is of + * unequal depth, make sure to skip domains that already cover the entire + * range. + * + * In that case build_sched_domains() will have terminated the iteration early + * and our sibling sd spans will be empty. Domains should always include the + * cpu they're built on, so check that. + * + */ +static void build_group_mask(struct sched_domain *sd, struct sched_group *sg) +{ + const struct cpumask *span = sched_domain_span(sd); + struct sd_data *sdd = sd->private; + struct sched_domain *sibling; + int i; + + for_each_cpu(i, span) { + sibling = *per_cpu_ptr(sdd->sd, i); + if (!cpumask_test_cpu(i, sched_domain_span(sibling))) + continue; + + cpumask_set_cpu(i, sched_group_mask(sg)); + } +} + +/* + * Return the canonical balance cpu for this group, this is the first cpu + * of this group that's also in the iteration mask. + */ +int group_balance_cpu(struct sched_group *sg) +{ + return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg)); +} + static int build_overlap_sched_groups(struct sched_domain *sd, int cpu) { @@ -6012,6 +6050,12 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) if (cpumask_test_cpu(i, covered)) continue; + child = *per_cpu_ptr(sdd->sd, i); + + /* See the comment near build_group_mask(). */ + if (!cpumask_test_cpu(i, sched_domain_span(child))) + continue; + sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), GFP_KERNEL, cpu_to_node(cpu)); @@ -6019,8 +6063,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) goto fail; sg_span = sched_group_cpus(sg); - - child = *per_cpu_ptr(sdd->sd, i); if (child->child) { child = child->child; cpumask_copy(sg_span, sched_domain_span(child)); @@ -6030,13 +6072,18 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) cpumask_or(covered, covered, sg_span); sg->sgp = *per_cpu_ptr(sdd->sgp, i); - atomic_inc(&sg->sgp->ref); + if (atomic_inc_return(&sg->sgp->ref) == 1) + build_group_mask(sd, sg); + + /* + * Make sure the first group of this domain contains the + * canonical balance cpu. Otherwise the sched_domain iteration + * breaks. See update_sg_lb_stats(). + */ if ((!groups && cpumask_test_cpu(cpu, sg_span)) || - cpumask_first(sg_span) == cpu) { - WARN_ON_ONCE(!cpumask_test_cpu(cpu, sg_span)); + group_balance_cpu(sg) == cpu) groups = sg; - } if (!first) first = sg; @@ -6109,6 +6156,7 @@ build_sched_groups(struct sched_domain *sd, int cpu) cpumask_clear(sched_group_cpus(sg)); sg->sgp->power = 0; + cpumask_setall(sched_group_mask(sg)); for_each_cpu(j, span) { if (get_group(j, sdd, NULL) != group) @@ -6150,7 +6198,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) sg = sg->next; } while (sg != sd->groups); - if (cpu != group_first_cpu(sg)) + if (cpu != group_balance_cpu(sg)) return; update_group_power(sd, cpu); @@ -6525,7 +6573,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map) *per_cpu_ptr(sdd->sg, j) = sg; - sgp = kzalloc_node(sizeof(struct sched_group_power), + sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(), GFP_KERNEL, cpu_to_node(j)); if (!sgp) return -ENOMEM; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b2a2d236f27..54cbaa4e7b3 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3652,7 +3652,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, int i; if (local_group) - balance_cpu = group_first_cpu(group); + balance_cpu = group_balance_cpu(group); /* Tally up the load of all CPUs in the group */ max_cpu_load = 0; @@ -3667,7 +3667,8 @@ static inline void update_sg_lb_stats(struct lb_env *env, /* Bias balancing toward cpus of our domain */ if (local_group) { - if (idle_cpu(i) && !first_idle_cpu) { + if (idle_cpu(i) && !first_idle_cpu && + cpumask_test_cpu(i, sched_group_mask(group))) { first_idle_cpu = 1; balance_cpu = i; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ba9dccfd24c..6d52cea7f33 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -526,6 +526,8 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag) DECLARE_PER_CPU(struct sched_domain *, sd_llc); DECLARE_PER_CPU(int, sd_llc_id); +extern int group_balance_cpu(struct sched_group *sg); + #endif /* CONFIG_SMP */ #include "stats.h" From c3decf0dfbc95736b7c0ab68fa4e5854c4734da9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 May 2012 12:05:32 +0200 Subject: [PATCH 220/475] sched: Always initialize cpu-power Often when we run into mis-shapen topologies the balance iteration fails to update the cpu power properly and we'll end up in /0 traps. Always initialize the cpu-power to a semi-sane value so that we can at least boot the machine, even if the load-balancer might not function correctly. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-3lbhyj25sr169ha7z3qht5na@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 13 ++++++++++++- kernel/sched/fair.c | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 781acb91a50..725ee7c1c8c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5604,7 +5604,12 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, break; } - if (!group->sgp->power) { + /* + * Even though we initialize ->power to something semi-sane, + * we leave power_orig unset. This allows us to detect if + * domain iteration is still funny without causing /0 traps. + */ + if (!group->sgp->power_orig) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: domain->cpu_power not " "set\n"); @@ -6075,6 +6080,12 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) if (atomic_inc_return(&sg->sgp->ref) == 1) build_group_mask(sd, sg); + /* + * Initialize sgp->power such that even if we mess up the + * domains and no possible iteration will get us here, we won't + * die on a /0 trap. + */ + sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span); /* * Make sure the first group of this domain contains the diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 54cbaa4e7b3..c9fd6d673d0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3602,7 +3602,7 @@ void update_group_power(struct sched_domain *sd, int cpu) } while (group != child->groups); } - sdg->sgp->power = power; + sdg->sgp->power_orig = sdg->sgp->power = power; } /* From d039ac60800fe8ed8522ec3b9ca796aaf748c18b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 May 2012 21:20:16 +0200 Subject: [PATCH 221/475] sched: Validate assumptions in sched_init_numa() Add some code to validate assumptions we're making and output warnings if they are not. If this trigger we want to know about it. Signed-off-by: Peter Zijlstra Cc: Alex Shi Link: http://lkml.kernel.org/n/tip-6uc3wk5s9udxtdl9cnku0vtt@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 99 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 80 insertions(+), 19 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 725ee7c1c8c..2bdd1761643 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5556,15 +5556,20 @@ static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */ #ifdef CONFIG_SCHED_DEBUG -static __read_mostly int sched_domain_debug_enabled; +static __read_mostly int sched_debug_enabled; -static int __init sched_domain_debug_setup(char *str) +static int __init sched_debug_setup(char *str) { - sched_domain_debug_enabled = 1; + sched_debug_enabled = 1; return 0; } -early_param("sched_debug", sched_domain_debug_setup); +early_param("sched_debug", sched_debug_setup); + +static inline bool sched_debug(void) +{ + return sched_debug_enabled; +} static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, struct cpumask *groupmask) @@ -5657,7 +5662,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) { int level = 0; - if (!sched_domain_debug_enabled) + if (!sched_debug_enabled) return; if (!sd) { @@ -5678,6 +5683,10 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) } #else /* !CONFIG_SCHED_DEBUG */ # define sched_domain_debug(sd, cpu) do { } while (0) +static inline bool sched_debug(void) +{ + return false; +} #endif /* CONFIG_SCHED_DEBUG */ static int sd_degenerate(struct sched_domain *sd) @@ -6373,7 +6382,6 @@ static struct sched_domain_topology_level *sched_domain_topology = default_topol #ifdef CONFIG_NUMA static int sched_domains_numa_levels; -static int sched_domains_numa_scale; static int *sched_domains_numa_distance; static struct cpumask ***sched_domains_numa_masks; static int sched_domains_curr_level; @@ -6438,6 +6446,42 @@ static const struct cpumask *sd_numa_mask(int cpu) return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)]; } +static void sched_numa_warn(const char *str) +{ + static int done = false; + int i,j; + + if (done) + return; + + done = true; + + printk(KERN_WARNING "ERROR: %s\n\n", str); + + for (i = 0; i < nr_node_ids; i++) { + printk(KERN_WARNING " "); + for (j = 0; j < nr_node_ids; j++) + printk(KERN_CONT "%02d ", node_distance(i,j)); + printk(KERN_CONT "\n"); + } + printk(KERN_WARNING "\n"); +} + +static bool find_numa_distance(int distance) +{ + int i; + + if (distance == node_distance(0, 0)) + return true; + + for (i = 0; i < sched_domains_numa_levels; i++) { + if (sched_domains_numa_distance[i] == distance) + return true; + } + + return false; +} + static void sched_init_numa(void) { int next_distance, curr_distance = node_distance(0, 0); @@ -6445,7 +6489,6 @@ static void sched_init_numa(void) int level = 0; int i, j, k; - sched_domains_numa_scale = curr_distance; sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL); if (!sched_domains_numa_distance) return; @@ -6456,23 +6499,41 @@ static void sched_init_numa(void) * * Assumes node_distance(0,j) includes all distances in * node_distance(i,j) in order to avoid cubic time. - * - * XXX: could be optimized to O(n log n) by using sort() */ next_distance = curr_distance; for (i = 0; i < nr_node_ids; i++) { for (j = 0; j < nr_node_ids; j++) { - int distance = node_distance(0, j); - if (distance > curr_distance && - (distance < next_distance || - next_distance == curr_distance)) - next_distance = distance; + for (k = 0; k < nr_node_ids; k++) { + int distance = node_distance(i, k); + + if (distance > curr_distance && + (distance < next_distance || + next_distance == curr_distance)) + next_distance = distance; + + /* + * While not a strong assumption it would be nice to know + * about cases where if node A is connected to B, B is not + * equally connected to A. + */ + if (sched_debug() && node_distance(k, i) != distance) + sched_numa_warn("Node-distance not symmetric"); + + if (sched_debug() && i && !find_numa_distance(distance)) + sched_numa_warn("Node-0 not representative"); + } + if (next_distance != curr_distance) { + sched_domains_numa_distance[level++] = next_distance; + sched_domains_numa_levels = level; + curr_distance = next_distance; + } else break; } - if (next_distance != curr_distance) { - sched_domains_numa_distance[level++] = next_distance; - sched_domains_numa_levels = level; - curr_distance = next_distance; - } else break; + + /* + * In case of sched_debug() we verify the above assumption. + */ + if (!sched_debug()) + break; } /* * 'level' contains the number of unique distances, excluding the From b430f7c4706aeba4270c7ab7744fc504b9315e1c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Jun 2012 15:30:31 +0200 Subject: [PATCH 222/475] perf/x86: Fix Intel shared extra MSR allocation Zheng Yan reported that event group validation can wreck event state when Intel extra_reg allocation changes event state. Validation shouldn't change any persistent state. Cloning events in validate_{event,group}() isn't really pretty either, so add a few special cases to avoid modifying the event state. The code is restructured to minimize the special case impact. Reported-by: Zheng Yan Acked-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1338903031.28282.175.camel@twins Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 1 + arch/x86/kernel/cpu/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event_intel.c | 92 ++++++++++++++++++-------- 3 files changed, 66 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e049d6da018..cb608383e4f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void) if (!cpuc->shared_regs) goto error; } + cpuc->is_fake = 1; return cpuc; error: free_fake_cpuc(cpuc); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6638aaf5449..83794d8e6af 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -117,6 +117,7 @@ struct cpu_hw_events { struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ unsigned int group_flag; + int is_fake; /* * Intel DebugStore bits diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 166546ec6ae..965baa2fa79 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event) return NULL; } -static bool intel_try_alt_er(struct perf_event *event, int orig_idx) +static int intel_alt_er(int idx) { if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) - return false; + return idx; - if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) { - event->hw.config &= ~INTEL_ARCH_EVENT_MASK; - event->hw.config |= 0x01bb; - event->hw.extra_reg.idx = EXTRA_REG_RSP_1; - event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; - } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) { + if (idx == EXTRA_REG_RSP_0) + return EXTRA_REG_RSP_1; + + if (idx == EXTRA_REG_RSP_1) + return EXTRA_REG_RSP_0; + + return idx; +} + +static void intel_fixup_er(struct perf_event *event, int idx) +{ + event->hw.extra_reg.idx = idx; + + if (idx == EXTRA_REG_RSP_0) { event->hw.config &= ~INTEL_ARCH_EVENT_MASK; event->hw.config |= 0x01b7; - event->hw.extra_reg.idx = EXTRA_REG_RSP_0; event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; + } else if (idx == EXTRA_REG_RSP_1) { + event->hw.config &= ~INTEL_ARCH_EVENT_MASK; + event->hw.config |= 0x01bb; + event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; } - - if (event->hw.extra_reg.idx == orig_idx) - return false; - - return true; } /* @@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, struct event_constraint *c = &emptyconstraint; struct er_account *era; unsigned long flags; - int orig_idx = reg->idx; + int idx = reg->idx; - /* already allocated shared msr */ - if (reg->alloc) + /* + * reg->alloc can be set due to existing state, so for fake cpuc we + * need to ignore this, otherwise we might fail to allocate proper fake + * state for this extra reg constraint. Also see the comment below. + */ + if (reg->alloc && !cpuc->is_fake) return NULL; /* call x86_get_event_constraint() */ again: - era = &cpuc->shared_regs->regs[reg->idx]; + era = &cpuc->shared_regs->regs[idx]; /* * we use spin_lock_irqsave() to avoid lockdep issues when * passing a fake cpuc @@ -1173,6 +1183,29 @@ again: if (!atomic_read(&era->ref) || era->config == reg->config) { + /* + * If its a fake cpuc -- as per validate_{group,event}() we + * shouldn't touch event state and we can avoid doing so + * since both will only call get_event_constraints() once + * on each event, this avoids the need for reg->alloc. + * + * Not doing the ER fixup will only result in era->reg being + * wrong, but since we won't actually try and program hardware + * this isn't a problem either. + */ + if (!cpuc->is_fake) { + if (idx != reg->idx) + intel_fixup_er(event, idx); + + /* + * x86_schedule_events() can call get_event_constraints() + * multiple times on events in the case of incremental + * scheduling(). reg->alloc ensures we only do the ER + * allocation once. + */ + reg->alloc = 1; + } + /* lock in msr value */ era->config = reg->config; era->reg = reg->reg; @@ -1180,17 +1213,17 @@ again: /* one more user */ atomic_inc(&era->ref); - /* no need to reallocate during incremental event scheduling */ - reg->alloc = 1; - /* * need to call x86_get_event_constraint() * to check if associated event has constraints */ c = NULL; - } else if (intel_try_alt_er(event, orig_idx)) { - raw_spin_unlock_irqrestore(&era->lock, flags); - goto again; + } else { + idx = intel_alt_er(idx); + if (idx != reg->idx) { + raw_spin_unlock_irqrestore(&era->lock, flags); + goto again; + } } raw_spin_unlock_irqrestore(&era->lock, flags); @@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, struct er_account *era; /* - * only put constraint if extra reg was actually - * allocated. Also takes care of event which do - * not use an extra shared reg + * Only put constraint if extra reg was actually allocated. Also takes + * care of event which do not use an extra shared reg. + * + * Also, if this is a fake cpuc we shouldn't touch any event state + * (reg->alloc) and we don't care about leaving inconsistent cpuc state + * either since it'll be thrown out. */ - if (!reg->alloc) + if (!reg->alloc || cpuc->is_fake) return; era = &cpuc->shared_regs->regs[reg->idx]; From cccb9ba9e4ee0d750265f53de9258df69655c40b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Jun 2012 10:26:43 +0200 Subject: [PATCH 223/475] perf/x86: Implement cycles:p for SNB/IVB Now that there's finally a chip with working PEBS (IvyBridge), we can enable the hardware and implement cycles:p for SNB/IVB. Cc: Stephane Eranian Requested-and-tested-by: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1338884803.28282.153.camel@twins Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event_intel.c | 50 +++++++++++++++++++++----- 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 83794d8e6af..7241e2fc3c1 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -365,6 +365,7 @@ struct x86_pmu { int pebs_record_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; + void (*pebs_aliases)(struct perf_event *event); /* * Intel LBR diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 965baa2fa79..2312c1ff1b1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1336,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc, intel_put_shared_regs_event_constraints(cpuc, event); } -static int intel_pmu_hw_config(struct perf_event *event) +static void intel_pebs_aliases_core2(struct perf_event *event) { - int ret = x86_pmu_hw_config(event); - - if (ret) - return ret; - - if (event->attr.precise_ip && - (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { + if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { /* * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P * (0x003c) so that we can use it with PEBS. @@ -1365,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event) */ u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); + alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); + event->hw.config = alt_config; + } +} + +static void intel_pebs_aliases_snb(struct perf_event *event) +{ + if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { + /* + * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P + * (0x003c) so that we can use it with PEBS. + * + * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't + * PEBS capable. However we can use UOPS_RETIRED.ALL + * (0x01c2), which is a PEBS capable event, to get the same + * count. + * + * UOPS_RETIRED.ALL counts the number of cycles that retires + * CNTMASK micro-ops. By setting CNTMASK to a value (16) + * larger than the maximum number of micro-ops that can be + * retired per cycle (4) and then inverting the condition, we + * count all cycles that retire 16 or less micro-ops, which + * is every cycle. + * + * Thereby we gain a PEBS capable cycle counter. + */ + u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16); alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); event->hw.config = alt_config; } +} + +static int intel_pmu_hw_config(struct perf_event *event) +{ + int ret = x86_pmu_hw_config(event); + + if (ret) + return ret; + + if (event->attr.precise_ip && x86_pmu.pebs_aliases) + x86_pmu.pebs_aliases(event); if (intel_pmu_needs_lbr_smpl(event)) { ret = intel_pmu_setup_lbr_filter(event); @@ -1643,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .get_event_constraints = intel_get_event_constraints, .put_event_constraints = intel_put_event_constraints, + .pebs_aliases = intel_pebs_aliases_core2, .format_attrs = intel_arch3_formats_attr, @@ -1885,6 +1918,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; + x86_pmu.pebs_aliases = intel_pebs_aliases_snb; x86_pmu.extra_regs = intel_snb_extra_regs; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; From b6db437ba8322f5cee0bd355ad2ef9f73c413754 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Jun 2012 10:26:43 +0200 Subject: [PATCH 224/475] perf/x86: Enable/Add IvyBridge hardware support Implement rudimentary IVB perf support. The SDM states its identical to SNB with exception of the exact event tables, but a quick look suggests they're similar enough. Also mark SNB-EP as broken for now. Requested-and-tested-by: Linus Torvalds Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1338884803.28282.153.camel@twins Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 2312c1ff1b1..187c294bc65 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1909,8 +1909,9 @@ __init int intel_pmu_init(void) break; case 42: /* SandyBridge */ - x86_add_quirk(intel_sandybridge_quirk); case 45: /* SandyBridge, "Romely-EP" */ + x86_add_quirk(intel_sandybridge_quirk); + case 58: /* IvyBridge */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); From 8440ccb43fc0ecffcf1acee0273d766e6a8cd51d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Jun 2012 10:26:43 +0200 Subject: [PATCH 225/475] perf/x86: Update SNB PEBS constraints Afaict there's no need to (incompletely) iterate the MEM_UOPS_RETIRED.* umask state. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1338884803.28282.153.camel@twins Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 5a3edc27f6e..35e2192df9f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ - INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */ - INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ - INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ From 67384fe3fd450536342330f684ea1f7dcaef8130 Mon Sep 17 00:00:00 2001 From: Eugeni Dodonov Date: Wed, 6 Jun 2012 11:59:06 -0300 Subject: [PATCH 226/475] char/agp: add another Ironlake host bridge This seems to come on Gigabyte H55M-S2V and was discovered through the https://bugs.freedesktop.org/show_bug.cgi?id=50381 debugging. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=50381 Signed-off-by: Eugeni Dodonov Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter --- drivers/char/agp/intel-agp.c | 1 + drivers/char/agp/intel-agp.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 764f70c5e69..0a418527941 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -898,6 +898,7 @@ static struct pci_device_id agp_intel_pci_table[] = { ID(PCI_DEVICE_ID_INTEL_B43_HB), ID(PCI_DEVICE_ID_INTEL_B43_1_HB), ID(PCI_DEVICE_ID_INTEL_IRONLAKE_D_HB), + ID(PCI_DEVICE_ID_INTEL_IRONLAKE_D2_HB), ID(PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB), ID(PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB), ID(PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB), diff --git a/drivers/char/agp/intel-agp.h b/drivers/char/agp/intel-agp.h index c0091753a0d..8e2d9140f30 100644 --- a/drivers/char/agp/intel-agp.h +++ b/drivers/char/agp/intel-agp.h @@ -212,6 +212,7 @@ #define PCI_DEVICE_ID_INTEL_G41_HB 0x2E30 #define PCI_DEVICE_ID_INTEL_G41_IG 0x2E32 #define PCI_DEVICE_ID_INTEL_IRONLAKE_D_HB 0x0040 +#define PCI_DEVICE_ID_INTEL_IRONLAKE_D2_HB 0x0069 #define PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG 0x0042 #define PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB 0x0044 #define PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB 0x0062 From a841f8cef4bb124f0f5563314d0beaf2e1249d72 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Tue, 5 Jun 2012 13:44:36 -0500 Subject: [PATCH 227/475] sched: Fix the relax_domain_level boot parameter It does not get processed because sched_domain_level_max is 0 at the time that setup_relax_domain_level() is run. Simply accept the value as it is, as we don't know the value of sched_domain_level_max until sched domain construction is completed. Fix sched_relax_domain_level in cpuset. The build_sched_domain() routine calls the set_domain_attribute() routine prior to setting the sd->level, however, the set_domain_attribute() routine relies on the sd->level to decide whether idle load balancing will be off/on. Signed-off-by: Dimitri Sivanich Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120605184436.GA15668@sgi.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2bdd1761643..d5594a4268d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6268,11 +6268,8 @@ int sched_domain_level_max; static int __init setup_relax_domain_level(char *str) { - unsigned long val; - - val = simple_strtoul(str, NULL, 0); - if (val < sched_domain_level_max) - default_relax_domain_level = val; + if (kstrtoint(str, 0, &default_relax_domain_level)) + pr_warn("Unable to set relax_domain_level\n"); return 1; } @@ -6698,7 +6695,6 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, if (!sd) return child; - set_domain_attribute(sd, attr); cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); if (child) { sd->level = child->level + 1; @@ -6706,6 +6702,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, child->parent = sd; } sd->child = child; + set_domain_attribute(sd, attr); return sd; } From 302fa4b58ac754a6da13f4f5546f710fecc3b945 Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Fri, 20 Apr 2012 15:41:33 -0700 Subject: [PATCH 228/475] perf/x86: Allow multiple stacks Without this patch, applications with two different stack regions (eg: native stack vs JIT stack) get truncated callchains even when RBP chaining is present. GDB shows proper stack traces and the frame pointer chaining is intact. This patch disables the (fp < RSP) check, hoping that other checks in the code save the day for us. In our limited testing, this didn't seem to break anything. In the long term, we could potentially have userspace advise the kernel on the range of valid stack addresses, so we don't spend a lot of time unwinding from bogus addresses. Signed-off-by: Arun Sharma CC: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Stephane Eranian Cc: Namhyung Kim Cc: Tom Zanussi Cc: linux-kernel@vger.kernel.org Cc: linux-perf-users@vger.kernel.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1334961696-19580-2-git-send-email-asharma@fb.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index cb608383e4f..e78bc256aea 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1781,9 +1781,6 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) if (bytes != sizeof(frame)) break; - if (fp < compat_ptr(regs->sp)) - break; - perf_callchain_store(entry, frame.return_address); fp = compat_ptr(frame.next_frame); } @@ -1827,9 +1824,6 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) if (bytes != sizeof(frame)) break; - if ((unsigned long)fp < regs->sp) - break; - perf_callchain_store(entry, frame.return_address); fp = frame.next_frame; } From 0b0d9cf6ec7bab91977da2d71c09157f110f7c2e Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Fri, 20 Apr 2012 15:41:34 -0700 Subject: [PATCH 229/475] perf: Limit callchains to 127 Stack depth of 255 seems excessive, given that copy_from_user_nmi() could be slow. Signed-off-by: Arun Sharma Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1334961696-19580-3-git-send-email-asharma@fb.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1817d4015e5..45db49f64bb 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -555,7 +555,7 @@ enum perf_event_type { PERF_RECORD_MAX, /* non-ABI */ }; -#define PERF_MAX_STACK_DEPTH 255 +#define PERF_MAX_STACK_DEPTH 127 enum perf_callchain_context { PERF_CONTEXT_HV = (__u64)-32, From bc6ca7b342d5ae15c3ba3081fd40271b8039fb25 Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Fri, 20 Apr 2012 15:41:35 -0700 Subject: [PATCH 230/475] perf/x86: Check if user fp is valid Signed-off-by: Arun Sharma Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1334961696-19580-4-git-send-email-asharma@fb.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 12 ++++++------ arch/x86/kernel/cpu/perf_event.c | 12 ++++++++++++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 04cd6882308..e1f3a17034f 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -33,9 +33,8 @@ #define segment_eq(a, b) ((a).seg == (b).seg) #define user_addr_max() (current_thread_info()->addr_limit.seg) -#define __addr_ok(addr) \ - ((unsigned long __force)(addr) < \ - (current_thread_info()->addr_limit.seg)) +#define __addr_ok(addr) \ + ((unsigned long __force)(addr) < user_addr_max()) /* * Test whether a block of memory is a valid user space address. @@ -47,14 +46,14 @@ * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... */ -#define __range_not_ok(addr, size) \ +#define __range_not_ok(addr, size, limit) \ ({ \ unsigned long flag, roksum; \ __chk_user_ptr(addr); \ asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ : "=&r" (flag), "=r" (roksum) \ : "1" (addr), "g" ((long)(size)), \ - "rm" (current_thread_info()->addr_limit.seg)); \ + "rm" (limit)); \ flag; \ }) @@ -77,7 +76,8 @@ * checks that the pointer is in the user space range - after calling * this function, memory access functions may still return -EFAULT. */ -#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0)) +#define access_ok(type, addr, size) \ + (likely(__range_not_ok(addr, size, user_addr_max()) == 0)) /* * The exception table consists of pairs of addresses relative to the diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e78bc256aea..c4706cf9c01 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1757,6 +1757,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); } +static inline int +valid_user_frame(const void __user *fp, unsigned long size) +{ + return (__range_not_ok(fp, size, TASK_SIZE) == 0); +} + #ifdef CONFIG_COMPAT #include @@ -1781,6 +1787,9 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) if (bytes != sizeof(frame)) break; + if (!valid_user_frame(fp, sizeof(frame))) + break; + perf_callchain_store(entry, frame.return_address); fp = compat_ptr(frame.next_frame); } @@ -1824,6 +1833,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) if (bytes != sizeof(frame)) break; + if (!valid_user_frame(fp, sizeof(frame))) + break; + perf_callchain_store(entry, frame.return_address); fp = frame.next_frame; } From db0dc75d6403b6663c0eab4c6ccb672eb9b2ed72 Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Fri, 20 Apr 2012 15:41:36 -0700 Subject: [PATCH 231/475] perf/x86: Check user address explicitly in copy_from_user_nmi() Signed-off-by: Arun Sharma Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1334961696-19580-5-git-send-email-asharma@fb.com Signed-off-by: Ingo Molnar --- arch/x86/lib/usercopy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index f61ee67ec00..677b1ed184c 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -8,6 +8,7 @@ #include #include +#include /* * best effort, GUP based copy_from_user() that is NMI-safe @@ -21,6 +22,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) void *map; int ret; + if (__range_not_ok(from, n, TASK_SIZE) == 0) + return len; + do { ret = __get_user_pages_fast(addr, 1, 0, &page); if (!ret) From 10db9e009af5c3647b9ee742dcb14f6ff447a2a5 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 6 Jun 2012 11:21:44 -0400 Subject: [PATCH 232/475] tile: remove cpu_idle_on_new_stack This routine isn't used unless CONFIG_HOMECACHE is enabled, which isn't even available as a public configuration option yet. Since it no longer links correctly in 3.4, just remove it for now. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/thread_info.h | 5 ----- arch/tile/kernel/entry.S | 14 -------------- 2 files changed, 19 deletions(-) diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index 7e1fef36bde..e9c670d7a7f 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -91,11 +91,6 @@ extern void smp_nap(void); /* Enable interrupts racelessly and nap forever: helper for cpu_idle(). */ extern void _cpu_idle(void); -/* Switch boot idle thread to a freshly-allocated stack and free old stack. */ -extern void cpu_idle_on_new_stack(struct thread_info *old_ti, - unsigned long new_sp, - unsigned long new_ss10); - #else /* __ASSEMBLY__ */ /* diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index 133c4b56a99..c31637baff2 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -68,20 +68,6 @@ STD_ENTRY(KBacktraceIterator_init_current) jrp lr /* keep backtracer happy */ STD_ENDPROC(KBacktraceIterator_init_current) -/* - * Reset our stack to r1/r2 (sp and ksp0+cpu respectively), then - * free the old stack (passed in r0) and re-invoke cpu_idle(). - * We update sp and ksp0 simultaneously to avoid backtracer warnings. - */ -STD_ENTRY(cpu_idle_on_new_stack) - { - move sp, r1 - mtspr SPR_SYSTEM_SAVE_K_0, r2 - } - jal free_thread_info - j cpu_idle - STD_ENDPROC(cpu_idle_on_new_stack) - /* Loop forever on a nap during SMP boot. */ STD_ENTRY(smp_nap) nap From 2ded5c2484d7375860bdb5f3df1954a346a2da26 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 6 Jun 2012 11:23:19 -0400 Subject: [PATCH 233/475] tile: add #include to unbreak build after generic init_task conversion Some code was moved from init_task.c to setup.c but the appropriate header needed to be moved as well. Signed-off-by: Chris Metcalf --- arch/tile/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 6098ccc59be..dd87f342039 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include From ff3dd78cb8055bcb3a10e526044d8b54a773c612 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 4 Jun 2012 19:22:55 +0000 Subject: [PATCH 234/475] stmmac: fix driver's doc when run kernel-doc script Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 70966330f44..ea33eae22ee 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -833,8 +833,9 @@ static u32 stmmac_get_synopsys_id(struct stmmac_priv *priv) /** * stmmac_selec_desc_mode - * @dev : device pointer - * Description: select the Enhanced/Alternate or Normal descriptors */ + * @priv : private structure + * Description: select the Enhanced/Alternate or Normal descriptors + */ static void stmmac_selec_desc_mode(struct stmmac_priv *priv) { if (priv->plat->enh_desc) { @@ -1861,6 +1862,8 @@ static int stmmac_hw_init(struct stmmac_priv *priv) /** * stmmac_dvr_probe * @device: device pointer + * @plat_dat: platform data pointer + * @addr: iobase memory address * Description: this is the main probe function used to * call the alloc_etherdev, allocate the priv structure. */ From 3d2377144ca66c98fa220bda5f945590cf5cfdc0 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 4 Jun 2012 19:22:56 +0000 Subject: [PATCH 235/475] stmmac: update driver's doc Fixed the driver's documentation that was obsolete and didn't report new platform fields (recently added). Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- Documentation/networking/stmmac.txt | 44 ++++++++++++++++------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt index ab1e8d7004c..5cb9a197246 100644 --- a/Documentation/networking/stmmac.txt +++ b/Documentation/networking/stmmac.txt @@ -10,8 +10,8 @@ Currently this network device driver is for all STM embedded MAC/GMAC (i.e. 7xxx/5xxx SoCs), SPEAr (arm), Loongson1B (mips) and XLINX XC2V3000 FF1152AMT0221 D1215994A VIRTEX FPGA board. -DWC Ether MAC 10/100/1000 Universal version 3.60a (and older) and DWC Ether MAC 10/100 -Universal version 4.0 have been used for developing this driver. +DWC Ether MAC 10/100/1000 Universal version 3.60a (and older) and DWC Ether +MAC 10/100 Universal version 4.0 have been used for developing this driver. This driver supports both the platform bus and PCI. @@ -54,27 +54,27 @@ net_device structure enabling the scatter/gather feature. When one or more packets are received, an interrupt happens. The interrupts are not queued so the driver has to scan all the descriptors in the ring during the receive process. -This is based on NAPI so the interrupt handler signals only if there is work to be -done, and it exits. +This is based on NAPI so the interrupt handler signals only if there is work +to be done, and it exits. Then the poll method will be scheduled at some future point. The incoming packets are stored, by the DMA, in a list of pre-allocated socket buffers in order to avoid the memcpy (Zero-copy). 4.3) Timer-Driver Interrupt -Instead of having the device that asynchronously notifies the frame receptions, the -driver configures a timer to generate an interrupt at regular intervals. -Based on the granularity of the timer, the frames that are received by the device -will experience different levels of latency. Some NICs have dedicated timer -device to perform this task. STMMAC can use either the RTC device or the TMU -channel 2 on STLinux platforms. +Instead of having the device that asynchronously notifies the frame receptions, +the driver configures a timer to generate an interrupt at regular intervals. +Based on the granularity of the timer, the frames that are received by the +device will experience different levels of latency. Some NICs have dedicated +timer device to perform this task. STMMAC can use either the RTC device or the +TMU channel 2 on STLinux platforms. The timers frequency can be passed to the driver as parameter; when change it, take care of both hardware capability and network stability/performance impact. -Several performance tests on STM platforms showed this optimisation allows to spare -the CPU while having the maximum throughput. +Several performance tests on STM platforms showed this optimisation allows to +spare the CPU while having the maximum throughput. 4.4) WOL -Wake up on Lan feature through Magic and Unicast frames are supported for the GMAC -core. +Wake up on Lan feature through Magic and Unicast frames are supported for the +GMAC core. 4.5) DMA descriptors Driver handles both normal and enhanced descriptors. The latter has been only @@ -106,7 +106,8 @@ Several driver's information can be passed through the platform These are included in the include/linux/stmmac.h header file and detailed below as well: - struct plat_stmmacenet_data { +struct plat_stmmacenet_data { + char *phy_bus_name; int bus_id; int phy_addr; int interface; @@ -124,19 +125,24 @@ and detailed below as well: void (*bus_setup)(void __iomem *ioaddr); int (*init)(struct platform_device *pdev); void (*exit)(struct platform_device *pdev); + void *custom_cfg; + void *custom_data; void *bsp_priv; }; Where: + o phy_bus_name: phy bus name to attach to the stmmac. o bus_id: bus identifier. o phy_addr: the physical address can be passed from the platform. If it is set to -1 the driver will automatically detect it at run-time by probing all the 32 addresses. o interface: PHY device's interface. o mdio_bus_data: specific platform fields for the MDIO bus. - o pbl: the Programmable Burst Length is maximum number of beats to + o dma_cfg: internal DMA parameters + o pbl: the Programmable Burst Length is maximum number of beats to be transferred in one DMA transaction. GMAC also enables the 4xPBL by default. + o fixed_burst/mixed_burst/burst_len o clk_csr: fixed CSR Clock range selection. o has_gmac: uses the GMAC core. o enh_desc: if sets the MAC will use the enhanced descriptor structure. @@ -160,8 +166,9 @@ Where: this is sometime necessary on some platforms (e.g. ST boxes) where the HW needs to have set some PIO lines or system cfg registers. - o custom_cfg: this is a custom configuration that can be passed while - initialising the resources. + o custom_cfg/custom_data: this is a custom configuration that can be passed + while initialising the resources. + o bsp_priv: another private poiter. For MDIO bus The we have: @@ -180,7 +187,6 @@ Where: o irqs: list of IRQs, one per PHY. o probed_phy_irq: if irqs is NULL, use this for probed PHY. - For DMA engine we have the following internal fields that should be tuned according to the HW capabilities. From ba27ec66ffeb78cbf9f85e168b32551a9aaf2a34 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 4 Jun 2012 19:22:57 +0000 Subject: [PATCH 236/475] stmmac: fix driver Kconfig when built as module This patches fixes the driver when built as dynamic module. In fact, the platform part cannot be built and the probe fails (thanks to Bob Liu that reported this bug). v2: as D. Miller suggested, it is not necessary to make the pci and the platform code mutually exclusive. Having both could also help, at built time ,to verify that all the code is validated and compiles fine. v3: removed wrong Reviewed-by from the patch Reported-by: Bob Liu cc: Rayagond Kokatanur Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 5 ++-- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 3 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 25 ++++++++++++++++ .../net/ethernet/stmicro/stmmac/stmmac_pci.c | 29 +------------------ .../ethernet/stmicro/stmmac/stmmac_platform.c | 4 +-- 5 files changed, 31 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 036428348fa..0076f770e63 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -13,9 +13,8 @@ config STMMAC_ETH if STMMAC_ETH config STMMAC_PLATFORM - tristate "STMMAC platform bus support" + bool "STMMAC Platform bus support" depends on STMMAC_ETH - default y ---help--- This selects the platform specific bus support for the stmmac device driver. This is the driver used @@ -26,7 +25,7 @@ config STMMAC_PLATFORM If unsure, say N. config STMMAC_PCI - tristate "STMMAC support on PCI bus (EXPERIMENTAL)" + bool "STMMAC PCI bus support (EXPERIMENTAL)" depends on STMMAC_ETH && PCI && EXPERIMENTAL ---help--- This is to select the Synopsys DWMAC available on PCI devices, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 6b5d060ee9d..6d07ba2c866 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -95,7 +95,8 @@ extern int stmmac_mdio_register(struct net_device *ndev); extern void stmmac_set_ethtool_ops(struct net_device *netdev); extern const struct stmmac_desc_ops enh_desc_ops; extern const struct stmmac_desc_ops ndesc_ops; - +extern struct pci_driver stmmac_pci_driver; +extern struct platform_driver stmmac_pltfr_driver; int stmmac_freeze(struct net_device *ndev); int stmmac_restore(struct net_device *ndev); int stmmac_resume(struct net_device *ndev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ea33eae22ee..36385695141 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -42,6 +42,7 @@ #include #include #include +#include #ifdef CONFIG_STMMAC_DEBUG_FS #include #include @@ -2093,6 +2094,30 @@ int stmmac_restore(struct net_device *ndev) } #endif /* CONFIG_PM */ +static int __init stmmac_init(void) +{ + int err = 0; + + err = platform_driver_register(&stmmac_pltfr_driver); + + if (!err) { + err = pci_register_driver(&stmmac_pci_driver); + if (err) + platform_driver_unregister(&stmmac_pltfr_driver); + } + + return err; +} + +static void __exit stmmac_exit(void) +{ + pci_unregister_driver(&stmmac_pci_driver); + platform_driver_unregister(&stmmac_pltfr_driver); +} + +module_init(stmmac_init); +module_exit(stmmac_exit); + #ifndef MODULE static int __init stmmac_cmdline_opt(char *str) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 58fab5303e9..cf826e6b6aa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -179,7 +179,7 @@ static DEFINE_PCI_DEVICE_TABLE(stmmac_id_table) = { MODULE_DEVICE_TABLE(pci, stmmac_id_table); -static struct pci_driver stmmac_driver = { +struct pci_driver stmmac_pci_driver = { .name = STMMAC_RESOURCE_NAME, .id_table = stmmac_id_table, .probe = stmmac_pci_probe, @@ -190,33 +190,6 @@ static struct pci_driver stmmac_driver = { #endif }; -/** - * stmmac_init_module - Entry point for the driver - * Description: This function is the entry point for the driver. - */ -static int __init stmmac_init_module(void) -{ - int ret; - - ret = pci_register_driver(&stmmac_driver); - if (ret < 0) - pr_err("%s: ERROR: driver registration failed\n", __func__); - - return ret; -} - -/** - * stmmac_cleanup_module - Cleanup routine for the driver - * Description: This function is the cleanup routine for the driver. - */ -static void __exit stmmac_cleanup_module(void) -{ - pci_unregister_driver(&stmmac_driver); -} - -module_init(stmmac_init_module); -module_exit(stmmac_cleanup_module); - MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet PCI driver"); MODULE_AUTHOR("Rayagond Kokatanur "); MODULE_AUTHOR("Giuseppe Cavallaro "); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 3dd8f080380..680d2b8dfe2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -255,7 +255,7 @@ static const struct of_device_id stmmac_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, stmmac_dt_ids); -static struct platform_driver stmmac_driver = { +struct platform_driver stmmac_pltfr_driver = { .probe = stmmac_pltfr_probe, .remove = stmmac_pltfr_remove, .driver = { @@ -266,8 +266,6 @@ static struct platform_driver stmmac_driver = { }, }; -module_platform_driver(stmmac_driver); - MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet PLATFORM driver"); MODULE_AUTHOR("Giuseppe Cavallaro "); MODULE_LICENSE("GPL"); From edc4a67e15e34d2b3a2ab968625fd157751125d8 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 24 May 2012 16:08:09 +0300 Subject: [PATCH 237/475] mlx4_core: Fix setting VL_cap in mlx4_SET_PORT wrapper flow Commit 096335b3f983 ("mlx4_core: Allow dynamic MTU configuration for IB ports") modifies the port VL setting. This exposes a bug in mlx4_common_set_port(), where the VL cap value passed in (inside the command mailbox) is incorrectly zeroed-out: mlx4_SET_PORT modifies the VL_cap field (byte 3 of the mailbox). Since the SET_PORT command is paravirtualized on the master as well as on the slaves, mlx4_SET_PORT_wrapper() is invoked on the master. This calls mlx4_common_set_port() where mailbox byte 3 gets overwritten by code which should only set a single bit in that byte (for the reset qkey counter flag) -- but instead overwrites the entire byte. The result is that when running in SR-IOV mode, the VL_cap will be set to zero -- fix this. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 1fe2c7a8b40..a8fb52992c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -697,10 +697,10 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, if (slave != dev->caps.function) memset(inbox->buf, 0, 256); if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { - *(u8 *) inbox->buf = !!reset_qkey_viols << 6; + *(u8 *) inbox->buf |= !!reset_qkey_viols << 6; ((__be32 *) inbox->buf)[2] = agg_cap_mask; } else { - ((u8 *) inbox->buf)[3] = !!reset_qkey_viols; + ((u8 *) inbox->buf)[3] |= !!reset_qkey_viols; ((__be32 *) inbox->buf)[1] = agg_cap_mask; } From fc2d004419abebcfd740f46c32dd8201ce1b33c9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 24 May 2012 16:08:08 +0300 Subject: [PATCH 238/475] IB/mlx4: Fix max_wqe capacity reported from query device 1. Limit the max number of WQEs per QP reported when querying the device, so that ib_create_qp() will not fail for a QP size that the device claimed to support due to additional headroom WQEs being allocated. 2. Limit qp resources accepted for ib_create_qp() to the limits reported in ib_query_device(). In kernel space, make sure that the limits returned to the caller following qp creation also lie within the reported device limits. For userspace, report as before, and do adjustment in libmlx4 (so as not to break ABI). Signed-off-by: Jack Morgenstein Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/infiniband/hw/mlx4/mlx4_ib.h | 8 ++++++++ drivers/infiniband/hw/mlx4/qp.c | 21 +++++++++++++++------ 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8afea12c8d4..3530c41fcd1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -140,7 +140,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_mr_size = ~0ull; props->page_size_cap = dev->dev->caps.page_size_cap; props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps; - props->max_qp_wr = dev->dev->caps.max_wqes; + props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; props->max_sge = min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg); props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index e62297cc77c..ff36655d23d 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -44,6 +44,14 @@ #include #include +enum { + MLX4_IB_SQ_MIN_WQE_SHIFT = 6, + MLX4_IB_MAX_HEADROOM = 2048 +}; + +#define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1) +#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT)) + struct mlx4_ib_ucontext { struct ib_ucontext ibucontext; struct mlx4_uar uar; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index ceb33327091..8d4ed24aef9 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -310,8 +310,8 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, int is_user, int has_rq, struct mlx4_ib_qp *qp) { /* Sanity check RQ size before proceeding */ - if (cap->max_recv_wr > dev->dev->caps.max_wqes || - cap->max_recv_sge > dev->dev->caps.max_rq_sg) + if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE || + cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)) return -EINVAL; if (!has_rq) { @@ -329,8 +329,17 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg)); } - cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt; - cap->max_recv_sge = qp->rq.max_gs; + /* leave userspace return values as they were, so as not to break ABI */ + if (is_user) { + cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt; + cap->max_recv_sge = qp->rq.max_gs; + } else { + cap->max_recv_wr = qp->rq.max_post = + min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt); + cap->max_recv_sge = min(qp->rq.max_gs, + min(dev->dev->caps.max_sq_sg, + dev->dev->caps.max_rq_sg)); + } return 0; } @@ -341,8 +350,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, int s; /* Sanity check SQ size before proceeding */ - if (cap->max_send_wr > dev->dev->caps.max_wqes || - cap->max_send_sge > dev->dev->caps.max_sq_sg || + if (cap->max_send_wr > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) || + cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) || cap->max_inline_data + send_wqe_overhead(type, qp->flags) + sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) return -EINVAL; From 32a527add871a51b3c06177849d18bf71e33b320 Mon Sep 17 00:00:00 2001 From: Christian Dietrich Date: Tue, 5 Jun 2012 02:23:20 +0000 Subject: [PATCH 239/475] ide: icside.c: Fix compile with CONFIG_BLK_DEV_IDEDMA_ICS=n The icside driver can be configured without DMA support, but it doesn't compile then, because DMA operations are referenced. drivers/ide/icside.c:523: error: 'icside_v6_port_ops' undeclared drivers/ide/icside.c:522: error: 'icside_dma_init' undeclared Signed-off-by: Christian Dietrich Signed-off-by: David S. Miller --- drivers/ide/icside.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c index 8716066a2f2..83e5100d73f 100644 --- a/drivers/ide/icside.c +++ b/drivers/ide/icside.c @@ -375,8 +375,6 @@ static const struct ide_dma_ops icside_v6_dma_ops = { .dma_test_irq = icside_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, }; -#else -#define icside_v6_dma_ops NULL #endif static int icside_dma_off_init(ide_hwif_t *hwif, const struct ide_port_info *d) @@ -456,7 +454,6 @@ err_free: static const struct ide_port_info icside_v6_port_info __initdata = { .init_dma = icside_dma_off_init, .port_ops = &icside_v6_no_dma_port_ops, - .dma_ops = &icside_v6_dma_ops, .host_flags = IDE_HFLAG_SERIALIZE | IDE_HFLAG_MMIO, .mwdma_mask = ATA_MWDMA2, .swdma_mask = ATA_SWDMA2, @@ -518,11 +515,13 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec) ecard_set_drvdata(ec, state); +#ifdef CONFIG_BLK_DEV_IDEDMA_ICS if (ec->dma != NO_DMA && !request_dma(ec->dma, DRV_NAME)) { d.init_dma = icside_dma_init; d.port_ops = &icside_v6_port_ops; - } else - d.dma_ops = NULL; + d.dma_ops = &icside_v6_dma_ops; + } +#endif ret = ide_host_register(host, &d, hws); if (ret) From 027253c138a12b075e724f94b6cc43fd7071c14f Mon Sep 17 00:00:00 2001 From: Christian Dietrich Date: Tue, 5 Jun 2012 02:28:59 +0000 Subject: [PATCH 240/475] ide: icside.c: fix printk format string compile warning Use correct format string parameter for the peak datarate, and prevent uninitialized use of cycle_time. Signed-off-by: Christian Dietrich Signed-off-by: David S. Miller --- drivers/ide/icside.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c index 83e5100d73f..bcb507b0cfd 100644 --- a/drivers/ide/icside.c +++ b/drivers/ide/icside.c @@ -236,7 +236,7 @@ static const struct ide_port_ops icside_v6_no_dma_port_ops = { */ static void icside_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - unsigned long cycle_time; + unsigned long cycle_time = 0; int use_dma_info = 0; const u8 xfer_mode = drive->dma_mode; @@ -271,9 +271,9 @@ static void icside_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) ide_set_drivedata(drive, (void *)cycle_time); - printk("%s: %s selected (peak %dMB/s)\n", drive->name, - ide_xfer_verbose(xfer_mode), - 2000 / (unsigned long)ide_get_drivedata(drive)); + printk(KERN_INFO "%s: %s selected (peak %luMB/s)\n", + drive->name, ide_xfer_verbose(xfer_mode), + 2000 / (cycle_time ? cycle_time : (unsigned long) -1)); } static const struct ide_port_ops icside_v6_port_ops = { From 9a43a02648e8414b2a757437820aa928208fc42f Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jun 2012 06:40:43 +0000 Subject: [PATCH 241/475] mv643xx_eth: Fix compile error for architectures without clk. Commit 452503ebc (ARM: Orion: Eth: Add clk/clkdev support.) broke the building of the driver on architectures which don't have clk support. In particular PPC32 Pegasos which uses this driver. Add #ifdef around the clk API usage. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mv643xx_eth.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 04d901d0ff6..f0f06b2bc28 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -436,7 +436,9 @@ struct mv643xx_eth_private { /* * Hardware-specific parameters. */ +#if defined(CONFIG_HAVE_CLK) struct clk *clk; +#endif unsigned int t_clk; }; @@ -2895,17 +2897,17 @@ static int mv643xx_eth_probe(struct platform_device *pdev) mp->dev = dev; /* - * Get the clk rate, if there is one, otherwise use the default. + * Start with a default rate, and if there is a clock, allow + * it to override the default. */ + mp->t_clk = 133000000; +#if defined(CONFIG_HAVE_CLK) mp->clk = clk_get(&pdev->dev, (pdev->id ? "1" : "0")); if (!IS_ERR(mp->clk)) { clk_prepare_enable(mp->clk); mp->t_clk = clk_get_rate(mp->clk); - } else { - mp->t_clk = 133000000; - printk(KERN_WARNING "Unable to get clock"); } - +#endif set_params(mp, pd); netif_set_real_num_tx_queues(dev, mp->txq_count); netif_set_real_num_rx_queues(dev, mp->rxq_count); @@ -2995,10 +2997,13 @@ static int mv643xx_eth_remove(struct platform_device *pdev) phy_detach(mp->phy); cancel_work_sync(&mp->tx_timeout_task); +#if defined(CONFIG_HAVE_CLK) if (!IS_ERR(mp->clk)) { clk_disable_unprepare(mp->clk); clk_put(mp->clk); } +#endif + free_netdev(mp->dev); platform_set_drvdata(pdev, NULL); From dd03cff23d694cfb0fdae80cb618e7ced05ea696 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Tue, 5 Jun 2012 21:18:10 +0000 Subject: [PATCH 242/475] net: sierra_net: device IDs for Aircard 320U++ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding device IDs for Aircard 320U and two other devices found in the out-of-tree version of this driver. Cc: linux@sierrawireless.com Cc: Autif Khan Cc: Tom Cassidy Cc: stable@vger.kernel.org Signed-off-by: Bjørn Mork Acked-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- drivers/net/usb/sierra_net.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c index 3faef5670d1..d75d1f56bec 100644 --- a/drivers/net/usb/sierra_net.c +++ b/drivers/net/usb/sierra_net.c @@ -946,7 +946,7 @@ struct sk_buff *sierra_net_tx_fixup(struct usbnet *dev, struct sk_buff *skb, } static const u8 sierra_net_ifnum_list[] = { 7, 10, 11 }; -static const struct sierra_net_info_data sierra_net_info_data_68A3 = { +static const struct sierra_net_info_data sierra_net_info_data_direct_ip = { .rx_urb_size = 8 * 1024, .whitelist = { .infolen = ARRAY_SIZE(sierra_net_ifnum_list), @@ -954,7 +954,7 @@ static const struct sierra_net_info_data sierra_net_info_data_68A3 = { } }; -static const struct driver_info sierra_net_info_68A3 = { +static const struct driver_info sierra_net_info_direct_ip = { .description = "Sierra Wireless USB-to-WWAN Modem", .flags = FLAG_WWAN | FLAG_SEND_ZLP, .bind = sierra_net_bind, @@ -962,12 +962,18 @@ static const struct driver_info sierra_net_info_68A3 = { .status = sierra_net_status, .rx_fixup = sierra_net_rx_fixup, .tx_fixup = sierra_net_tx_fixup, - .data = (unsigned long)&sierra_net_info_data_68A3, + .data = (unsigned long)&sierra_net_info_data_direct_ip, }; static const struct usb_device_id products[] = { {USB_DEVICE(0x1199, 0x68A3), /* Sierra Wireless USB-to-WWAN modem */ - .driver_info = (unsigned long) &sierra_net_info_68A3}, + .driver_info = (unsigned long) &sierra_net_info_direct_ip}, + {USB_DEVICE(0x0F3D, 0x68A3), /* AT&T Direct IP modem */ + .driver_info = (unsigned long) &sierra_net_info_direct_ip}, + {USB_DEVICE(0x1199, 0x68AA), /* Sierra Wireless Direct IP LTE modem */ + .driver_info = (unsigned long) &sierra_net_info_direct_ip}, + {USB_DEVICE(0x0F3D, 0x68AA), /* AT&T Direct IP LTE modem */ + .driver_info = (unsigned long) &sierra_net_info_direct_ip}, {}, /* last item */ }; From 55432d2b543a4b6dfae54f5c432a566877a85d90 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Jun 2012 03:00:18 +0000 Subject: [PATCH 243/475] inetpeer: fix a race in inetpeer_gc_worker() commit 5faa5df1fa2024 (inetpeer: Invalidate the inetpeer tree along with the routing cache) added a race : Before freeing an inetpeer, we must respect a RCU grace period, and make sure no user will attempt to increase refcnt. inetpeer_invalidate_tree() waits for a RCU grace period before inserting inetpeer tree into gc_list and waking the worker. At that time, no concurrent lookup can find a inetpeer in this tree. Signed-off-by: Eric Dumazet Cc: Steffen Klassert Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/inetpeer.h | 5 ++++- net/ipv4/inetpeer.c | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index b94765e38e8..2040bff945d 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -40,7 +40,10 @@ struct inet_peer { u32 pmtu_orig; u32 pmtu_learned; struct inetpeer_addr_base redirect_learned; - struct list_head gc_list; + union { + struct list_head gc_list; + struct rcu_head gc_rcu; + }; /* * Once inet_peer is queued for deletion (refcnt == -1), following fields * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d4d61b694fa..dfba343b250 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -560,6 +560,17 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) } EXPORT_SYMBOL(inet_peer_xrlim_allow); +static void inetpeer_inval_rcu(struct rcu_head *head) +{ + struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu); + + spin_lock_bh(&gc_lock); + list_add_tail(&p->gc_list, &gc_list); + spin_unlock_bh(&gc_lock); + + schedule_delayed_work(&gc_work, gc_delay); +} + void inetpeer_invalidate_tree(int family) { struct inet_peer *old, *new, *prev; @@ -576,10 +587,7 @@ void inetpeer_invalidate_tree(int family) prev = cmpxchg(&base->root, old, new); if (prev == old) { base->total = 0; - spin_lock(&gc_lock); - list_add_tail(&prev->gc_list, &gc_list); - spin_unlock(&gc_lock); - schedule_delayed_work(&gc_work, gc_delay); + call_rcu(&prev->gc_rcu, inetpeer_inval_rcu); } out: From f25efd851cc8c0f63d74334bc784a437f4df8ee4 Mon Sep 17 00:00:00 2001 From: Steve Dickson Date: Wed, 6 Jun 2012 14:12:07 -0400 Subject: [PATCH 244/475] NFS: Map minor mismatch error to protocol not support error. Sservers that only have NFSv4.1 support the NFS4ERR_MINOR_VERS_MISMATCH error is return on v4.0 mounts. Mapping that error to EPROTONOSUPPORT will cause the mount to back off to v3 instead of failing. Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7c218fd1ec2..bfc919fd3f0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -105,6 +105,8 @@ static int nfs4_map_errors(int err) return -EINVAL; case -NFS4ERR_SHARE_DENIED: return -EACCES; + case -NFS4ERR_MINOR_VERS_MISMATCH: + return -EPROTONOSUPPORT; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); From 23e81d691a813839020f6e516b398d0f9369fe8b Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Wed, 6 Jun 2012 15:45:44 -0400 Subject: [PATCH 245/475] drm/i915: pch_irq_handler -> {ibx, cpt}_irq_handler Cougar/Panther Point redefine the bits in SDEIIR pretty completely. This function is just debugging, but if we're debugging we probably want to be told accurate things instead of lies. I'm told Lynx Point changes this yet more, but I have no idea how... Note from Eugeni's review: "For the record and for future enabling efforts, for LPT, bits 28-31 and 1-14 are gone since CPT/PPT (e.g., those must be zero). And there is the bit 15 as a new addition, but we are not using it yet and probably won't be using in foreseeable future." Signed-off-by: Adam Jackson Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=35103 Reviewed-by: Eugeni Dodonov Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 38 ++++++++++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_reg.h | 35 +++++++++++++++++++++++++++--- 2 files changed, 67 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 1417660a93e..b1fe0edda95 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -510,7 +510,7 @@ out: return ret; } -static void pch_irq_handler(struct drm_device *dev, u32 pch_iir) +static void ibx_irq_handler(struct drm_device *dev, u32 pch_iir) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; int pipe; @@ -550,6 +550,35 @@ static void pch_irq_handler(struct drm_device *dev, u32 pch_iir) DRM_DEBUG_DRIVER("PCH transcoder A underrun interrupt\n"); } +static void cpt_irq_handler(struct drm_device *dev, u32 pch_iir) +{ + drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; + int pipe; + + if (pch_iir & SDE_AUDIO_POWER_MASK_CPT) + DRM_DEBUG_DRIVER("PCH audio power change on port %d\n", + (pch_iir & SDE_AUDIO_POWER_MASK_CPT) >> + SDE_AUDIO_POWER_SHIFT_CPT); + + if (pch_iir & SDE_AUX_MASK_CPT) + DRM_DEBUG_DRIVER("AUX channel interrupt\n"); + + if (pch_iir & SDE_GMBUS_CPT) + DRM_DEBUG_DRIVER("PCH GMBUS interrupt\n"); + + if (pch_iir & SDE_AUDIO_CP_REQ_CPT) + DRM_DEBUG_DRIVER("Audio CP request interrupt\n"); + + if (pch_iir & SDE_AUDIO_CP_CHG_CPT) + DRM_DEBUG_DRIVER("Audio CP change interrupt\n"); + + if (pch_iir & SDE_FDI_MASK_CPT) + for_each_pipe(pipe) + DRM_DEBUG_DRIVER(" pipe %c FDI IIR: 0x%08x\n", + pipe_name(pipe), + I915_READ(FDI_RX_IIR(pipe))); +} + static irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; @@ -591,7 +620,7 @@ static irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS) if (pch_iir & SDE_HOTPLUG_MASK_CPT) queue_work(dev_priv->wq, &dev_priv->hotplug_work); - pch_irq_handler(dev, pch_iir); + cpt_irq_handler(dev, pch_iir); /* clear PCH hotplug event before clear CPU irq */ I915_WRITE(SDEIIR, pch_iir); @@ -684,7 +713,10 @@ static irqreturn_t ironlake_irq_handler(DRM_IRQ_ARGS) if (de_iir & DE_PCH_EVENT) { if (pch_iir & hotplug_mask) queue_work(dev_priv->wq, &dev_priv->hotplug_work); - pch_irq_handler(dev, pch_iir); + if (HAS_PCH_CPT(dev)) + cpt_irq_handler(dev, pch_iir); + else + ibx_irq_handler(dev, pch_iir); } if (de_iir & DE_PCU_EVENT) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 76bc2756d7c..48d5e8e051c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3321,7 +3321,7 @@ /* PCH */ -/* south display engine interrupt */ +/* south display engine interrupt: IBX */ #define SDE_AUDIO_POWER_D (1 << 27) #define SDE_AUDIO_POWER_C (1 << 26) #define SDE_AUDIO_POWER_B (1 << 25) @@ -3357,15 +3357,44 @@ #define SDE_TRANSA_CRC_ERR (1 << 1) #define SDE_TRANSA_FIFO_UNDER (1 << 0) #define SDE_TRANS_MASK (0x3f) -/* CPT */ -#define SDE_CRT_HOTPLUG_CPT (1 << 19) + +/* south display engine interrupt: CPT/PPT */ +#define SDE_AUDIO_POWER_D_CPT (1 << 31) +#define SDE_AUDIO_POWER_C_CPT (1 << 30) +#define SDE_AUDIO_POWER_B_CPT (1 << 29) +#define SDE_AUDIO_POWER_SHIFT_CPT 29 +#define SDE_AUDIO_POWER_MASK_CPT (7 << 29) +#define SDE_AUXD_CPT (1 << 27) +#define SDE_AUXC_CPT (1 << 26) +#define SDE_AUXB_CPT (1 << 25) +#define SDE_AUX_MASK_CPT (7 << 25) #define SDE_PORTD_HOTPLUG_CPT (1 << 23) #define SDE_PORTC_HOTPLUG_CPT (1 << 22) #define SDE_PORTB_HOTPLUG_CPT (1 << 21) +#define SDE_CRT_HOTPLUG_CPT (1 << 19) #define SDE_HOTPLUG_MASK_CPT (SDE_CRT_HOTPLUG_CPT | \ SDE_PORTD_HOTPLUG_CPT | \ SDE_PORTC_HOTPLUG_CPT | \ SDE_PORTB_HOTPLUG_CPT) +#define SDE_GMBUS_CPT (1 << 17) +#define SDE_AUDIO_CP_REQ_C_CPT (1 << 10) +#define SDE_AUDIO_CP_CHG_C_CPT (1 << 9) +#define SDE_FDI_RXC_CPT (1 << 8) +#define SDE_AUDIO_CP_REQ_B_CPT (1 << 6) +#define SDE_AUDIO_CP_CHG_B_CPT (1 << 5) +#define SDE_FDI_RXB_CPT (1 << 4) +#define SDE_AUDIO_CP_REQ_A_CPT (1 << 2) +#define SDE_AUDIO_CP_CHG_A_CPT (1 << 1) +#define SDE_FDI_RXA_CPT (1 << 0) +#define SDE_AUDIO_CP_REQ_CPT (SDE_AUDIO_CP_REQ_C_CPT | \ + SDE_AUDIO_CP_REQ_B_CPT | \ + SDE_AUDIO_CP_REQ_A_CPT) +#define SDE_AUDIO_CP_CHG_CPT (SDE_AUDIO_CP_CHG_C_CPT | \ + SDE_AUDIO_CP_CHG_B_CPT | \ + SDE_AUDIO_CP_CHG_A_CPT) +#define SDE_FDI_MASK_CPT (SDE_FDI_RXC_CPT | \ + SDE_FDI_RXB_CPT | \ + SDE_FDI_RXA_CPT) #define SDEISR 0xc4000 #define SDEIMR 0xc4004 From ea80dadec7a06889562b478cf0b87afbe62b7ac8 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 6 Jun 2012 14:54:13 +0900 Subject: [PATCH 246/475] [SCSI] Fix sd_probe_domain config problem With CONFIG_BLK_DEV_SD = n and CONFIG_PM = n, you get this compile failure: (.text+0x4f6c77): undefined reference to `scsi_sd_probe_domain' This was introduced by commit a7a20d103994fd760766e6c9d494daa569cbfe06 Author: Dan Williams Date: Thu Mar 22 17:05:11 2012 -0700 [SCSI] sd: limit the scope of the async probe domain And happens because scsi_sd_probe_domain is conditionally defined but unconditionally used. Fix this by making the symbol unconditionally defined. Reported-by: Randy Dunlap Cc: Dan Williams Tested-by: Randy Dunlap Signed-off-by: James Bottomley --- drivers/scsi/scsi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 61c82a345f8..bbbc9c918d4 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -90,11 +90,9 @@ unsigned int scsi_logging_level; EXPORT_SYMBOL(scsi_logging_level); #endif -#if IS_ENABLED(CONFIG_PM) || IS_ENABLED(CONFIG_BLK_DEV_SD) -/* sd and scsi_pm need to coordinate flushing async actions */ +/* sd, scsi core and power management need to coordinate flushing async actions */ LIST_HEAD(scsi_sd_probe_domain); EXPORT_SYMBOL(scsi_sd_probe_domain); -#endif /* NB: These are exposed through /proc/scsi/scsi and form part of the ABI. * You may not alter any existing entry (although adding new ones is From f2bf1f6f5f89d031245067512449fc889b2f4bb2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 6 Jun 2012 19:50:40 -0400 Subject: [PATCH 247/475] tracing: Have tracing_off() actually turn tracing off A recent update to have tracing_on/off() only affect the ftrace ring buffers instead of all ring buffers had a cut and paste error. The tracing_off() did the exact same thing as tracing_on() and would not actually turn off tracing. Unfortunately, tracing_off() is more important to be working than tracing_on() as this is a key development tool, as it lets the developer turn off tracing as soon as a problem is discovered. It is also used by panic and oops code. This bug also breaks the 'echo func:traceoff > set_ftrace_filter' Cc: # 3.4 Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 68032c6177d..49249c28690 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -371,7 +371,7 @@ EXPORT_SYMBOL_GPL(tracing_on); void tracing_off(void) { if (global_trace.buffer) - ring_buffer_record_on(global_trace.buffer); + ring_buffer_record_off(global_trace.buffer); /* * This flag is only looked at when buffers haven't been * allocated yet. We don't really care about the race From 8f5af6f1f2d09fe5eac86a5dc1731a5917c1503a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 4 May 2012 08:31:53 -0700 Subject: [PATCH 248/475] rcu: RCU_FAST_NO_HZ detection of callback adoption In the present implementations of CPU hotplug, the outgoing CPU is guaranteed to run its stop-machine process on the way out, which will guarantee that RCU_FAST_NO_HZ forces the CPU out of dyntick-idle mode. However, new versions of CPU hotplug might not work this way. This commit therefore removes this design constraint by explicitly notifying CPUs when they adopt non-lazy RCU callbacks. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Tested-by: Heiko Carstens Tested-by: Pascal Chapperon --- kernel/rcutree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0da7b88d92d..3b0f1337f75 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1397,6 +1397,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) rdp->qlen_lazy += rsp->qlen_lazy; rdp->qlen += rsp->qlen; rdp->n_cbs_adopted += rsp->qlen; + if (rsp->qlen_lazy != rsp->qlen) + rcu_idle_count_callbacks_posted(); rsp->qlen_lazy = 0; rsp->qlen = 0; From fd4b352687fd8604d49c190c4c9ea9e369fd42d5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 5 May 2012 19:10:35 -0700 Subject: [PATCH 249/475] rcu: Update RCU_FAST_NO_HZ tracing for lazy callbacks In the current code, a short dyntick-idle interval (where there is at least one non-lazy callback on the CPU) and a long dyntick-idle interval (where there are only lazy callbacks on the CPU) are traced identically, which can be less than helpful. This commit therefore emits different event traces in these two cases. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Tested-by: Heiko Carstens Tested-by: Pascal Chapperon --- include/trace/events/rcu.h | 1 + kernel/rcutree_plugin.h | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 1480900c511..d274734b2aa 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -289,6 +289,7 @@ TRACE_EVENT(rcu_dyntick, * "In holdoff": Nothing to do, holding off after unsuccessful attempt. * "Begin holdoff": Attempt failed, don't retry until next jiffy. * "Dyntick with callbacks": Entering dyntick-idle despite callbacks. + * "Dyntick with lazy callbacks": Entering dyntick-idle w/lazy callbacks. * "More callbacks": Still more callbacks, try again to clear them out. * "Callbacks drained": All callbacks processed, off to dyntick idle! * "Timer": Timer fired to cause CPU to continue processing callbacks. diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 2411000d986..5449f02c482 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2165,15 +2165,17 @@ static void rcu_prepare_for_idle(int cpu) !rcu_pending(cpu) && !local_softirq_pending()) { /* Can we go dyntick-idle despite still having callbacks? */ - trace_rcu_prep_idle("Dyntick with callbacks"); per_cpu(rcu_dyntick_drain, cpu) = 0; per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; - if (rcu_cpu_has_nonlazy_callbacks(cpu)) + if (rcu_cpu_has_nonlazy_callbacks(cpu)) { + trace_rcu_prep_idle("Dyntick with callbacks"); per_cpu(rcu_idle_gp_timer_expires, cpu) = jiffies + RCU_IDLE_GP_DELAY; - else + } else { per_cpu(rcu_idle_gp_timer_expires, cpu) = jiffies + RCU_IDLE_LAZY_GP_DELAY; + trace_rcu_prep_idle("Dyntick with lazy callbacks"); + } tp = &per_cpu(rcu_idle_gp_timer, cpu); mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu)); per_cpu(rcu_nonlazy_posted_snap, cpu) = From 5955f7eecd77d6b440db278b266cfecdb72ecd00 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 May 2012 12:07:05 -0700 Subject: [PATCH 250/475] rcu: Move RCU_FAST_NO_HZ per-CPU variables to rcu_dynticks structure The RCU_FAST_NO_HZ code relies on a number of per-CPU variables. This works, but is hidden from someone scanning the data structures in rcutree.h. This commit therefore converts these per-CPU variables to fields in the per-CPU rcu_dynticks structures. Suggested-by: Peter Zijlstra Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Tested-by: Heiko Carstens Tested-by: Pascal Chapperon --- kernel/rcutree.h | 14 ++++++ kernel/rcutree_plugin.h | 99 ++++++++++++++++++----------------------- 2 files changed, 58 insertions(+), 55 deletions(-) diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7f5d138dedf..ea056495783 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -84,6 +84,20 @@ struct rcu_dynticks { /* Process level is worth LLONG_MAX/2. */ int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ +#ifdef CONFIG_RCU_FAST_NO_HZ + int dyntick_drain; /* Prepare-for-idle state variable. */ + unsigned long dyntick_holdoff; + /* No retries for the jiffy of failure. */ + struct timer_list idle_gp_timer; + /* Wake up CPU sleeping with callbacks. */ + unsigned long idle_gp_timer_expires; + /* When to wake up CPU (for repost). */ + bool idle_first_pass; /* First pass of attempt to go idle? */ + unsigned long nonlazy_posted; + /* # times non-lazy CBs posted to CPU. */ + unsigned long nonlazy_posted_snap; + /* idle-period nonlazy_posted snapshot. */ +#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ }; /* RCU's kthread states for tracing. */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 5449f02c482..6bd9637d5d8 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1962,21 +1962,6 @@ static void rcu_idle_count_callbacks_posted(void) #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ -/* Loop counter for rcu_prepare_for_idle(). */ -static DEFINE_PER_CPU(int, rcu_dyntick_drain); -/* If rcu_dyntick_holdoff==jiffies, don't try to enter dyntick-idle mode. */ -static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); -/* Timer to awaken the CPU if it enters dyntick-idle mode with callbacks. */ -static DEFINE_PER_CPU(struct timer_list, rcu_idle_gp_timer); -/* Scheduled expiry time for rcu_idle_gp_timer to allow reposting. */ -static DEFINE_PER_CPU(unsigned long, rcu_idle_gp_timer_expires); -/* Enable special processing on first attempt to enter dyntick-idle mode. */ -static DEFINE_PER_CPU(bool, rcu_idle_first_pass); -/* Running count of non-lazy callbacks posted, never decremented. */ -static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted); -/* Snapshot of rcu_nonlazy_posted to detect meaningful exits from idle. */ -static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted_snap); - /* * Allow the CPU to enter dyntick-idle mode if either: (1) There are no * callbacks on this CPU, (2) this CPU has not yet attempted to enter @@ -1988,13 +1973,15 @@ static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted_snap); */ int rcu_needs_cpu(int cpu) { + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + /* Flag a new idle sojourn to the idle-entry state machine. */ - per_cpu(rcu_idle_first_pass, cpu) = 1; + rdtp->idle_first_pass = 1; /* If no callbacks, RCU doesn't need the CPU. */ if (!rcu_cpu_has_callbacks(cpu)) return 0; /* Otherwise, RCU needs the CPU only if it recently tried and failed. */ - return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies; + return rdtp->dyntick_holdoff == jiffies; } /* @@ -2075,21 +2062,24 @@ static void rcu_idle_gp_timer_func(unsigned long cpu_in) */ static void rcu_prepare_for_idle_init(int cpu) { - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; - setup_timer(&per_cpu(rcu_idle_gp_timer, cpu), - rcu_idle_gp_timer_func, cpu); - per_cpu(rcu_idle_gp_timer_expires, cpu) = jiffies - 1; - per_cpu(rcu_idle_first_pass, cpu) = 1; + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + + rdtp->dyntick_holdoff = jiffies - 1; + setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu); + rdtp->idle_gp_timer_expires = jiffies - 1; + rdtp->idle_first_pass = 1; } /* * Clean up for exit from idle. Because we are exiting from idle, there - * is no longer any point to rcu_idle_gp_timer, so cancel it. This will + * is no longer any point to ->idle_gp_timer, so cancel it. This will * do nothing if this timer is not active, so just cancel it unconditionally. */ static void rcu_cleanup_after_idle(int cpu) { - del_timer(&per_cpu(rcu_idle_gp_timer, cpu)); + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + + del_timer(&rdtp->idle_gp_timer); trace_rcu_prep_idle("Cleanup after idle"); } @@ -2108,42 +2098,41 @@ static void rcu_cleanup_after_idle(int cpu) * Because it is not legal to invoke rcu_process_callbacks() with irqs * disabled, we do one pass of force_quiescent_state(), then do a * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked - * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. + * later. The ->dyntick_drain field controls the sequencing. * * The caller must have disabled interrupts. */ static void rcu_prepare_for_idle(int cpu) { struct timer_list *tp; + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); /* * If this is an idle re-entry, for example, due to use of * RCU_NONIDLE() or the new idle-loop tracing API within the idle * loop, then don't take any state-machine actions, unless the * momentary exit from idle queued additional non-lazy callbacks. - * Instead, repost the rcu_idle_gp_timer if this CPU has callbacks + * Instead, repost the ->idle_gp_timer if this CPU has callbacks * pending. */ - if (!per_cpu(rcu_idle_first_pass, cpu) && - (per_cpu(rcu_nonlazy_posted, cpu) == - per_cpu(rcu_nonlazy_posted_snap, cpu))) { + if (!rdtp->idle_first_pass && + (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) { if (rcu_cpu_has_callbacks(cpu)) { - tp = &per_cpu(rcu_idle_gp_timer, cpu); - mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu)); + tp = &rdtp->idle_gp_timer; + mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); } return; } - per_cpu(rcu_idle_first_pass, cpu) = 0; - per_cpu(rcu_nonlazy_posted_snap, cpu) = - per_cpu(rcu_nonlazy_posted, cpu) - 1; + rdtp->idle_first_pass = 0; + rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1; /* * If there are no callbacks on this CPU, enter dyntick-idle mode. * Also reset state to avoid prejudicing later attempts. */ if (!rcu_cpu_has_callbacks(cpu)) { - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; - per_cpu(rcu_dyntick_drain, cpu) = 0; + rdtp->dyntick_holdoff = jiffies - 1; + rdtp->dyntick_drain = 0; trace_rcu_prep_idle("No callbacks"); return; } @@ -2152,38 +2141,37 @@ static void rcu_prepare_for_idle(int cpu) * If in holdoff mode, just return. We will presumably have * refrained from disabling the scheduling-clock tick. */ - if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) { + if (rdtp->dyntick_holdoff == jiffies) { trace_rcu_prep_idle("In holdoff"); return; } - /* Check and update the rcu_dyntick_drain sequencing. */ - if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { + /* Check and update the ->dyntick_drain sequencing. */ + if (rdtp->dyntick_drain <= 0) { /* First time through, initialize the counter. */ - per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES; - } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES && + rdtp->dyntick_drain = RCU_IDLE_FLUSHES; + } else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES && !rcu_pending(cpu) && !local_softirq_pending()) { /* Can we go dyntick-idle despite still having callbacks? */ - per_cpu(rcu_dyntick_drain, cpu) = 0; - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; + rdtp->dyntick_drain = 0; + rdtp->dyntick_holdoff = jiffies; if (rcu_cpu_has_nonlazy_callbacks(cpu)) { trace_rcu_prep_idle("Dyntick with callbacks"); - per_cpu(rcu_idle_gp_timer_expires, cpu) = + rdtp->idle_gp_timer_expires = jiffies + RCU_IDLE_GP_DELAY; } else { - per_cpu(rcu_idle_gp_timer_expires, cpu) = + rdtp->idle_gp_timer_expires = jiffies + RCU_IDLE_LAZY_GP_DELAY; trace_rcu_prep_idle("Dyntick with lazy callbacks"); } - tp = &per_cpu(rcu_idle_gp_timer, cpu); - mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu)); - per_cpu(rcu_nonlazy_posted_snap, cpu) = - per_cpu(rcu_nonlazy_posted, cpu); + tp = &rdtp->idle_gp_timer; + mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); + rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; return; /* Nothing more to do immediately. */ - } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { + } else if (--(rdtp->dyntick_drain) <= 0) { /* We have hit the limit, so time to give up. */ - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; + rdtp->dyntick_holdoff = jiffies; trace_rcu_prep_idle("Begin holdoff"); invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ return; @@ -2229,7 +2217,7 @@ static void rcu_prepare_for_idle(int cpu) */ static void rcu_idle_count_callbacks_posted(void) { - __this_cpu_add(rcu_nonlazy_posted, 1); + __this_cpu_add(rcu_dynticks.nonlazy_posted, 1); } #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ @@ -2240,11 +2228,12 @@ static void rcu_idle_count_callbacks_posted(void) static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { - struct timer_list *tltp = &per_cpu(rcu_idle_gp_timer, cpu); + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + struct timer_list *tltp = &rdtp->idle_gp_timer; sprintf(cp, "drain=%d %c timer=%lu", - per_cpu(rcu_dyntick_drain, cpu), - per_cpu(rcu_dyntick_holdoff, cpu) == jiffies ? 'H' : '.', + rdtp->dyntick_drain, + rdtp->dyntick_holdoff == jiffies ? 'H' : '.', timer_pending(tltp) ? tltp->expires - jiffies : -1); } From aa9b16306e3243229580ff889cc59fd66bf77973 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 May 2012 16:41:44 -0700 Subject: [PATCH 251/475] rcu: Precompute RCU_FAST_NO_HZ timer offsets When a CPU is entering dyntick-idle mode, tick_nohz_stop_sched_tick() calls rcu_needs_cpu() see if RCU needs that CPU, and, if not, computes the next wakeup time based on the timer wheels. Only later, when actually entering the idle loop, rcu_prepare_for_idle() will be invoked. In some cases, rcu_prepare_for_idle() will post timers to wake the CPU back up. But all for naught: The next wakeup time for the CPU has already been computed, and posting a timer afterwards does not force that wakeup time to be recomputed. This means that rcu_prepare_for_idle()'s have no effect. This is not a problem on a busy system because something else will wake up the CPU soon enough. However, on lightly loaded systems, the CPU might stay asleep for a considerable length of time. If that CPU has a callback that the rest of the system is waiting on, the system might run very slowly or (in theory) even hang. This commit avoids this problem by having rcu_needs_cpu() give tick_nohz_stop_sched_tick() an estimate of when RCU will need the CPU to wake back up, which tick_nohz_stop_sched_tick() takes into account when programming the CPU's wakeup time. An alternative approach is for rcu_prepare_for_idle() to use hrtimers instead of normal timers, but timers are much more efficient than are hrtimers for frequently and repeatedly posting and cancelling a given timer, which is exactly what RCU_FAST_NO_HZ does. Reported-by: Pascal Chapperon Reported-by: Heiko Carstens Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Tested-by: Heiko Carstens Tested-by: Pascal Chapperon --- include/linux/rcutiny.h | 6 ++-- include/linux/rcutree.h | 2 +- kernel/rcutree_plugin.h | 66 ++++++++++++++++++++++++++-------------- kernel/time/tick-sched.c | 7 ++++- 4 files changed, 54 insertions(+), 27 deletions(-) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index adb5e5a38ca..854dc4c5c27 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -87,8 +87,9 @@ static inline void kfree_call_rcu(struct rcu_head *head, #ifdef CONFIG_TINY_RCU -static inline int rcu_needs_cpu(int cpu) +static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { + *delta_jiffies = ULONG_MAX; return 0; } @@ -96,8 +97,9 @@ static inline int rcu_needs_cpu(int cpu) int rcu_preempt_needs_cpu(void); -static inline int rcu_needs_cpu(int cpu) +static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { + *delta_jiffies = ULONG_MAX; return rcu_preempt_needs_cpu(); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 3c6083cde4f..952b7933930 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -32,7 +32,7 @@ extern void rcu_init(void); extern void rcu_note_context_switch(int cpu); -extern int rcu_needs_cpu(int cpu); +extern int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies); extern void rcu_cpu_stall_reset(void); /* diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 6bd9637d5d8..5271a020887 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1886,8 +1886,9 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs * any flavor of RCU. */ -int rcu_needs_cpu(int cpu) +int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { + *delta_jiffies = ULONG_MAX; return rcu_cpu_has_callbacks(cpu); } @@ -1962,28 +1963,6 @@ static void rcu_idle_count_callbacks_posted(void) #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ -/* - * Allow the CPU to enter dyntick-idle mode if either: (1) There are no - * callbacks on this CPU, (2) this CPU has not yet attempted to enter - * dyntick-idle mode, or (3) this CPU is in the process of attempting to - * enter dyntick-idle mode. Otherwise, if we have recently tried and failed - * to enter dyntick-idle mode, we refuse to try to enter it. After all, - * it is better to incur scheduling-clock interrupts than to spin - * continuously for the same time duration! - */ -int rcu_needs_cpu(int cpu) -{ - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - - /* Flag a new idle sojourn to the idle-entry state machine. */ - rdtp->idle_first_pass = 1; - /* If no callbacks, RCU doesn't need the CPU. */ - if (!rcu_cpu_has_callbacks(cpu)) - return 0; - /* Otherwise, RCU needs the CPU only if it recently tried and failed. */ - return rdtp->dyntick_holdoff == jiffies; -} - /* * Does the specified flavor of RCU have non-lazy callbacks pending on * the specified CPU? Both RCU flavor and CPU are specified by the @@ -2026,6 +2005,47 @@ static bool rcu_cpu_has_nonlazy_callbacks(int cpu) rcu_preempt_cpu_has_nonlazy_callbacks(cpu); } +/* + * Allow the CPU to enter dyntick-idle mode if either: (1) There are no + * callbacks on this CPU, (2) this CPU has not yet attempted to enter + * dyntick-idle mode, or (3) this CPU is in the process of attempting to + * enter dyntick-idle mode. Otherwise, if we have recently tried and failed + * to enter dyntick-idle mode, we refuse to try to enter it. After all, + * it is better to incur scheduling-clock interrupts than to spin + * continuously for the same time duration! + * + * The delta_jiffies argument is used to store the time when RCU is + * going to need the CPU again if it still has callbacks. The reason + * for this is that rcu_prepare_for_idle() might need to post a timer, + * but if so, it will do so after tick_nohz_stop_sched_tick() has set + * the wakeup time for this CPU. This means that RCU's timer can be + * delayed until the wakeup time, which defeats the purpose of posting + * a timer. + */ +int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) +{ + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + + /* Flag a new idle sojourn to the idle-entry state machine. */ + rdtp->idle_first_pass = 1; + /* If no callbacks, RCU doesn't need the CPU. */ + if (!rcu_cpu_has_callbacks(cpu)) { + *delta_jiffies = ULONG_MAX; + return 0; + } + if (rdtp->dyntick_holdoff == jiffies) { + /* RCU recently tried and failed, so don't try again. */ + *delta_jiffies = 1; + return 1; + } + /* Set up for the possibility that RCU will post a timer. */ + if (rcu_cpu_has_nonlazy_callbacks(cpu)) + *delta_jiffies = RCU_IDLE_GP_DELAY; + else + *delta_jiffies = RCU_IDLE_LAZY_GP_DELAY; + return 0; +} + /* * Handler for smp_call_function_single(). The only point of this * handler is to wake the CPU up, so the handler does only tracing. diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6a3a5b9ff56..52f5ebbd443 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -274,6 +274,7 @@ EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); static void tick_nohz_stop_sched_tick(struct tick_sched *ts) { unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; + unsigned long rcu_delta_jiffies; ktime_t last_update, expires, now; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; @@ -322,7 +323,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) time_delta = timekeeping_max_deferment(); } while (read_seqretry(&xtime_lock, seq)); - if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || + if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) || arch_needs_cpu(cpu)) { next_jiffies = last_jiffies + 1; delta_jiffies = 1; @@ -330,6 +331,10 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) /* Get the next timer wheel timer */ next_jiffies = get_next_timer_interrupt(last_jiffies); delta_jiffies = next_jiffies - last_jiffies; + if (rcu_delta_jiffies < delta_jiffies) { + next_jiffies = last_jiffies + rcu_delta_jiffies; + delta_jiffies = rcu_delta_jiffies; + } } /* * Do not stop the tick, if we are only one off From e9b4cf2094a65a05a831f070e46c554260632330 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 15:22:41 +0300 Subject: [PATCH 252/475] UBI: fix debugfs-less systems support Commit "aa44d1d UBI: remove Kconfig debugging option" broke UBI and it refuses to initialize if debugfs (CONFIG_DEBUG_FS) is disabled. I incorrectly assumed that debugfs files creation function will return success if debugfs is disabled, but they actually return -ENODEV. This patch fixes the issue. Reported-by: Paul Parsons Signed-off-by: Artem Bityutskiy Tested-by: Paul Parsons --- drivers/mtd/ubi/debug.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index 9f957c2d48e..09d4f8d9d59 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -264,6 +264,9 @@ static struct dentry *dfs_rootdir; */ int ubi_debugfs_init(void) { + if (!IS_ENABLED(DEBUG_FS)) + return 0; + dfs_rootdir = debugfs_create_dir("ubi", NULL); if (IS_ERR_OR_NULL(dfs_rootdir)) { int err = dfs_rootdir ? -ENODEV : PTR_ERR(dfs_rootdir); @@ -281,7 +284,8 @@ int ubi_debugfs_init(void) */ void ubi_debugfs_exit(void) { - debugfs_remove(dfs_rootdir); + if (IS_ENABLED(DEBUG_FS)) + debugfs_remove(dfs_rootdir); } /* Read an UBI debugfs file */ @@ -403,6 +407,9 @@ int ubi_debugfs_init_dev(struct ubi_device *ubi) struct dentry *dent; struct ubi_debug_info *d = ubi->dbg; + if (!IS_ENABLED(DEBUG_FS)) + return 0; + n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN + 1, UBI_DFS_DIR_NAME, ubi->ubi_num); if (n == UBI_DFS_DIR_LEN) { @@ -470,5 +477,6 @@ out: */ void ubi_debugfs_exit_dev(struct ubi_device *ubi) { - debugfs_remove_recursive(ubi->dbg->dfs_dir); + if (IS_ENABLED(DEBUG_FS)) + debugfs_remove_recursive(ubi->dbg->dfs_dir); } From 818039c7d597db3b1d30964a8f9489ac42c0642d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 16:03:10 +0300 Subject: [PATCH 253/475] UBIFS: fix debugfs-less systems support Commit "f70b7e5 UBIFS: remove Kconfig debugging option" broke UBIFS and it refuses to initialize if debugfs (CONFIG_DEBUG_FS) is disabled. I incorrectly assumed that debugfs files creation function will return success if debugfs is disabled, but they actually return -ENODEV. This patch fixes the issue. Reported-by: Paul Parsons Signed-off-by: Artem Bityutskiy Tested-by: Paul Parsons --- fs/ubifs/debug.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 685a83756b2..84a7e6f3c04 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2918,6 +2918,9 @@ int dbg_debugfs_init_fs(struct ubifs_info *c) struct dentry *dent; struct ubifs_debug_info *d = c->dbg; + if (!IS_ENABLED(DEBUG_FS)) + return 0; + n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, c->vi.ubi_num, c->vi.vol_id); if (n == UBIFS_DFS_DIR_LEN) { @@ -3010,7 +3013,8 @@ out: */ void dbg_debugfs_exit_fs(struct ubifs_info *c) { - debugfs_remove_recursive(c->dbg->dfs_dir); + if (IS_ENABLED(DEBUG_FS)) + debugfs_remove_recursive(c->dbg->dfs_dir); } struct ubifs_global_debug_info ubifs_dbg; @@ -3095,6 +3099,9 @@ int dbg_debugfs_init(void) const char *fname; struct dentry *dent; + if (!IS_ENABLED(DEBUG_FS)) + return 0; + fname = "ubifs"; dent = debugfs_create_dir(fname, NULL); if (IS_ERR_OR_NULL(dent)) @@ -3159,7 +3166,8 @@ out: */ void dbg_debugfs_exit(void) { - debugfs_remove_recursive(dfs_rootdir); + if (IS_ENABLED(DEBUG_FS)) + debugfs_remove_recursive(dfs_rootdir); } /** From 12027f1b3fd69a4e9017e6b13c72547a99c6cf54 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 7 Jun 2012 15:15:30 +0300 Subject: [PATCH 254/475] UBI: correct ubi_wl_flush locking Commit "62f38455 UBI: modify ubi_wl_flush function to clear work queue for a lnum" takes the 'work_sem' semaphore in write mode for the entire loop, which is not very good because it will block other workers for potentially long time. We do not need to have it in write mode - read mode is enough, and we do not need to hole it over the entire loop. So this patch turns changes the locking: takes 'work_sem' in read mode and pushes it down to the loop. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/wl.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 9df100a4ec3..b6be644e7b8 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1262,11 +1262,11 @@ int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum) dbg_wl("flush pending work for LEB %d:%d (%d pending works)", vol_id, lnum, ubi->works_count); - down_write(&ubi->work_sem); while (found) { struct ubi_work *wrk; found = 0; + down_read(&ubi->work_sem); spin_lock(&ubi->wl_lock); list_for_each_entry(wrk, &ubi->works, list) { if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) && @@ -1277,18 +1277,27 @@ int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum) spin_unlock(&ubi->wl_lock); err = wrk->func(ubi, wrk, 0); - if (err) - goto out; + if (err) { + up_read(&ubi->work_sem); + return err; + } + spin_lock(&ubi->wl_lock); found = 1; break; } } spin_unlock(&ubi->wl_lock); + up_read(&ubi->work_sem); } -out: + /* + * Make sure all the works which have been done in parallel are + * finished. + */ + down_write(&ubi->work_sem); up_write(&ubi->work_sem); + return err; } From d1992b169d31f339dc5ea4e9f312567c8cf322a3 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Thu, 17 May 2012 22:35:46 +0000 Subject: [PATCH 255/475] netfilter: xt_HMARK: fix endianness and provide consistent hashing This patch addresses two issues: a) Fix usage of u32 and __be32 that causes endianess warnings via sparse. b) Ensure consistent hashing in a cluster that is composed of big and little endian systems. Thus, we obtain the same hash mark in an heterogeneous cluster. Reported-by: Dan Carpenter Signed-off-by: Hans Schillstrom Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/xt_HMARK.h | 5 +++ net/netfilter/xt_HMARK.c | 72 +++++++++++++++++------------- 2 files changed, 46 insertions(+), 31 deletions(-) diff --git a/include/linux/netfilter/xt_HMARK.h b/include/linux/netfilter/xt_HMARK.h index abb1650940d..826fc580757 100644 --- a/include/linux/netfilter/xt_HMARK.h +++ b/include/linux/netfilter/xt_HMARK.h @@ -27,7 +27,12 @@ union hmark_ports { __u16 src; __u16 dst; } p16; + struct { + __be16 src; + __be16 dst; + } b16; __u32 v32; + __be32 b32; }; struct xt_hmark_info { diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index 0a96a43108e..1686ca1b53a 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -32,13 +32,13 @@ MODULE_ALIAS("ipt_HMARK"); MODULE_ALIAS("ip6t_HMARK"); struct hmark_tuple { - u32 src; - u32 dst; + __be32 src; + __be32 dst; union hmark_ports uports; - uint8_t proto; + u8 proto; }; -static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask) +static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask) { return (addr32[0] & mask[0]) ^ (addr32[1] & mask[1]) ^ @@ -46,8 +46,8 @@ static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask) (addr32[3] & mask[3]); } -static inline u32 -hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask) +static inline __be32 +hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask) { switch (l3num) { case AF_INET: @@ -58,6 +58,22 @@ hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask) return 0; } +static inline void hmark_swap_ports(union hmark_ports *uports, + const struct xt_hmark_info *info) +{ + union hmark_ports hp; + u16 src, dst; + + hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32; + src = ntohs(hp.b16.src); + dst = ntohs(hp.b16.dst); + + if (dst > src) + uports->v32 = (dst << 16) | src; + else + uports->v32 = (src << 16) | dst; +} + static int hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, const struct xt_hmark_info *info) @@ -74,22 +90,19 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.all, - info->src_mask.all); - t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.all, - info->dst_mask.all); + t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6, + info->src_mask.ip6); + t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6, + info->dst_mask.ip6); if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) return 0; t->proto = nf_ct_protonum(ct); if (t->proto != IPPROTO_ICMP) { - t->uports.p16.src = otuple->src.u.all; - t->uports.p16.dst = rtuple->src.u.all; - t->uports.v32 = (t->uports.v32 & info->port_mask.v32) | - info->port_set.v32; - if (t->uports.p16.dst < t->uports.p16.src) - swap(t->uports.p16.dst, t->uports.p16.src); + t->uports.b16.src = otuple->src.u.all; + t->uports.b16.dst = rtuple->src.u.all; + hmark_swap_ports(&t->uports, info); } return 0; @@ -98,15 +111,19 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, #endif } +/* This hash function is endian independent, to ensure consistent hashing if + * the cluster is composed of big and little endian systems. */ static inline u32 hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info) { u32 hash; + u32 src = ntohl(t->src); + u32 dst = ntohl(t->dst); - if (t->dst < t->src) - swap(t->src, t->dst); + if (dst < src) + swap(src, dst); - hash = jhash_3words(t->src, t->dst, t->uports.v32, info->hashrnd); + hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd); hash = hash ^ (t->proto & info->proto_mask); return (((u64)hash * info->hmodulus) >> 32) + info->hoffset; @@ -126,11 +143,7 @@ hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff, if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0) return; - t->uports.v32 = (t->uports.v32 & info->port_mask.v32) | - info->port_set.v32; - - if (t->uports.p16.dst < t->uports.p16.src) - swap(t->uports.p16.dst, t->uports.p16.src); + hmark_swap_ports(&t->uports, info); } #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) @@ -178,8 +191,8 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t, return -1; } noicmp: - t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.all); - t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.all); + t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6); + t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6); if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) return 0; @@ -255,11 +268,8 @@ hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t, } } - t->src = (__force u32) ip->saddr; - t->dst = (__force u32) ip->daddr; - - t->src &= info->src_mask.ip; - t->dst &= info->dst_mask.ip; + t->src = ip->saddr & info->src_mask.ip; + t->dst = ip->daddr & info->dst_mask.ip; if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) return 0; From d109e9af61a6d2fdf33dc615ab8b724a8e75a8a4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Jun 2012 13:31:04 +0200 Subject: [PATCH 256/475] netfilter: nf_ct_h323: fix bug in rtcp natting The nat_rtp_rtcp hook takes two separate parameters port and rtp_port. port is expected to be the real h245 address (found inside the packet). rtp_port is the even number closest to port (RTP ports are even and RTCP ports are odd). However currently, both port and rtp_port are having same value (both are rounded to nearest even numbers). This works well in case of openlogicalchannel with media (RTP/even) port. But in case of openlogicalchannel for media control (RTCP/odd) port, h245 address in the packet is wrongly modified to have an even port. I am attaching a pcap demonstrating the problem, for any further analysis. This behavior was introduced around v2.6.19 while rewriting the helper. Signed-off-by: Jagdish Motwani Signed-off-by: Sanket Shah Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_h323_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 46d69d7f1bb..31f50bc3a31 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -270,9 +270,8 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, return 0; /* RTP port is even */ - port &= htons(~1); - rtp_port = port; - rtcp_port = htons(ntohs(port) + 1); + rtp_port = port & ~htons(1); + rtcp_port = port | htons(1); /* Create expect for RTP */ if ((rtp_exp = nf_ct_expect_alloc(ct)) == NULL) From f07936f2c446bd5310e669c8e6eab3f812ea665a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Jun 2012 10:21:03 -0400 Subject: [PATCH 257/475] NFS: Remove incorrect BUG_ON in nfs_found_client It is perfectly valid for nfs_get_client() to return a nfs_client that is in the process of setting up the NFSv4.1 session. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 7d108753af8..17ba6b99565 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -544,8 +544,6 @@ nfs_found_client(const struct nfs_client_initdata *cl_init, smp_rmb(); - BUG_ON(clp->cl_cons_state != NFS_CS_READY); - dprintk("<-- %s found nfs_client %p for %s\n", __func__, clp, cl_init->hostname ?: ""); return clp; From 743628e868c5992354fc80b4d1e9a6143da1c0e6 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 7 Jun 2012 09:05:21 -0700 Subject: [PATCH 258/475] x86, efi stub: Add .reloc section back into image Some UEFI firmware will not load a .efi with a .reloc section with a size of 0. Therefore, we create a .efi image with 4 main areas and 3 sections. 1. PE/COFF file header 2. .setup section (covers all setup code following the first sector) 3. .reloc section (contains 1 dummy reloc entry, created in build.c) 4. .text section (covers the remaining kernel image) To make room for the new .setup section data, the header bugger_off_msg had to be shortened. Reported-by: Henrik Rydberg Signed-off-by: Jordan Justen Link: http://lkml.kernel.org/r/1339085121-12760-1-git-send-email-jordan.l.justen@intel.com Tested-by: Lee G Rosenbaum Tested-by: Henrik Rydberg Cc: Matt Fleming Signed-off-by: H. Peter Anvin --- arch/x86/boot/header.S | 42 ++++++--- arch/x86/boot/tools/build.c | 172 +++++++++++++++++++++++------------- 2 files changed, 140 insertions(+), 74 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 8bbea6aa40d..efe5acfc79c 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -94,10 +94,10 @@ bs_die: .section ".bsdata", "a" bugger_off_msg: - .ascii "Direct booting from floppy is no longer supported.\r\n" - .ascii "Please use a boot loader program instead.\r\n" + .ascii "Direct floppy boot is not supported. " + .ascii "Use a boot loader program instead.\r\n" .ascii "\n" - .ascii "Remove disk and press any key to reboot . . .\r\n" + .ascii "Remove disk and press any key to reboot ...\r\n" .byte 0 #ifdef CONFIG_EFI_STUB @@ -111,7 +111,7 @@ coff_header: #else .word 0x8664 # x86-64 #endif - .word 2 # nr_sections + .word 3 # nr_sections .long 0 # TimeDateStamp .long 0 # PointerToSymbolTable .long 1 # NumberOfSymbols @@ -158,8 +158,8 @@ extra_header_fields: #else .quad 0 # ImageBase #endif - .long 0x1000 # SectionAlignment - .long 0x200 # FileAlignment + .long 0x20 # SectionAlignment + .long 0x20 # FileAlignment .word 0 # MajorOperatingSystemVersion .word 0 # MinorOperatingSystemVersion .word 0 # MajorImageVersion @@ -200,8 +200,10 @@ extra_header_fields: # Section table section_table: - .ascii ".text" - .byte 0 + # + # The offset & size fields are filled in by build.c. + # + .ascii ".setup" .byte 0 .byte 0 .long 0 @@ -217,9 +219,8 @@ section_table: # # The EFI application loader requires a relocation section - # because EFI applications must be relocatable. But since - # we don't need the loader to fixup any relocs for us, we - # just create an empty (zero-length) .reloc section header. + # because EFI applications must be relocatable. The .reloc + # offset & size fields are filled in by build.c. # .ascii ".reloc" .byte 0 @@ -233,6 +234,25 @@ section_table: .word 0 # NumberOfRelocations .word 0 # NumberOfLineNumbers .long 0x42100040 # Characteristics (section flags) + + # + # The offset & size fields are filled in by build.c. + # + .ascii ".text" + .byte 0 + .byte 0 + .byte 0 + .long 0 + .long 0x0 # startup_{32,64} + .long 0 # Size of initialized data + # on disk + .long 0x0 # startup_{32,64} + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long 0x60500020 # Characteristics (section flags) + #endif /* CONFIG_EFI_STUB */ # Kernel attributes; used by setup. This is part 1 of the diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 3f61f6e2b46..4b8e165ee57 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -50,6 +50,8 @@ typedef unsigned int u32; u8 buf[SETUP_SECT_MAX*512]; int is_big_kernel; +#define PECOFF_RELOC_RESERVE 0x20 + /*----------------------------------------------------------------------*/ static const u32 crctab32[] = { @@ -133,11 +135,103 @@ static void usage(void) die("Usage: build setup system [> image]"); } +#ifdef CONFIG_EFI_STUB + +static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +{ + unsigned int pe_header; + unsigned short num_sections; + u8 *section; + + pe_header = get_unaligned_le32(&buf[0x3c]); + num_sections = get_unaligned_le16(&buf[pe_header + 6]); + +#ifdef CONFIG_X86_32 + section = &buf[pe_header + 0xa8]; +#else + section = &buf[pe_header + 0xb8]; +#endif + + while (num_sections > 0) { + if (strncmp((char*)section, section_name, 8) == 0) { + /* section header size field */ + put_unaligned_le32(size, section + 0x8); + + /* section header vma field */ + put_unaligned_le32(offset, section + 0xc); + + /* section header 'size of initialised data' field */ + put_unaligned_le32(size, section + 0x10); + + /* section header 'file offset' field */ + put_unaligned_le32(offset, section + 0x14); + + break; + } + section += 0x28; + num_sections--; + } +} + +static void update_pecoff_setup_and_reloc(unsigned int size) +{ + u32 setup_offset = 0x200; + u32 reloc_offset = size - PECOFF_RELOC_RESERVE; + u32 setup_size = reloc_offset - setup_offset; + + update_pecoff_section_header(".setup", setup_offset, setup_size); + update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE); + + /* + * Modify .reloc section contents with a single entry. The + * relocation is applied to offset 10 of the relocation section. + */ + put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]); + put_unaligned_le32(10, &buf[reloc_offset + 4]); +} + +static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) +{ + unsigned int pe_header; + unsigned int text_sz = file_sz - text_start; + + pe_header = get_unaligned_le32(&buf[0x3c]); + + /* Size of image */ + put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); + + /* + * Size of code: Subtract the size of the first sector (512 bytes) + * which includes the header. + */ + put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]); + +#ifdef CONFIG_X86_32 + /* + * Address of entry point. + * + * The EFI stub entry point is +16 bytes from the start of + * the .text section. + */ + put_unaligned_le32(text_start + 16, &buf[pe_header + 0x28]); +#else + /* + * Address of entry point. startup_32 is at the beginning and + * the 64-bit entry point (startup_64) is always 512 bytes + * after. The EFI stub entry point is 16 bytes after that, as + * the first instruction allows legacy loaders to jump over + * the EFI stub initialisation + */ + put_unaligned_le32(text_start + 528, &buf[pe_header + 0x28]); +#endif /* CONFIG_X86_32 */ + + update_pecoff_section_header(".text", text_start, text_sz); +} + +#endif /* CONFIG_EFI_STUB */ + int main(int argc, char ** argv) { -#ifdef CONFIG_EFI_STUB - unsigned int file_sz, pe_header; -#endif unsigned int i, sz, setup_sectors; int c; u32 sys_size; @@ -163,6 +257,12 @@ int main(int argc, char ** argv) die("Boot block hasn't got boot flag (0xAA55)"); fclose(file); +#ifdef CONFIG_EFI_STUB + /* Reserve 0x20 bytes for .reloc section */ + memset(buf+c, 0, PECOFF_RELOC_RESERVE); + c += PECOFF_RELOC_RESERVE; +#endif + /* Pad unused space with zeros */ setup_sectors = (c + 511) / 512; if (setup_sectors < SETUP_SECT_MIN) @@ -170,6 +270,10 @@ int main(int argc, char ** argv) i = setup_sectors*512; memset(buf+c, 0, i-c); +#ifdef CONFIG_EFI_STUB + update_pecoff_setup_and_reloc(i); +#endif + /* Set the default root device */ put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]); @@ -194,66 +298,8 @@ int main(int argc, char ** argv) put_unaligned_le32(sys_size, &buf[0x1f4]); #ifdef CONFIG_EFI_STUB - file_sz = sz + i + ((sys_size * 16) - sz); - - pe_header = get_unaligned_le32(&buf[0x3c]); - - /* Size of image */ - put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); - - /* - * Subtract the size of the first section (512 bytes) which - * includes the header and .reloc section. The remaining size - * is that of the .text section. - */ - file_sz -= 512; - - /* Size of code */ - put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]); - -#ifdef CONFIG_X86_32 - /* - * Address of entry point. - * - * The EFI stub entry point is +16 bytes from the start of - * the .text section. - */ - put_unaligned_le32(i + 16, &buf[pe_header + 0x28]); - - /* .text size */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]); - - /* .text vma */ - put_unaligned_le32(0x200, &buf[pe_header + 0xb4]); - - /* .text size of initialised data */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]); - - /* .text file offset */ - put_unaligned_le32(0x200, &buf[pe_header + 0xbc]); -#else - /* - * Address of entry point. startup_32 is at the beginning and - * the 64-bit entry point (startup_64) is always 512 bytes - * after. The EFI stub entry point is 16 bytes after that, as - * the first instruction allows legacy loaders to jump over - * the EFI stub initialisation - */ - put_unaligned_le32(i + 528, &buf[pe_header + 0x28]); - - /* .text size */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]); - - /* .text vma */ - put_unaligned_le32(0x200, &buf[pe_header + 0xc4]); - - /* .text size of initialised data */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]); - - /* .text file offset */ - put_unaligned_le32(0x200, &buf[pe_header + 0xcc]); -#endif /* CONFIG_X86_32 */ -#endif /* CONFIG_EFI_STUB */ + update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); +#endif crc = partial_crc32(buf, i, crc); if (fwrite(buf, 1, i, stdout) != i) From fb47ddc9d5ded3d202335ea29743b9242d54eb5a Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 7 Jun 2012 11:42:12 -0400 Subject: [PATCH 259/475] NFS4: Fix open bug when pnfs module blacklisted Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 29fd23c0efd..64f90d845f6 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -365,7 +365,7 @@ static inline bool pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src, struct nfs_server *nfss) { - return (dst && src && src->bm != 0 && + return (dst && src && src->bm != 0 && nfss->pnfs_curr_ld && nfss->pnfs_curr_ld->id == src->l_type); } From 02c67525cf325131aa06c6b0c6fd457bd57161e3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Jun 2012 13:45:53 -0400 Subject: [PATCH 260/475] NFSv4.1: Convert another trivial printk into a dprintk Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bfc919fd3f0..5881f2cc5d8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5297,7 +5297,7 @@ static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (status) - pr_warn("NFS: Got error %d from the server %s on " + dprintk("NFS: Got error %d from the server %s on " "DESTROY_CLIENTID.", status, clp->cl_hostname); return status; } From a06998b88b1651c5f71c0e35f528bf2057188ead Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Jun 2012 00:07:20 +0000 Subject: [PATCH 261/475] net: l2tp_eth: fix kernel panic on rmmod l2tp_eth We must prevent module unloading if some devices are still attached to l2tp_eth driver. Signed-off-by: Eric Dumazet Reported-by: Denys Fedoryshchenko Tested-by: Denys Fedoryshchenko Cc: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 443591d629c..185f12f4a5f 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -162,6 +162,7 @@ static void l2tp_eth_delete(struct l2tp_session *session) if (dev) { unregister_netdev(dev); spriv->dev = NULL; + module_put(THIS_MODULE); } } } @@ -249,6 +250,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p if (rc < 0) goto out_del_dev; + __module_get(THIS_MODULE); /* Must be done after register_netdev() */ strlcpy(session->ifname, dev->name, IFNAMSIZ); From 4bd6683bd400c8b1d2ad544bb155d86a5d10f91c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Jun 2012 04:58:35 +0000 Subject: [PATCH 262/475] net: neighbour: fix neigh_dump_info() Denys found out "ip neigh" output was truncated to about 54 neighbours. Signed-off-by: Eric Dumazet Reported-by: Denys Fedoryshchenko Signed-off-by: David S. Miller --- net/core/neighbour.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index eb09f8bbbf0..d81d026138f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2219,9 +2219,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); - for (h = 0; h < (1 << nht->hash_shift); h++) { - if (h < s_h) - continue; + for (h = s_h; h < (1 << nht->hash_shift); h++) { if (h > s_h) s_idx = 0; for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; @@ -2260,9 +2258,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, read_lock_bh(&tbl->lock); - for (h = 0; h <= PNEIGH_HASHMASK; h++) { - if (h < s_h) - continue; + for (h = s_h; h <= PNEIGH_HASHMASK; h++) { if (h > s_h) s_idx = 0; for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { @@ -2297,7 +2293,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) struct neigh_table *tbl; int t, family, s_t; int proxy = 0; - int err = 0; + int err; read_lock(&neigh_tbl_lock); family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; @@ -2311,7 +2307,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; - for (tbl = neigh_tables, t = 0; tbl && (err >= 0); + for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) { if (t < s_t || (family && tbl->family != family)) continue; @@ -2322,6 +2318,8 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) err = pneigh_dump_table(tbl, skb, cb); else err = neigh_dump_table(tbl, skb, cb); + if (err < 0) + break; } read_unlock(&neigh_tbl_lock); From 8bd74516b1bd9308c17f67583134d93f777203ca Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 7 Jun 2012 06:51:04 +0000 Subject: [PATCH 263/475] ipv6: fib: Restore NTF_ROUTER exception in fib6_age() Commit 5339ab8b1dd82 (ipv6: fib: Convert fib6_age() to dst_neigh_lookup().) seems to have mistakenly inverted the exception for cached NTF_ROUTER routes. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 0c220a41662..74c21b924a7 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1561,7 +1561,7 @@ static int fib6_age(struct rt6_info *rt, void *arg) neigh_flags = neigh->flags; neigh_release(neigh); } - if (neigh_flags & NTF_ROUTER) { + if (!(neigh_flags & NTF_ROUTER)) { RT6_TRACE("purging route %p via non-router but gateway\n", rt); return -1; From 5ff0feac88ced864f44adb145142269196fa79d9 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 6 Jun 2012 10:01:30 +0000 Subject: [PATCH 264/475] sky2: fix checksum bit management on some chips The newer flavors of Yukon II use a different method for receive checksum offload. This is indicated in the driver by the SKY2_HW_NEW_LE flag. On these newer chips, the BMU_ENA_RX_CHKSUM should not be set. The driver would get incorrectly toggle the bit, enabling the old checksum logic on these chips and cause a BUG_ON() assertion. If receive checksum was toggled via ethtool. Reported-by: Kirill Smelkov Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/sky2.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index cace36f2ab9..28a54451a3e 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4381,10 +4381,12 @@ static int sky2_set_features(struct net_device *dev, netdev_features_t features) struct sky2_port *sky2 = netdev_priv(dev); netdev_features_t changed = dev->features ^ features; - if (changed & NETIF_F_RXCSUM) { - bool on = features & NETIF_F_RXCSUM; - sky2_write32(sky2->hw, Q_ADDR(rxqaddr[sky2->port], Q_CSR), - on ? BMU_ENA_RX_CHKSUM : BMU_DIS_RX_CHKSUM); + if ((changed & NETIF_F_RXCSUM) && + !(sky2->hw->flags & SKY2_HW_NEW_LE)) { + sky2_write32(sky2->hw, + Q_ADDR(rxqaddr[sky2->port], Q_CSR), + (features & NETIF_F_RXCSUM) + ? BMU_ENA_RX_CHKSUM : BMU_DIS_RX_CHKSUM); } if (changed & NETIF_F_RXHASH) From 278f015e9b67566991d4e831fe38e0ebbeef245e Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 6 Jun 2012 08:45:59 +0000 Subject: [PATCH 265/475] appletalk: Remove out of date message in printk I accidentally triggered this printk, which amused me for a few moments. Given we're post 2.2, we could just -EACCES, but does anyone even care about Appletalk now ? I figure it's better to leave sleeping dogs lie, and just update the message. Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/appletalk/ddp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 0301b328cf0..86852963b7f 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1208,9 +1208,7 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr, if (addr->sat_addr.s_node == ATADDR_BCAST && !sock_flag(sk, SOCK_BROADCAST)) { #if 1 - printk(KERN_WARNING "%s is broken and did not set " - "SO_BROADCAST. It will break when 2.2 is " - "released.\n", + pr_warn("atalk_connect: %s is broken and did not set SO_BROADCAST.\n", current->comm); #else return -EACCES; From 0142ef6cdca5f9784eb0762ac50fe378d98d71d4 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 7 Jun 2012 14:21:09 -0700 Subject: [PATCH 266/475] shmem: replace_page must flush_dcache and others Commit bde05d1ccd51 ("shmem: replace page if mapping excludes its zone") is not at all likely to break for anyone, but it was an earlier version from before review feedback was incorporated. Fix that up now. * shmem_replace_page must flush_dcache_page after copy_highpage [akpm] * Expand comment on why shmem_unuse_inode needs page_swapcount [akpm] * Remove excess of VM_BUG_ONs from shmem_replace_page [wangcong] * Check page_private matches swap before calling shmem_replace_page [hughd] * shmem_replace_page allow for unexpected race in radix_tree lookup [hughd] Signed-off-by: Hugh Dickins Cc: Cong Wang Cc: Christoph Hellwig Cc: KAMEZAWA Hiroyuki Cc: Alan Cox Cc: Stephane Marchesin Cc: Andi Kleen Cc: Dave Airlie Cc: Daniel Vetter Cc: Rob Clark Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 57 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 585bd220a21..a15a466d0d1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -683,10 +683,21 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, mutex_lock(&shmem_swaplist_mutex); /* * We needed to drop mutex to make that restrictive page - * allocation; but the inode might already be freed by now, - * and we cannot refer to inode or mapping or info to check. - * However, we do hold page lock on the PageSwapCache page, - * so can check if that still has our reference remaining. + * allocation, but the inode might have been freed while we + * dropped it: although a racing shmem_evict_inode() cannot + * complete without emptying the radix_tree, our page lock + * on this swapcache page is not enough to prevent that - + * free_swap_and_cache() of our swap entry will only + * trylock_page(), removing swap from radix_tree whatever. + * + * We must not proceed to shmem_add_to_page_cache() if the + * inode has been freed, but of course we cannot rely on + * inode or mapping or info to check that. However, we can + * safely check if our swap entry is still in use (and here + * it can't have got reused for another page): if it's still + * in use, then the inode cannot have been freed yet, and we + * can safely proceed (if it's no longer in use, that tells + * nothing about the inode, but we don't need to unuse swap). */ if (!page_swapcount(*pagep)) error = -ENOENT; @@ -730,9 +741,9 @@ int shmem_unuse(swp_entry_t swap, struct page *page) /* * There's a faint possibility that swap page was replaced before - * caller locked it: it will come back later with the right page. + * caller locked it: caller will come back later with the right page. */ - if (unlikely(!PageSwapCache(page))) + if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val)) goto out; /* @@ -995,21 +1006,15 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, newpage = shmem_alloc_page(gfp, info, index); if (!newpage) return -ENOMEM; - VM_BUG_ON(shmem_should_replace_page(newpage, gfp)); - *pagep = newpage; page_cache_get(newpage); copy_highpage(newpage, oldpage); + flush_dcache_page(newpage); - VM_BUG_ON(!PageLocked(oldpage)); __set_page_locked(newpage); - VM_BUG_ON(!PageUptodate(oldpage)); SetPageUptodate(newpage); - VM_BUG_ON(!PageSwapBacked(oldpage)); SetPageSwapBacked(newpage); - VM_BUG_ON(!swap_index); set_page_private(newpage, swap_index); - VM_BUG_ON(!PageSwapCache(oldpage)); SetPageSwapCache(newpage); /* @@ -1019,13 +1024,24 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, spin_lock_irq(&swap_mapping->tree_lock); error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage, newpage); - __inc_zone_page_state(newpage, NR_FILE_PAGES); - __dec_zone_page_state(oldpage, NR_FILE_PAGES); + if (!error) { + __inc_zone_page_state(newpage, NR_FILE_PAGES); + __dec_zone_page_state(oldpage, NR_FILE_PAGES); + } spin_unlock_irq(&swap_mapping->tree_lock); - BUG_ON(error); - mem_cgroup_replace_page_cache(oldpage, newpage); - lru_cache_add_anon(newpage); + if (unlikely(error)) { + /* + * Is this possible? I think not, now that our callers check + * both PageSwapCache and page_private after getting page lock; + * but be defensive. Reverse old to newpage for clear and free. + */ + oldpage = newpage; + } else { + mem_cgroup_replace_page_cache(oldpage, newpage); + lru_cache_add_anon(newpage); + *pagep = newpage; + } ClearPageSwapCache(oldpage); set_page_private(oldpage, 0); @@ -1033,7 +1049,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, unlock_page(oldpage); page_cache_release(oldpage); page_cache_release(oldpage); - return 0; + return error; } /* @@ -1107,7 +1123,8 @@ repeat: /* We have to do this with page locked to prevent races */ lock_page(page); - if (!PageSwapCache(page) || page->mapping) { + if (!PageSwapCache(page) || page_private(page) != swap.val || + page->mapping) { error = -EEXIST; /* try again */ goto failed; } From 6305902c2f871fd6db60af367bd7120fa977fa74 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 7 Jun 2012 14:21:10 -0700 Subject: [PATCH 267/475] MAINTAINERS: whitespace fixes Remove trailing spaces at EOL. Always use a tab after the type : Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index dafcba7e231..14bc7071f9d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1077,7 +1077,7 @@ F: drivers/media/video/s5p-fimc/ ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT M: Kyungmin Park M: Kamil Debski -M: Jeongtae Park +M: Jeongtae Park L: linux-arm-kernel@lists.infradead.org L: linux-media@vger.kernel.org S: Maintained @@ -1743,10 +1743,10 @@ F: include/linux/can/platform/ CAPABILITIES M: Serge Hallyn L: linux-security-module@vger.kernel.org -S: Supported +S: Supported F: include/linux/capability.h F: security/capability.c -F: security/commoncap.c +F: security/commoncap.c F: kernel/capability.c CELL BROADBAND ENGINE ARCHITECTURE @@ -2146,11 +2146,11 @@ S: Orphan F: drivers/net/wan/pc300* CYTTSP TOUCHSCREEN DRIVER -M: Javier Martinez Canillas -L: linux-input@vger.kernel.org -S: Maintained -F: drivers/input/touchscreen/cyttsp* -F: include/linux/input/cyttsp.h +M: Javier Martinez Canillas +L: linux-input@vger.kernel.org +S: Maintained +F: drivers/input/touchscreen/cyttsp* +F: include/linux/input/cyttsp.h DAMA SLAVE for AX.25 M: Joerg Reuter @@ -5185,7 +5185,7 @@ S: Maintained F: drivers/firmware/pcdp.* PCI ERROR RECOVERY -M: Linas Vepstas +M: Linas Vepstas L: linux-pci@vger.kernel.org S: Supported F: Documentation/PCI/pci-error-recovery.txt From bafb282df29c1524b1617019adebd6d0c3eb7a47 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 7 Jun 2012 14:21:11 -0700 Subject: [PATCH 268/475] c/r: prctl: update prctl_set_mm_exe_file() after mm->num_exe_file_vmas removal A fix for commit b32dfe377102 ("c/r: prctl: add ability to set new mm_struct::exe_file"). After removing mm->num_exe_file_vmas kernel keeps mm->exe_file until final mmput(), it never becomes NULL while task is alive. We can check for other mapped files in mm instead of checking mm->num_exe_file_vmas, and mark mm with flag MMF_EXE_FILE_CHANGED in order to forbid second changing of mm->exe_file. Signed-off-by: Konstantin Khlebnikov Reviewed-by: Cyrill Gorcunov Cc: Oleg Nesterov Cc: Matt Helsley Cc: Kees Cook Cc: KOSAKI Motohiro Cc: Tejun Heo Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + kernel/sys.c | 31 +++++++++++++++++++------------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 6029d8c5447..c688d4cc2e4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -439,6 +439,7 @@ extern int get_dumpable(struct mm_struct *mm); /* leave room for more dump flags */ #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ +#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) diff --git a/kernel/sys.c b/kernel/sys.c index 9ff89cb9657..54f20fdee93 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1796,17 +1796,11 @@ static bool vma_flags_mismatch(struct vm_area_struct *vma, static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) { + struct vm_area_struct *vma; struct file *exe_file; struct dentry *dentry; int err; - /* - * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's - * remain. So perform a quick test first. - */ - if (mm->num_exe_file_vmas) - return -EBUSY; - exe_file = fget(fd); if (!exe_file) return -EBADF; @@ -1827,17 +1821,30 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) if (err) goto exit; + down_write(&mm->mmap_sem); + + /* + * Forbid mm->exe_file change if there are mapped other files. + */ + err = -EBUSY; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (vma->vm_file && !path_equal(&vma->vm_file->f_path, + &exe_file->f_path)) + goto exit_unlock; + } + /* * The symlink can be changed only once, just to disallow arbitrary * transitions malicious software might bring in. This means one * could make a snapshot over all processes running and monitor * /proc/pid/exe changes to notice unusual activity if needed. */ - down_write(&mm->mmap_sem); - if (likely(!mm->exe_file)) - set_mm_exe_file(mm, exe_file); - else - err = -EBUSY; + err = -EPERM; + if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags)) + goto exit_unlock; + + set_mm_exe_file(mm, exe_file); +exit_unlock: up_write(&mm->mmap_sem); exit: From 1ad75b9e16280ca4e2501a629a225319cf2eef2e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 7 Jun 2012 14:21:11 -0700 Subject: [PATCH 269/475] c/r: prctl: add minimal address test to PR_SET_MM Make sure the address being set is greater than mmap_min_addr (as suggested by Kees Cook). Signed-off-by: Cyrill Gorcunov Acked-by: Kees Cook Cc: Serge Hallyn Cc: Tejun Heo Cc: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sys.c b/kernel/sys.c index 54f20fdee93..19a2c713996 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1869,7 +1869,7 @@ static int prctl_set_mm(int opt, unsigned long addr, if (opt == PR_SET_MM_EXE_FILE) return prctl_set_mm_exe_file(mm, (unsigned int)addr); - if (addr >= TASK_SIZE) + if (addr >= TASK_SIZE || addr < mmap_min_addr) return -EINVAL; error = -EINVAL; From 300f786b2683f8bb1ec0afb6e1851183a479c86d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 7 Jun 2012 14:21:12 -0700 Subject: [PATCH 270/475] c/r: prctl: add ability to get clear_tid_address Zero is written at clear_tid_address when the process exits. This functionality is used by pthread_join(). We already have sys_set_tid_address() to change this address for the current task but there is no way to obtain it from user space. Without the ability to find this address and dump it we can't restore pthread'ed apps which call pthread_join() once they have been restored. This patch introduces the PR_GET_TID_ADDRESS prctl option which allows the current process to obtain own clear_tid_address. This feature is available iif CONFIG_CHECKPOINT_RESTORE is set. [akpm@linux-foundation.org: fix prctl numbering] Signed-off-by: Andrew Vagin Signed-off-by: Cyrill Gorcunov Cc: Pedro Alves Cc: Oleg Nesterov Cc: Pavel Emelyanov Cc: Tejun Heo Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prctl.h | 10 ++++++---- kernel/sys.c | 13 +++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 711e0a30aac..3988012255d 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -127,8 +127,8 @@ #define PR_SET_PTRACER 0x59616d61 # define PR_SET_PTRACER_ANY ((unsigned long)-1) -#define PR_SET_CHILD_SUBREAPER 36 -#define PR_GET_CHILD_SUBREAPER 37 +#define PR_SET_CHILD_SUBREAPER 36 +#define PR_GET_CHILD_SUBREAPER 37 /* * If no_new_privs is set, then operations that grant new privileges (i.e. @@ -142,7 +142,9 @@ * asking selinux for a specific new context (e.g. with runcon) will result * in execve returning -EPERM. */ -#define PR_SET_NO_NEW_PRIVS 38 -#define PR_GET_NO_NEW_PRIVS 39 +#define PR_SET_NO_NEW_PRIVS 38 +#define PR_GET_NO_NEW_PRIVS 39 + +#define PR_GET_TID_ADDRESS 40 #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index 19a2c713996..0ec1942ba7e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1988,12 +1988,22 @@ out: up_read(&mm->mmap_sem); return error; } + +static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) +{ + return put_user(me->clear_child_tid, tid_addr); +} + #else /* CONFIG_CHECKPOINT_RESTORE */ static int prctl_set_mm(int opt, unsigned long addr, unsigned long arg4, unsigned long arg5) { return -EINVAL; } +static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) +{ + return -EINVAL; +} #endif SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, @@ -2131,6 +2141,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, else return -EINVAL; break; + case PR_GET_TID_ADDRESS: + error = prctl_get_tid_address(me, (int __user **)arg2); + break; default: return -EINVAL; } From 736f24d5e59d699c6e300c5da7e3bb882eddda67 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 7 Jun 2012 14:21:12 -0700 Subject: [PATCH 271/475] c/r: prctl: drop VMA flags test on PR_SET_MM_ stack data assignment In commit b76437579d13 ("procfs: mark thread stack correctly in proc//maps") the stack allocated via clone() is marked in /proc//maps as [stack:%d] thus it might be out of the former mm->start_stack/end_stack values (and even has some custom VMA flags set). So to be able to restore mm->start_stack/end_stack drop vma flags test, but still require the underlying VMA to exist. As always note this feature is under CONFIG_CHECKPOINT_RESTORE and requires CAP_SYS_RESOURCE to be granted. Signed-off-by: Cyrill Gorcunov Cc: Oleg Nesterov Acked-by: Kees Cook Cc: Pavel Emelyanov Cc: Serge Hallyn Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 0ec1942ba7e..f0ec44dcd41 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1786,14 +1786,6 @@ SYSCALL_DEFINE1(umask, int, mask) } #ifdef CONFIG_CHECKPOINT_RESTORE -static bool vma_flags_mismatch(struct vm_area_struct *vma, - unsigned long required, - unsigned long banned) -{ - return (vma->vm_flags & required) != required || - (vma->vm_flags & banned); -} - static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) { struct vm_area_struct *vma; @@ -1931,12 +1923,6 @@ static int prctl_set_mm(int opt, unsigned long addr, error = -EFAULT; goto out; } -#ifdef CONFIG_STACK_GROWSUP - if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0)) -#else - if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0)) -#endif - goto out; if (opt == PR_SET_MM_START_STACK) mm->start_stack = addr; else if (opt == PR_SET_MM_ARG_START) From 4e791c98ae7ff889121ca93b7bd97206e4a8d793 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 7 Jun 2012 14:21:12 -0700 Subject: [PATCH 272/475] drivers/platform/x86/acerhdf.c: correct Boris' mail address Correct mail address reference to a mail account which I actually read. Signed-off-by: Borislav Petkov Cc: Peter Feuerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/platform/x86/acerhdf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c index 639db4d0aa7..2fd9d36acd1 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -5,7 +5,7 @@ * * (C) 2009 - Peter Feuerer peter (a) piie.net * http://piie.net - * 2009 Borislav Petkov + * 2009 Borislav Petkov bp (a) alien8.de * * Inspired by and many thanks to: * o acerfand - Rachel Greenham From 7d8a45695cc8f9fcdf4121fcbd897ecb63f758e4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 7 Jun 2012 14:21:13 -0700 Subject: [PATCH 273/475] ipc: shm: restore MADV_REMOVE functionality on shared memory segments Commit 17cf28afea2a ("mm/fs: remove truncate_range") removed the truncate_range inode operation in favour of the fallocate file operation. When using SYSV IPC shared memory segments, calling madvise with the MADV_REMOVE advice on an area of shared memory will attempt to invoke the .fallocate function for the shm_file_operations, which is NULL and therefore returns -EOPNOTSUPP to userspace. The previous behaviour would inherit the inode_operations from the underlying tmpfs file and invoke truncate_range there. This patch restores the previous behaviour by wrapping the underlying fallocate function in shm_fallocate, as we do for fsync. [hughd@google.com: use -ENOTSUPP in shm_fallocate()] Signed-off-by: Will Deacon Acked-by: Hugh Dickins Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/shm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ipc/shm.c b/ipc/shm.c index 5e2cbfdab6f..41c1285d697 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -393,6 +393,16 @@ static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync) return sfd->file->f_op->fsync(sfd->file, start, end, datasync); } +static long shm_fallocate(struct file *file, int mode, loff_t offset, + loff_t len) +{ + struct shm_file_data *sfd = shm_file_data(file); + + if (!sfd->file->f_op->fallocate) + return -EOPNOTSUPP; + return sfd->file->f_op->fallocate(file, mode, offset, len); +} + static unsigned long shm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) @@ -410,6 +420,7 @@ static const struct file_operations shm_file_operations = { .get_unmapped_area = shm_get_unmapped_area, #endif .llseek = noop_llseek, + .fallocate = shm_fallocate, }; static const struct file_operations shm_file_operations_huge = { @@ -418,6 +429,7 @@ static const struct file_operations shm_file_operations_huge = { .release = shm_release, .get_unmapped_area = shm_get_unmapped_area, .llseek = noop_llseek, + .fallocate = shm_fallocate, }; int is_file_shm_hugepages(struct file *file) From cbf8ae32f66a9ceb8907ad9e16663c2a29e48990 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 7 Jun 2012 14:21:13 -0700 Subject: [PATCH 274/475] btree: fix tree corruption in btree_get_prev() The memory the parameter __key points to is used as an iterator in btree_get_prev(), so if we save off a bkey() pointer in retry_key and then assign that to __key, we'll end up corrupting the btree internals when we do eg longcpy(__key, bkey(geo, node, i), geo->keylen); to return the key value. What we should do instead is use longcpy() to copy the key value that retry_key points to __key. This can cause a btree to get corrupted by seemingly read-only operations such as btree_for_each_safe. [akpm@linux-foundation.org: avoid the double longcpy()] Signed-off-by: Roland Dreier Acked-by: Joern Engel Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/btree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/btree.c b/lib/btree.c index e5ec1e9c1aa..5cf9e74ec3f 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -319,8 +319,8 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo, if (head->height == 0) return NULL; -retry: longcpy(key, __key, geo->keylen); +retry: dec_key(geo, key); node = head->node; @@ -351,7 +351,7 @@ retry: } miss: if (retry_key) { - __key = retry_key; + longcpy(key, retry_key, geo->keylen); retry_key = NULL; goto retry; } From 39caa0916ef27cf1da5026eb708a2b8413156f75 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Thu, 7 Jun 2012 14:21:14 -0700 Subject: [PATCH 275/475] btree: catch NULL value before it does harm Storing NULL values in the btree is illegal and can lead to memory corruption and possible other fun as well. Catch it on insert, instead of waiting for the inevitable. Signed-off-by: Joern Engel Signed-off-by: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/btree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/btree.c b/lib/btree.c index 5cf9e74ec3f..f9a484676cb 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -509,6 +509,7 @@ retry: int btree_insert(struct btree_head *head, struct btree_geo *geo, unsigned long *key, void *val, gfp_t gfp) { + BUG_ON(!val); return btree_insert_level(head, geo, key, val, 1, gfp); } EXPORT_SYMBOL_GPL(btree_insert); From 40af1bbdca47e5c8a2044039bb78ca8fd8b20f94 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 7 Jun 2012 14:21:14 -0700 Subject: [PATCH 276/475] mm: correctly synchronize rss-counters at exit/exec mm->rss_stat counters have per-task delta: task->rss_stat. Before changing task->mm pointer the kernel must flush this delta with sync_mm_rss(). do_exit() already calls sync_mm_rss() to flush the rss-counters before committing the rss statistics into task->signal->maxrss, taskstats, audit and other stuff. Unfortunately the kernel does this before calling mm_release(), which can call put_user() for processing task->clear_child_tid. So at this point we can trigger page-faults and task->rss_stat becomes non-zero again. As a result mm->rss_stat becomes inconsistent and check_mm() will print something like this: | BUG: Bad rss-counter state mm:ffff88020813c380 idx:1 val:-1 | BUG: Bad rss-counter state mm:ffff88020813c380 idx:2 val:1 This patch moves sync_mm_rss() into mm_release(), and moves mm_release() out of do_exit() and calls it earlier. After mm_release() there should be no pagefaults. [akpm@linux-foundation.org: tweak comment] Signed-off-by: Konstantin Khlebnikov Reported-by: Markus Trippelsdorf Cc: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Oleg Nesterov Cc: [3.4.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 1 - kernel/exit.c | 13 ++++++++----- kernel/fork.c | 8 ++++++++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index a79786a8d2c..b926ed19301 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -819,7 +819,6 @@ static int exec_mmap(struct mm_struct *mm) /* Notify parent that we're no longer interested in the old VM */ tsk = current; old_mm = current->mm; - sync_mm_rss(old_mm); mm_release(tsk, old_mm); if (old_mm) { diff --git a/kernel/exit.c b/kernel/exit.c index 34867cc5b42..804fb6bb816 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -423,6 +423,7 @@ void daemonize(const char *name, ...) * user space pages. We don't need them, and if we didn't close them * they would be locked into memory. */ + mm_release(current, current->mm); exit_mm(current); /* * We don't want to get frozen, in case system-wide hibernation @@ -640,7 +641,6 @@ static void exit_mm(struct task_struct * tsk) struct mm_struct *mm = tsk->mm; struct core_state *core_state; - mm_release(tsk, mm); if (!mm) return; /* @@ -960,9 +960,13 @@ void do_exit(long code) preempt_count()); acct_update_integrals(tsk); - /* sync mm's RSS info before statistics gathering */ - if (tsk->mm) - sync_mm_rss(tsk->mm); + + /* Set exit_code before complete_vfork_done() in mm_release() */ + tsk->exit_code = code; + + /* Release mm and sync mm's RSS info before statistics gathering */ + mm_release(tsk, tsk->mm); + group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { hrtimer_cancel(&tsk->signal->real_timer); @@ -975,7 +979,6 @@ void do_exit(long code) tty_audit_exit(); audit_free(tsk); - tsk->exit_code = code; taskstats_exit(tsk, group_dead); exit_mm(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index ab5211b9e62..0560781c690 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -619,6 +619,14 @@ void mmput(struct mm_struct *mm) module_put(mm->binfmt->module); mmdrop(mm); } + + /* + * Final rss-counter synchronization. After this point there must be + * no pagefaults into this mm from the current context. Otherwise + * mm->rss_stat will be inconsistent. + */ + if (mm) + sync_mm_rss(mm); } EXPORT_SYMBOL_GPL(mmput); From 2d8dbb04c63e5369988f008bc4df3359c01d8812 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Tue, 5 Jun 2012 03:41:42 +0000 Subject: [PATCH 277/475] snmp: fix OutOctets counter to include forwarded datagrams RFC 4293 defines ipIfStatsOutOctets (similar definition for ipSystemStatsOutOctets): The total number of octets in IP datagrams delivered to the lower layers for transmission. Octets from datagrams counted in ipIfStatsOutTransmits MUST be counted here. And ipIfStatsOutTransmits: The total number of IP datagrams that this entity supplied to the lower layers for transmission. This includes datagrams generated locally and those forwarded by this entity. Therefore, IPSTATS_MIB_OUTOCTETS must be incremented when incrementing IPSTATS_MIB_OUTFORWDATAGRAMS. IP_UPD_PO_STATS is not used since ipIfStatsOutRequests must not include forwarded datagrams: The total number of IP datagrams that local IP user-protocols (including ICMP) supplied to IP in requests for transmission. Note that this counter does not include any datagrams counted in ipIfStatsOutForwDatagrams. Signed-off-by: Vincent Bernat Signed-off-by: David S. Miller --- net/ipv4/ip_forward.c | 1 + net/ipv4/ipmr.c | 1 + net/ipv6/ip6_output.c | 1 + net/ipv6/ip6mr.c | 2 ++ 4 files changed, 5 insertions(+) diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index e5c44fc586a..ab09b126423 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -44,6 +44,7 @@ static int ip_forward_finish(struct sk_buff *skb) struct ip_options *opt = &(IPCB(skb)->opt); IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) ip_forward_options(skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a9e519ad6db..c94bbc6f2ba 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1574,6 +1574,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) struct ip_options *opt = &(IPCB(skb)->opt); IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) ip_forward_options(skb); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 17b8c67998b..decc21d19c5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -526,6 +526,7 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index b15dc08643a..461e47c8e95 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1886,6 +1886,8 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb) { IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTOCTETS, skb->len); return dst_output(skb); } From 752a6a5f84bfed18d0709383913d9d9d21b61c77 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 14 May 2012 10:00:12 +0100 Subject: [PATCH 278/475] regmap: Export regmap_reinit_cache() It's supposed to be there for drivers. Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 04b48027c21..c89aa01fb1d 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -472,6 +472,7 @@ int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config) return ret; } +EXPORT_SYMBOL_GPL(regmap_reinit_cache); /** * regmap_exit(): Free a previously allocated register map From b0dd6b70f0fda17ae9762fbb72d98e40a4f66556 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 7 Jun 2012 18:56:06 -0400 Subject: [PATCH 279/475] ext4: fix the free blocks calculation for ext3 file systems w/ uninit_bg Ext3 filesystems that are converted to use as many ext4 file system features as possible will enable uninit_bg to speed up e2fsck times. These file systems will have a native ext3 layout of inode tables and block allocation bitmaps (as opposed to ext4's flex_bg layout). Unfortunately, in these cases, when first allocating a block in an uninitialized block group, ext4 would incorrectly calculate the number of free blocks in that block group, and then errorneously report that the file system was corrupt: EXT4-fs error (device vdd): ext4_mb_generate_buddy:741: group 30, 32254 clusters in bitmap, 32258 in gd This problem can be reproduced via: mke2fs -q -t ext4 -O ^flex_bg /dev/vdd 5g mount -t ext4 /dev/vdd /mnt fallocate -l 4600m /mnt/test The problem was caused by a bone headed mistake in the check to see if a particular metadata block was part of the block group. Many thanks to Kees Cook for finding and bisecting the buggy commit which introduced this bug (commit fd034a84e1, present since v3.2). Reported-by: Sander Eikelenboom Reported-by: Kees Cook Signed-off-by: "Theodore Ts'o" Tested-by: Kees Cook Cc: stable@kernel.org --- fs/ext4/balloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 99b6324290d..cee7812cc3c 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -90,8 +90,8 @@ unsigned ext4_num_overhead_clusters(struct super_block *sb, * unusual file system layouts. */ if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) { - block_cluster = EXT4_B2C(sbi, (start - - ext4_block_bitmap(sb, gdp))); + block_cluster = EXT4_B2C(sbi, + ext4_block_bitmap(sb, gdp) - start); if (block_cluster < num_clusters) block_cluster = -1; else if (block_cluster == num_clusters) { @@ -102,7 +102,7 @@ unsigned ext4_num_overhead_clusters(struct super_block *sb, if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) { inode_cluster = EXT4_B2C(sbi, - start - ext4_inode_bitmap(sb, gdp)); + ext4_inode_bitmap(sb, gdp) - start); if (inode_cluster < num_clusters) inode_cluster = -1; else if (inode_cluster == num_clusters) { @@ -114,7 +114,7 @@ unsigned ext4_num_overhead_clusters(struct super_block *sb, itbl_blk = ext4_inode_table(sb, gdp); for (i = 0; i < sbi->s_itb_per_group; i++) { if (ext4_block_in_group(sb, itbl_blk + i, block_group)) { - c = EXT4_B2C(sbi, start - itbl_blk + i); + c = EXT4_B2C(sbi, itbl_blk + i - start); if ((c < num_clusters) || (c == inode_cluster) || (c == block_cluster) || (c == itbl_cluster)) continue; From 35ea0655a1bcdb1dcb1c1d0c4b6b391ade9c6d01 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Tue, 5 Jun 2012 19:26:59 +0100 Subject: [PATCH 280/475] ASoC: dpcm: Fix dpcm_get_be() to check that DAI is BE Make sure that the dpcm_get_be() only returns BE DAI links. Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index bedd1717a37..48fd15b312c 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -794,6 +794,9 @@ static struct snd_soc_pcm_runtime *dpcm_get_be(struct snd_soc_card *card, for (i = 0; i < card->num_links; i++) { be = &card->rtd[i]; + if (!be->dai_link->no_pcm) + continue; + if (be->cpu_dai->playback_widget == widget || be->codec_dai->playback_widget == widget) return be; @@ -803,6 +806,9 @@ static struct snd_soc_pcm_runtime *dpcm_get_be(struct snd_soc_card *card, for (i = 0; i < card->num_links; i++) { be = &card->rtd[i]; + if (!be->dai_link->no_pcm) + continue; + if (be->cpu_dai->capture_widget == widget || be->codec_dai->capture_widget == widget) return be; From 90c6ce0d5482ee52b0a427115e185d488783164b Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Tue, 5 Jun 2012 19:27:15 +0100 Subject: [PATCH 281/475] ASoC: dapm: Fix input list to use source widgets We should only add source widgets to the input list. Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index b47fe75444a..89eae93445c 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -913,7 +913,7 @@ static int is_connected_input_ep(struct snd_soc_dapm_widget *widget, /* do we need to add this widget to the list ? */ if (list) { int err; - err = dapm_list_add_widget(list, path->sink); + err = dapm_list_add_widget(list, path->source); if (err < 0) { dev_err(widget->dapm->dev, "could not add widget %s\n", widget->name); From 3ff1ec27e9f567824e73316025a9025528494506 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 8 Jun 2012 06:53:48 +0800 Subject: [PATCH 282/475] ASoC: wm2000: Always use a 4s timeout for the firmware Rather than having varying timeouts depending on the transition always use a 4s timeout. This provides better diagnostics for clocking errors and ensures compatibility with current calibration firmwares. Signed-off-by: Mark Brown --- sound/soc/codecs/wm2000.c | 59 ++++++++++++++------------------------- 1 file changed, 21 insertions(+), 38 deletions(-) diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c index a75c3766aed..0418fa11e6b 100644 --- a/sound/soc/codecs/wm2000.c +++ b/sound/soc/codecs/wm2000.c @@ -99,8 +99,9 @@ static void wm2000_reset(struct wm2000_priv *wm2000) } static int wm2000_poll_bit(struct i2c_client *i2c, - unsigned int reg, u8 mask, int timeout) + unsigned int reg, u8 mask) { + int timeout = 4000; int val; val = wm2000_read(i2c, reg); @@ -119,7 +120,7 @@ static int wm2000_poll_bit(struct i2c_client *i2c, static int wm2000_power_up(struct i2c_client *i2c, int analogue) { struct wm2000_priv *wm2000 = dev_get_drvdata(&i2c->dev); - int ret, timeout; + int ret; BUG_ON(wm2000->anc_mode != ANC_OFF); @@ -140,13 +141,13 @@ static int wm2000_power_up(struct i2c_client *i2c, int analogue) /* Wait for ANC engine to become ready */ if (!wm2000_poll_bit(i2c, WM2000_REG_ANC_STAT, - WM2000_ANC_ENG_IDLE, 1)) { + WM2000_ANC_ENG_IDLE)) { dev_err(&i2c->dev, "ANC engine failed to reset\n"); return -ETIMEDOUT; } if (!wm2000_poll_bit(i2c, WM2000_REG_SYS_STATUS, - WM2000_STATUS_BOOT_COMPLETE, 1)) { + WM2000_STATUS_BOOT_COMPLETE)) { dev_err(&i2c->dev, "ANC engine failed to initialise\n"); return -ETIMEDOUT; } @@ -173,16 +174,13 @@ static int wm2000_power_up(struct i2c_client *i2c, int analogue) dev_dbg(&i2c->dev, "Download complete\n"); if (analogue) { - timeout = 248; - wm2000_write(i2c, WM2000_REG_ANA_VMID_PU_TIME, timeout / 4); + wm2000_write(i2c, WM2000_REG_ANA_VMID_PU_TIME, 248 / 4); wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_ANA_SEQ_INCLUDE | WM2000_MODE_MOUSE_ENABLE | WM2000_MODE_THERMAL_ENABLE); } else { - timeout = 10; - wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_MOUSE_ENABLE | WM2000_MODE_THERMAL_ENABLE); @@ -201,9 +199,8 @@ static int wm2000_power_up(struct i2c_client *i2c, int analogue) wm2000_write(i2c, WM2000_REG_SYS_CTL2, WM2000_ANC_INT_N_CLR); if (!wm2000_poll_bit(i2c, WM2000_REG_SYS_STATUS, - WM2000_STATUS_MOUSE_ACTIVE, timeout)) { - dev_err(&i2c->dev, "Timed out waiting for device after %dms\n", - timeout * 10); + WM2000_STATUS_MOUSE_ACTIVE)) { + dev_err(&i2c->dev, "Timed out waiting for device\n"); return -ETIMEDOUT; } @@ -218,28 +215,25 @@ static int wm2000_power_up(struct i2c_client *i2c, int analogue) static int wm2000_power_down(struct i2c_client *i2c, int analogue) { struct wm2000_priv *wm2000 = dev_get_drvdata(&i2c->dev); - int timeout; if (analogue) { - timeout = 248; - wm2000_write(i2c, WM2000_REG_ANA_VMID_PD_TIME, timeout / 4); + wm2000_write(i2c, WM2000_REG_ANA_VMID_PD_TIME, 248 / 4); wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_ANA_SEQ_INCLUDE | WM2000_MODE_POWER_DOWN); } else { - timeout = 10; wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_POWER_DOWN); } if (!wm2000_poll_bit(i2c, WM2000_REG_SYS_STATUS, - WM2000_STATUS_POWER_DOWN_COMPLETE, timeout)) { + WM2000_STATUS_POWER_DOWN_COMPLETE)) { dev_err(&i2c->dev, "Timeout waiting for ANC power down\n"); return -ETIMEDOUT; } if (!wm2000_poll_bit(i2c, WM2000_REG_ANC_STAT, - WM2000_ANC_ENG_IDLE, 1)) { + WM2000_ANC_ENG_IDLE)) { dev_err(&i2c->dev, "Timeout waiting for ANC engine idle\n"); return -ETIMEDOUT; } @@ -268,13 +262,13 @@ static int wm2000_enter_bypass(struct i2c_client *i2c, int analogue) } if (!wm2000_poll_bit(i2c, WM2000_REG_SYS_STATUS, - WM2000_STATUS_ANC_DISABLED, 10)) { + WM2000_STATUS_ANC_DISABLED)) { dev_err(&i2c->dev, "Timeout waiting for ANC disable\n"); return -ETIMEDOUT; } if (!wm2000_poll_bit(i2c, WM2000_REG_ANC_STAT, - WM2000_ANC_ENG_IDLE, 1)) { + WM2000_ANC_ENG_IDLE)) { dev_err(&i2c->dev, "Timeout waiting for ANC engine idle\n"); return -ETIMEDOUT; } @@ -311,7 +305,7 @@ static int wm2000_exit_bypass(struct i2c_client *i2c, int analogue) wm2000_write(i2c, WM2000_REG_SYS_CTL2, WM2000_ANC_INT_N_CLR); if (!wm2000_poll_bit(i2c, WM2000_REG_SYS_STATUS, - WM2000_STATUS_MOUSE_ACTIVE, 10)) { + WM2000_STATUS_MOUSE_ACTIVE)) { dev_err(&i2c->dev, "Timed out waiting for MOUSE\n"); return -ETIMEDOUT; } @@ -325,38 +319,32 @@ static int wm2000_exit_bypass(struct i2c_client *i2c, int analogue) static int wm2000_enter_standby(struct i2c_client *i2c, int analogue) { struct wm2000_priv *wm2000 = dev_get_drvdata(&i2c->dev); - int timeout; BUG_ON(wm2000->anc_mode != ANC_ACTIVE); if (analogue) { - timeout = 248; - wm2000_write(i2c, WM2000_REG_ANA_VMID_PD_TIME, timeout / 4); + wm2000_write(i2c, WM2000_REG_ANA_VMID_PD_TIME, 248 / 4); wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_ANA_SEQ_INCLUDE | WM2000_MODE_THERMAL_ENABLE | WM2000_MODE_STANDBY_ENTRY); } else { - timeout = 10; - wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_THERMAL_ENABLE | WM2000_MODE_STANDBY_ENTRY); } if (!wm2000_poll_bit(i2c, WM2000_REG_SYS_STATUS, - WM2000_STATUS_ANC_DISABLED, timeout)) { + WM2000_STATUS_ANC_DISABLED)) { dev_err(&i2c->dev, "Timed out waiting for ANC disable after 1ms\n"); return -ETIMEDOUT; } - if (!wm2000_poll_bit(i2c, WM2000_REG_ANC_STAT, WM2000_ANC_ENG_IDLE, - 1)) { + if (!wm2000_poll_bit(i2c, WM2000_REG_ANC_STAT, WM2000_ANC_ENG_IDLE)) { dev_err(&i2c->dev, - "Timed out waiting for standby after %dms\n", - timeout * 10); + "Timed out waiting for standby\n"); return -ETIMEDOUT; } @@ -374,23 +362,19 @@ static int wm2000_enter_standby(struct i2c_client *i2c, int analogue) static int wm2000_exit_standby(struct i2c_client *i2c, int analogue) { struct wm2000_priv *wm2000 = dev_get_drvdata(&i2c->dev); - int timeout; BUG_ON(wm2000->anc_mode != ANC_STANDBY); wm2000_write(i2c, WM2000_REG_SYS_CTL1, 0); if (analogue) { - timeout = 248; - wm2000_write(i2c, WM2000_REG_ANA_VMID_PU_TIME, timeout / 4); + wm2000_write(i2c, WM2000_REG_ANA_VMID_PU_TIME, 248 / 4); wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_ANA_SEQ_INCLUDE | WM2000_MODE_THERMAL_ENABLE | WM2000_MODE_MOUSE_ENABLE); } else { - timeout = 10; - wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_THERMAL_ENABLE | WM2000_MODE_MOUSE_ENABLE); @@ -400,9 +384,8 @@ static int wm2000_exit_standby(struct i2c_client *i2c, int analogue) wm2000_write(i2c, WM2000_REG_SYS_CTL2, WM2000_ANC_INT_N_CLR); if (!wm2000_poll_bit(i2c, WM2000_REG_SYS_STATUS, - WM2000_STATUS_MOUSE_ACTIVE, timeout)) { - dev_err(&i2c->dev, "Timed out waiting for MOUSE after %dms\n", - timeout * 10); + WM2000_STATUS_MOUSE_ACTIVE)) { + dev_err(&i2c->dev, "Timed out waiting for MOUSE\n"); return -ETIMEDOUT; } From b22b1f178f6799278d3178d894f37facb2085765 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 7 Jun 2012 19:04:19 -0400 Subject: [PATCH 283/475] ext4: don't set i_flags in EXT4_IOC_SETFLAGS Commit 7990696 uses the ext4_{set,clear}_inode_flags() functions to change the i_flags automatically but fails to remove the error setting of i_flags. So we still have the problem of trashing state flags. Fix this by removing the assignment. Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/ioctl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 8ad112ae0ad..e34deac3f36 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -123,7 +123,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) else ext4_clear_inode_flag(inode, i); } - ei->i_flags = flags; ext4_set_inode_flags(inode); inode->i_ctime = ext4_current_time(inode); From 0bdc81e4e944781a2bcc971f9e3cf24ac7030939 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 7 Jun 2012 09:52:12 +0800 Subject: [PATCH 284/475] regulator: core: Properly handle the case min_uV < rdev->desc->min_uV in map_voltage_linear Properly handle the case if the specified min_uV is less than the voltage given by the lowest selector. Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- drivers/regulator/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 7584a74eec8..09a737c868b 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2050,6 +2050,9 @@ int regulator_map_voltage_linear(struct regulator_dev *rdev, return -EINVAL; } + if (min_uV < rdev->desc->min_uV) + min_uV = rdev->desc->min_uV; + ret = DIV_ROUND_UP(min_uV - rdev->desc->min_uV, rdev->desc->uV_step); if (ret < 0) return ret; From 69c5b7530651c8e375d2fbecd157a9a9a20c4cc9 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 7 Jun 2012 17:58:31 -0600 Subject: [PATCH 285/475] ASoC: tegra: add MODULE_DEVICE_TABLE to tegra30_ahub This allows the module to automatically load when instantiated from device tree. Signed-off-by: Stephen Warren Signed-off-by: Mark Brown --- sound/soc/tegra/tegra30_ahub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/tegra/tegra30_ahub.c b/sound/soc/tegra/tegra30_ahub.c index 57cd419f743..f43edb364a1 100644 --- a/sound/soc/tegra/tegra30_ahub.c +++ b/sound/soc/tegra/tegra30_ahub.c @@ -629,3 +629,4 @@ MODULE_AUTHOR("Stephen Warren "); MODULE_DESCRIPTION("Tegra30 AHUB driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:" DRV_NAME); +MODULE_DEVICE_TABLE(of, tegra30_ahub_of_match); From 48d212a2eecaca2e1875925837ad27b2f43f48a3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 7 Jun 2012 17:54:07 -0700 Subject: [PATCH 286/475] Revert "mm: correctly synchronize rss-counters at exit/exec" This reverts commit 40af1bbdca47e5c8a2044039bb78ca8fd8b20f94. It's horribly and utterly broken for at least the following reasons: - calling sync_mm_rss() from mmput() is fundamentally wrong, because there's absolutely no reason to believe that the task that does the mmput() always does it on its own VM. Example: fork, ptrace, /proc - you name it. - calling it *after* having done mmdrop() on it is doubly insane, since the mm struct may well be gone now. - testing mm against NULL before you call it is insane too, since a NULL mm there would have caused oopses long before. .. and those are just the three bugs I found before I decided to give up looking for me and revert it asap. I should have caught it before I even took it, but I trusted Andrew too much. Cc: Konstantin Khlebnikov Cc: Markus Trippelsdorf Cc: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Oleg Nesterov Cc: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 1 + kernel/exit.c | 13 +++++-------- kernel/fork.c | 8 -------- 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index b926ed19301..a79786a8d2c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -819,6 +819,7 @@ static int exec_mmap(struct mm_struct *mm) /* Notify parent that we're no longer interested in the old VM */ tsk = current; old_mm = current->mm; + sync_mm_rss(old_mm); mm_release(tsk, old_mm); if (old_mm) { diff --git a/kernel/exit.c b/kernel/exit.c index 804fb6bb816..34867cc5b42 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -423,7 +423,6 @@ void daemonize(const char *name, ...) * user space pages. We don't need them, and if we didn't close them * they would be locked into memory. */ - mm_release(current, current->mm); exit_mm(current); /* * We don't want to get frozen, in case system-wide hibernation @@ -641,6 +640,7 @@ static void exit_mm(struct task_struct * tsk) struct mm_struct *mm = tsk->mm; struct core_state *core_state; + mm_release(tsk, mm); if (!mm) return; /* @@ -960,13 +960,9 @@ void do_exit(long code) preempt_count()); acct_update_integrals(tsk); - - /* Set exit_code before complete_vfork_done() in mm_release() */ - tsk->exit_code = code; - - /* Release mm and sync mm's RSS info before statistics gathering */ - mm_release(tsk, tsk->mm); - + /* sync mm's RSS info before statistics gathering */ + if (tsk->mm) + sync_mm_rss(tsk->mm); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { hrtimer_cancel(&tsk->signal->real_timer); @@ -979,6 +975,7 @@ void do_exit(long code) tty_audit_exit(); audit_free(tsk); + tsk->exit_code = code; taskstats_exit(tsk, group_dead); exit_mm(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 0560781c690..ab5211b9e62 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -619,14 +619,6 @@ void mmput(struct mm_struct *mm) module_put(mm->binfmt->module); mmdrop(mm); } - - /* - * Final rss-counter synchronization. After this point there must be - * no pagefaults into this mm from the current context. Otherwise - * mm->rss_stat will be inconsistent. - */ - if (mm) - sync_mm_rss(mm); } EXPORT_SYMBOL_GPL(mmput); From 860aed25a1f0936d4852ab936252b47cd1e630f1 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 1 Jun 2012 18:13:43 +1000 Subject: [PATCH 287/475] powerpc/time: Sanity check of decrementer expiration is necessary This reverts 68568add2c ("powerpc/time: Remove unnecessary sanity check of decrementer expiration"). We do need to check whether we have reached the expiration time of the next event, because we sometimes get an early decrementer interrupt, most notably when we set the decrementer to 1 in arch_irq_work_raise(). The effect of not having the sanity check is that if timer_interrupt() gets called early, we leave the decrementer set to its maximum value, which means we then don't get any more decrementer interrupts for about 4 seconds (or longer, depending on timebase frequency). I saw these pauses as a consequence of getting a stray hypervisor decrementer interrupt left over from exiting a KVM guest. This isn't quite a straight revert because of changes to the surrounding code, but it restores the same algorithm as was previously used. Cc: stable@vger.kernel.org Acked-by: Anton Blanchard Acked-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/time.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 99a995c2a3f..be171ee73bf 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -475,6 +475,7 @@ void timer_interrupt(struct pt_regs * regs) struct pt_regs *old_regs; u64 *next_tb = &__get_cpu_var(decrementers_next_tb); struct clock_event_device *evt = &__get_cpu_var(decrementers); + u64 now; /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. @@ -509,9 +510,16 @@ void timer_interrupt(struct pt_regs * regs) irq_work_run(); } - *next_tb = ~(u64)0; - if (evt->event_handler) - evt->event_handler(evt); + now = get_tb_or_rtc(); + if (now >= *next_tb) { + *next_tb = ~(u64)0; + if (evt->event_handler) + evt->event_handler(evt); + } else { + now = *next_tb - now; + if (now <= DECREMENTER_MAX) + set_dec((int)now); + } #ifdef CONFIG_PPC64 /* collect purr register values often, for accurate calculations */ From ae82fdb1406ad41d68f07027fe31f2d35ba22a90 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 8 Jun 2012 14:58:13 +0930 Subject: [PATCH 288/475] module_param: stop double-calling parameters. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 026cee0086fe1df4cf74691cf273062cc769617d "params: _initcall-like kernel parameters" set old-style module parameters to level 0. And we call those level 0 calls where we used to, early in start_kernel(). We also loop through the initcall levels and call the levelled module_params before the corresponding initcall. Unfortunately level 0 is early_init(), so we call the standard module_param calls twice. (Turns out most things don't care, but at least ubi.mtd does). Change the level to -1 for standard module_param calls. Reported-by: Benoît Thébaudeau Signed-off-by: Rusty Russell Cc: stable@kernel.org --- include/linux/moduleparam.h | 10 +++++----- init/main.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1b14d25162c..d6a58065c09 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -128,7 +128,7 @@ struct kparam_array * The ops can have NULL set or get functions. */ #define module_param_cb(name, ops, arg, perm) \ - __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, 0) + __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1) /** * _param_cb - general callback for a module/cmdline parameter @@ -192,7 +192,7 @@ struct kparam_array { (void *)set, (void *)get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ name, &__param_ops_##name, arg, \ - (perm) + sizeof(__check_old_set_param(set))*0, 0) + (perm) + sizeof(__check_old_set_param(set))*0, -1) /* We don't get oldget: it's often a new-style param_get_uint, etc. */ static inline int @@ -272,7 +272,7 @@ static inline void __kernel_param_unlock(void) */ #define core_param(name, var, type, perm) \ param_check_##type(name, &(var)); \ - __module_param_call("", name, ¶m_ops_##type, &var, perm, 0) + __module_param_call("", name, ¶m_ops_##type, &var, perm, -1) #endif /* !MODULE */ /** @@ -290,7 +290,7 @@ static inline void __kernel_param_unlock(void) = { len, string }; \ __module_param_call(MODULE_PARAM_PREFIX, name, \ ¶m_ops_string, \ - .str = &__param_string_##name, perm, 0); \ + .str = &__param_string_##name, perm, -1); \ __MODULE_PARM_TYPE(name, "string") /** @@ -432,7 +432,7 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp); __module_param_call(MODULE_PARAM_PREFIX, name, \ ¶m_array_ops, \ .arr = &__param_arr_##name, \ - perm, 0); \ + perm, -1); \ __MODULE_PARM_TYPE(name, "array of " #type) extern struct kernel_param_ops param_array_ops; diff --git a/init/main.c b/init/main.c index 1ca6b32c482..37e12098eac 100644 --- a/init/main.c +++ b/init/main.c @@ -508,7 +508,7 @@ asmlinkage void __init start_kernel(void) parse_early_param(); parse_args("Booting kernel", static_command_line, __start___param, __stop___param - __start___param, - 0, 0, &unknown_bootoption); + -1, -1, &unknown_bootoption); jump_label_init(); From 19efb72fdc3c3bbfb929d91e34312b0494f14409 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 1 Jun 2012 18:56:00 +0200 Subject: [PATCH 289/475] init: Drop initcall level output 9fb48c744ba6a ("params: add 3rd arg to option handler callback signature") added similar lines to dmesg: initlevel:0=early, 4 registered initcalls initlevel:1=core, 31 registered initcalls initlevel:2=postcore, 11 registered initcalls initlevel:3=arch, 7 registered initcalls initlevel:4=subsys, 40 registered initcalls initlevel:5=fs, 30 registered initcalls initlevel:6=device, 250 registered initcalls initlevel:7=late, 35 registered initcalls but they don't contain any info for the general user staring at dmesg. I'm very doubtful the count of initcalls registered per level helps anyone so drop that output completely. Cc: Jim Cromie Cc: Rusty Russell Cc: Jason Baron Signed-off-by: Borislav Petkov Signed-off-by: Rusty Russell --- init/main.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/init/main.c b/init/main.c index 37e12098eac..b5cc0a7c470 100644 --- a/init/main.c +++ b/init/main.c @@ -755,13 +755,8 @@ static void __init do_initcalls(void) { int level; - for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) { - pr_info("initlevel:%d=%s, %d registered initcalls\n", - level, initcall_level_names[level], - (int) (initcall_levels[level+1] - - initcall_levels[level])); + for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) do_initcall_level(level); - } } /* From 33d5e332b9c5ce0bf3bfd44ca2127d1773b3f2ad Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Thu, 7 Jun 2012 19:25:07 +0000 Subject: [PATCH 290/475] stmmac: fix driver built w/ w/o both pci and platf modules The commit ba27ec66ffeb78cbf fixes the Kconfig of the driver when built as module allowing to select/unselect the PCI and Platform modules that are not anymore mutually exclusive. This patch fixes and guarantees that the driver builds on all the platforms w/ w/o PCI and when select/unselect the two stmmac supports. In case of there are some problems on both the configuration and the pci/pltf registration the module_init will fail. v2: set the CONFIG_STMMAC_PLATFORM enabled by default. I've just noticed that this can actually help on some configurations that don't enable any STMMAC options by default (e.g. SPEAr). v3: change printk level when do not register the driver. Reported-by: Fengguang Wu Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 1 + drivers/net/ethernet/stmicro/stmmac/stmmac.h | 60 ++++++++++++++++++- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 23 +++---- 3 files changed, 72 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 0076f770e63..9f448279e12 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -15,6 +15,7 @@ if STMMAC_ETH config STMMAC_PLATFORM bool "STMMAC Platform bus support" depends on STMMAC_ETH + default y ---help--- This selects the platform specific bus support for the stmmac device driver. This is the driver used diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 6d07ba2c866..94b21b409d8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -26,6 +26,7 @@ #include #include #include +#include #include "common.h" #ifdef CONFIG_STMMAC_TIMER #include "stmmac_timer.h" @@ -95,8 +96,6 @@ extern int stmmac_mdio_register(struct net_device *ndev); extern void stmmac_set_ethtool_ops(struct net_device *netdev); extern const struct stmmac_desc_ops enh_desc_ops; extern const struct stmmac_desc_ops ndesc_ops; -extern struct pci_driver stmmac_pci_driver; -extern struct platform_driver stmmac_pltfr_driver; int stmmac_freeze(struct net_device *ndev); int stmmac_restore(struct net_device *ndev); int stmmac_resume(struct net_device *ndev); @@ -144,3 +143,60 @@ static inline int stmmac_clk_get(struct stmmac_priv *priv) return 0; } #endif /* CONFIG_HAVE_CLK */ + + +#ifdef CONFIG_STMMAC_PLATFORM +extern struct platform_driver stmmac_pltfr_driver; +static inline int stmmac_register_platform(void) +{ + int err; + + err = platform_driver_register(&stmmac_pltfr_driver); + if (err) + pr_err("stmmac: failed to register the platform driver\n"); + + return err; +} +static inline void stmmac_unregister_platform(void) +{ + platform_driver_register(&stmmac_pltfr_driver); +} +#else +static inline int stmmac_register_platform(void) +{ + pr_debug("stmmac: do not register the platf driver\n"); + + return -EINVAL; +} +static inline void stmmac_unregister_platform(void) +{ +} +#endif /* CONFIG_STMMAC_PLATFORM */ + +#ifdef CONFIG_STMMAC_PCI +extern struct pci_driver stmmac_pci_driver; +static inline int stmmac_register_pci(void) +{ + int err; + + err = pci_register_driver(&stmmac_pci_driver); + if (err) + pr_err("stmmac: failed to register the PCI driver\n"); + + return err; +} +static inline void stmmac_unregister_pci(void) +{ + pci_unregister_driver(&stmmac_pci_driver); +} +#else +static inline int stmmac_register_pci(void) +{ + pr_debug("stmmac: do not register the PCI driver\n"); + + return -EINVAL; +} +static inline void stmmac_unregister_pci(void) +{ +} +#endif /* CONFIG_STMMAC_PCI */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 36385695141..51b3b68528e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -42,7 +42,6 @@ #include #include #include -#include #ifdef CONFIG_STMMAC_DEBUG_FS #include #include @@ -2094,25 +2093,29 @@ int stmmac_restore(struct net_device *ndev) } #endif /* CONFIG_PM */ +/* Driver can be configured w/ and w/ both PCI and Platf drivers + * depending on the configuration selected. + */ static int __init stmmac_init(void) { - int err = 0; + int err_plt = 0; + int err_pci = 0; - err = platform_driver_register(&stmmac_pltfr_driver); + err_plt = stmmac_register_platform(); + err_pci = stmmac_register_pci(); - if (!err) { - err = pci_register_driver(&stmmac_pci_driver); - if (err) - platform_driver_unregister(&stmmac_pltfr_driver); + if ((err_pci) && (err_plt)) { + pr_err("stmmac: driver registration failed\n"); + return -EINVAL; } - return err; + return 0; } static void __exit stmmac_exit(void) { - pci_unregister_driver(&stmmac_pci_driver); - platform_driver_unregister(&stmmac_pltfr_driver); + stmmac_unregister_platform(); + stmmac_unregister_pci(); } module_init(stmmac_init); From 8260ef075bd9848ce6a8004ec73b7454d410cc15 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jun 2012 09:01:37 +0200 Subject: [PATCH 291/475] ALSA: usb-audio: Fix substream assignments In 3.5 kernel, the endpoint is assigned dynamically for the substreams, but the PCM assignment still checks the presence of the endpoint pointer. This ended up in duplicated PCM substream creations at probing time, resulting in kernel warnings like: WARNING: at fs/proc/generic.c:586 proc_register+0x169/0x1a6() Pid: 1152, comm: modprobe Not tainted 3.5.0-rc1-00110-g71fae7e #2 Call Trace: [] warn_slowpath_common+0x83/0x9c [] warn_slowpath_fmt+0x46/0x48 [] ? add_preempt_count+0x39/0x3b [] proc_register+0x169/0x1a6 [] create_proc_entry+0x74/0x8c [] snd_info_register+0x3e/0xc3 [snd] [] snd_pcm_new_stream+0xb1/0x404 [snd_pcm] [] snd_usb_add_audio_stream+0xd2/0x230 [snd_usb_audio] [] ? snd_usb_parse_audio_format+0x252/0x34f [snd_usb_audio] [] ? kmem_cache_alloc_trace+0xab/0xbb [] snd_usb_parse_audio_interface+0x4ac/0x567 [snd_usb_audio] [] snd_usb_create_stream+0xe9/0x125 [snd_usb_audio] [] usb_audio_probe+0x62a/0x72c [snd_usb_audio] ..... This patch fixes the regression by checking the fixed endpoint number for each substream instead of the endpoint pointer. Reported-and-tested-by: Jamie Heilman Signed-off-by: Takashi Iwai --- sound/usb/card.h | 1 + sound/usb/stream.c | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/usb/card.h b/sound/usb/card.h index 0d37238b845..2b9fffff23b 100644 --- a/sound/usb/card.h +++ b/sound/usb/card.h @@ -119,6 +119,7 @@ struct snd_usb_substream { unsigned long unlink_mask; /* bitmask of unlinked urbs */ /* data and sync endpoints for this stream */ + unsigned int ep_num; /* the endpoint number */ struct snd_usb_endpoint *data_endpoint; struct snd_usb_endpoint *sync_endpoint; unsigned long flags; diff --git a/sound/usb/stream.c b/sound/usb/stream.c index 6b7d7a2b7ba..083ed81160e 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -97,6 +97,7 @@ static void snd_usb_init_substream(struct snd_usb_stream *as, subs->formats |= fp->formats; subs->num_formats++; subs->fmt_type = fp->fmt_type; + subs->ep_num = fp->endpoint; } /* @@ -119,9 +120,7 @@ int snd_usb_add_audio_stream(struct snd_usb_audio *chip, if (as->fmt_type != fp->fmt_type) continue; subs = &as->substream[stream]; - if (!subs->data_endpoint) - continue; - if (subs->data_endpoint->ep_num == fp->endpoint) { + if (subs->ep_num == fp->endpoint) { list_add_tail(&fp->list, &subs->fmt_list); subs->num_formats++; subs->formats |= fp->formats; @@ -134,7 +133,7 @@ int snd_usb_add_audio_stream(struct snd_usb_audio *chip, if (as->fmt_type != fp->fmt_type) continue; subs = &as->substream[stream]; - if (subs->data_endpoint) + if (subs->ep_num) continue; err = snd_pcm_new_stream(as->pcm, stream, 1); if (err < 0) From 6a2b28ef036ab5c66fdc606fe97d9e5cb34ea409 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 8 Jun 2012 00:28:16 -0700 Subject: [PATCH 292/475] Revert "niu: Add support for byte queue limits." This reverts commit efa230f2c68abab817f13473077f8d0cc74f43f3. BQL doesn't work with how this driver currently only takes TX interrupts every 1/4 of the TX ring. That behavior needs to be fixed, but that's a larger non-trivial task and for now we have to revert BQL support as this makes the device currently completely unusable. Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/niu.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 703c8cce2a2..8c726b7004d 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -3598,7 +3598,6 @@ static int release_tx_packet(struct niu *np, struct tx_ring_info *rp, int idx) static void niu_tx_work(struct niu *np, struct tx_ring_info *rp) { struct netdev_queue *txq; - unsigned int tx_bytes; u16 pkt_cnt, tmp; int cons, index; u64 cs; @@ -3621,18 +3620,12 @@ static void niu_tx_work(struct niu *np, struct tx_ring_info *rp) netif_printk(np, tx_done, KERN_DEBUG, np->dev, "%s() pkt_cnt[%u] cons[%d]\n", __func__, pkt_cnt, cons); - tx_bytes = 0; - tmp = pkt_cnt; - while (tmp--) { - tx_bytes += rp->tx_buffs[cons].skb->len; + while (pkt_cnt--) cons = release_tx_packet(np, rp, cons); - } rp->cons = cons; smp_mb(); - netdev_tx_completed_queue(txq, pkt_cnt, tx_bytes); - out: if (unlikely(netif_tx_queue_stopped(txq) && (niu_tx_avail(rp) > NIU_TX_WAKEUP_THRESH(rp)))) { @@ -4333,7 +4326,6 @@ static void niu_free_channels(struct niu *np) struct tx_ring_info *rp = &np->tx_rings[i]; niu_free_tx_ring_info(np, rp); - netdev_tx_reset_queue(netdev_get_tx_queue(np->dev, i)); } kfree(np->tx_rings); np->tx_rings = NULL; @@ -6739,8 +6731,6 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, prod = NEXT_TX(rp, prod); } - netdev_tx_sent_queue(txq, skb->len); - if (prod < rp->prod) rp->wrap_bit ^= TX_RING_KICK_WRAP; rp->prod = prod; From c8e9cf7bb240049117d2fa64d1540476c289396d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 7 Jun 2012 12:15:15 +0200 Subject: [PATCH 293/475] vga_switcheroo: Add a helper function to get the client state Add vga_switcheroo_get_client_state() to get the current state of the client. This is necessary to determine the proper initial state of audio clients in HD-audio driver. Acked-by: Dave Airlie Signed-off-by: Takashi Iwai --- drivers/gpu/vga/vga_switcheroo.c | 13 +++++++++++++ include/linux/vga_switcheroo.h | 7 +++++++ 2 files changed, 20 insertions(+) diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index 38f9534ac51..eb4f64f0a56 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -190,6 +190,19 @@ find_active_client(struct list_head *head) return NULL; } +int vga_switcheroo_get_client_state(struct pci_dev *pdev) +{ + struct vga_switcheroo_client *client; + + client = find_client_from_pci(&vgasr_priv.clients, pdev); + if (!client) + return VGA_SWITCHEROO_NOT_FOUND; + if (!vgasr_priv.active) + return VGA_SWITCHEROO_INIT; + return client->pwr_state; +} +EXPORT_SYMBOL(vga_switcheroo_get_client_state); + void vga_switcheroo_unregister_client(struct pci_dev *pdev) { struct vga_switcheroo_client *client; diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index b455c7c212e..b176342ca03 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -12,6 +12,9 @@ enum vga_switcheroo_state { VGA_SWITCHEROO_OFF, VGA_SWITCHEROO_ON, + /* below are referred only from vga_switcheroo_get_client_state() */ + VGA_SWITCHEROO_INIT, + VGA_SWITCHEROO_NOT_FOUND, }; enum vga_switcheroo_client_id { @@ -50,6 +53,8 @@ void vga_switcheroo_unregister_handler(void); int vga_switcheroo_process_delayed_switch(void); +int vga_switcheroo_get_client_state(struct pci_dev *dev); + #else static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {} @@ -62,5 +67,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, int id, bool active) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } +static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_CLIENT_ON; } + #endif From 12b78a7f671aa91fd41899615bf6d171c925c3d7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 7 Jun 2012 12:15:16 +0200 Subject: [PATCH 294/475] ALSA: hda - Fix uninitialized HDMI controllers with VGA-switcheroo When VGA-switcheroo is built in but unused on systems with multiple graphics cards, the initializations of non-default graphics cards are skipped and never enabled (because the switcheroo is activated only when the controller supports). The current behavior is for avoiding the system lockup by accessing the disabled GPU, but due to the recent change in VGA-switcheroo, it determines the state simply by checking with the default VGA device. This is the culprit. Now with the new vga_switcheroo_get_client_state(), we can know the initial state of the bound GPU, thus can determine the initial audio client state more correctly. Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 2b6392be451..5f0375fefc8 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2670,7 +2670,7 @@ static bool __devinit check_hdmi_disabled(struct pci_dev *pci) struct pci_dev *p = get_bound_vga(pci); if (p) { - if (vga_default_device() && p != vga_default_device()) + if (vga_switcheroo_get_client_state(p) == VGA_SWITCHEROO_OFF) vga_inactive = true; pci_dev_put(p); } From bd2753b2dda7bb43c7468826de75f49c6a7e8965 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 6 Jun 2012 10:55:40 -0700 Subject: [PATCH 295/475] x86/mm: Only add extra pages count for the first memory range during pre-allocation early page table space Robin found this regression: | I just tried to boot an 8TB system. It fails very early in boot with: | Kernel panic - not syncing: Cannot find space for the kernel page tables git bisect commit 722bc6b16771ed80871e1fd81c86d3627dda2ac8. A git revert of that commit does boot past that point on the 8TB configuration. That commit will add up extra pages for all memory range even above 4g. Try to limit that extra page count adding to first entry only. Bisected-by: Robin Holt Tested-by: Robin Holt Signed-off-by: Yinghai Lu Cc: WANG Cong Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/CAE9FiQUj3wyzQxtq9yzBNc9u220p8JZ1FYHG7t%3DMOzJ%3D9BZMYA@mail.gmail.com Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 97141c26a13..bc4e9d84157 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -62,7 +62,8 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en extra += PMD_SIZE; #endif /* The first 2/4M doesn't use large pages. */ - extra += mr->end - mr->start; + if (mr->start < PMD_SIZE) + extra += mr->end - mr->start; ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; } else From d5d2d2eea84b0d8450b082edbc3dbde41fb8bfd8 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Thu, 7 Jun 2012 08:31:40 -0500 Subject: [PATCH 296/475] x86/uv: Fix UV2 BAU legacy mode The SGI Altix UV2 BAU (Broadcast Assist Unit) as used for tlb-shootdown (selective broadcast mode) always uses UV2 broadcast descriptor format. There is no need to clear the 'legacy' (UV1) mode, because the hardware always uses UV2 mode for selective broadcast. But the BIOS uses general broadcast and legacy mode, and the hardware pays attention to the legacy mode bit for general broadcast. So the kernel must not clear that mode bit. Signed-off-by: Cliff Wickman Cc: Link: http://lkml.kernel.org/r/E1SccoO-0002Lh-Cb@eag09.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 1 - arch/x86/platform/uv/tlb_uv.c | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index becf47b8173..6149b476d9d 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -149,7 +149,6 @@ /* 4 bits of software ack period */ #define UV2_ACK_MASK 0x7UL #define UV2_ACK_UNITS_SHFT 3 -#define UV2_LEG_SHFT UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT #define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT /* diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 3ae0e61abd2..59880afa851 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1295,7 +1295,6 @@ static void __init enable_timeouts(void) */ mmr_image |= (1L << SOFTACK_MSHIFT); if (is_uv2_hub()) { - mmr_image &= ~(1L << UV2_LEG_SHFT); mmr_image |= (1L << UV2_EXT_SHFT); } write_mmr_misc_control(pnode, mmr_image); From 3c75296562f43e6fbc6cddd3de948a7b3e4e9bcf Mon Sep 17 00:00:00 2001 From: Steffen Rumler Date: Wed, 6 Jun 2012 16:37:17 +0200 Subject: [PATCH 297/475] powerpc: Fix kernel panic during kernel module load This fixes a problem which can causes kernel oopses while loading a kernel module. According to the PowerPC EABI specification, GPR r11 is assigned the dedicated function to point to the previous stack frame. In the powerpc-specific kernel module loader, do_plt_call() (in arch/powerpc/kernel/module_32.c), GPR r11 is also used to generate trampoline code. This combination crashes the kernel, in the case where the compiler chooses to use a helper function for saving GPRs on entry, and the module loader has placed the .init.text section far away from the .text section, meaning that it has to generate a trampoline for functions in the .init.text section to call the GPR save helper. Because the trampoline trashes r11, references to the stack frame using r11 can cause an oops. The fix just uses GPR r12 instead of GPR r11 for generating the trampoline code. According to the statements from Freescale, this is safe from an EABI perspective. I've tested the fix for kernel 2.6.33 on MPC8541. Cc: stable@vger.kernel.org Signed-off-by: Steffen Rumler [paulus@samba.org: reworded the description] Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/module_32.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 0b6d79617d7..2e3200ca485 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -176,8 +176,8 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr, static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) { - if (entry->jump[0] == 0x3d600000 + ((val + 0x8000) >> 16) - && entry->jump[1] == 0x396b0000 + (val & 0xffff)) + if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16) + && entry->jump[1] == 0x398c0000 + (val & 0xffff)) return 1; return 0; } @@ -204,10 +204,9 @@ static uint32_t do_plt_call(void *location, entry++; } - /* Stolen from Paul Mackerras as well... */ - entry->jump[0] = 0x3d600000+((val+0x8000)>>16); /* lis r11,sym@ha */ - entry->jump[1] = 0x396b0000 + (val&0xffff); /* addi r11,r11,sym@l*/ - entry->jump[2] = 0x7d6903a6; /* mtctr r11 */ + entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */ + entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/ + entry->jump[2] = 0x7d8903a6; /* mtctr r12 */ entry->jump[3] = 0x4e800420; /* bctr */ DEBUGP("Initialized plt for 0x%x at %p\n", val, entry); From eab309494ae2b9e15f85520f00de3893162c2e43 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 24 May 2012 00:45:21 -0700 Subject: [PATCH 298/475] memblock: Document memblock_is_region_{memory,reserved}() At first glance one would think that memblock_is_region_memory() and memblock_is_region_reserved() would be implemented in the same way. Unfortunately they aren't and the former returns whether the region specified is a subset of a memory bank while the latter returns whether the region specified intersects with reserved memory. Document the two functions so that users aren't tempted to make the implementation the same between them and to clarify the purpose of the functions. Signed-off-by: Stephen Boyd Cc: Tejun Heo Link: http://lkml.kernel.org/r/1337845521-32755-1-git-send-email-sboyd@codeaurora.org Signed-off-by: Ingo Molnar --- mm/memblock.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mm/memblock.c b/mm/memblock.c index 952123eba43..32a0a5e4d79 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -867,6 +867,16 @@ int __init_memblock memblock_is_memory(phys_addr_t addr) return memblock_search(&memblock.memory, addr) != -1; } +/** + * memblock_is_region_memory - check if a region is a subset of memory + * @base: base of region to check + * @size: size of region to check + * + * Check if the region [@base, @base+@size) is a subset of a memory block. + * + * RETURNS: + * 0 if false, non-zero if true + */ int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) { int idx = memblock_search(&memblock.memory, base); @@ -879,6 +889,16 @@ int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size memblock.memory.regions[idx].size) >= end; } +/** + * memblock_is_region_reserved - check if a region intersects reserved memory + * @base: base of region to check + * @size: size of region to check + * + * Check if the region [@base, @base+@size) intersects a reserved memory block. + * + * RETURNS: + * 0 if false, non-zero if true + */ int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) { memblock_cap_size(base, &size); From eeaaa96a3a2134a174100afd129bb0891d05f4b2 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Wed, 6 Jun 2012 10:05:42 -0400 Subject: [PATCH 299/475] x86/nmi: Fix section mismatch warnings on 32-bit It was reported that compiling for 32-bit caused a bunch of section mismatch warnings: VDSOSYM arch/x86/vdso/vdso32-syms.lds LD arch/x86/vdso/built-in.o LD arch/x86/built-in.o WARNING: arch/x86/built-in.o(.data+0x5af0): Section mismatch in reference from the variable test_nmi_ipi_callback_na.10451 to the function .init.text:test_nmi_ipi_callback() [...] WARNING: arch/x86/built-in.o(.data+0x5b04): Section mismatch in reference from the variable nmi_unk_cb_na.10399 to the function .init.text:nmi_unk_cb() The variable nmi_unk_cb_na.10399 references the function __init nmi_unk_cb() [...] Both of these are attributed to the internal representation of the nmiaction struct created during register_nmi_handler. The reason for this is that those structs are not defined in the init section whereas the rest of the code in nmi_selftest.c is. To resolve this, I created a new #define, register_nmi_handler_initonly, that tags the struct as __initdata to resolve the mismatch. This #define should only be used in rare situations where the register/unregister is called during init of the kernel. Big thanks to Jan Beulich for decoding this for me as I didn't have a clue what was going on. Reported-by: Witold Baryluk Tested-by: Witold Baryluk Cc: Jan Beulich Signed-off-by: Don Zickus Link: http://lkml.kernel.org/r/1338991542-23000-1-git-send-email-dzickus@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 14 ++++++++++++++ arch/x86/kernel/nmi_selftest.c | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 0e3793b821e..dc580c42851 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -54,6 +54,20 @@ struct nmiaction { __register_nmi_handler((t), &fn##_na); \ }) +/* + * For special handlers that register/unregister in the + * init section only. This should be considered rare. + */ +#define register_nmi_handler_initonly(t, fn, fg, n) \ +({ \ + static struct nmiaction fn##_na __initdata = { \ + .handler = (fn), \ + .name = (n), \ + .flags = (fg), \ + }; \ + __register_nmi_handler((t), &fn##_na); \ +}) + int __register_nmi_handler(unsigned int, struct nmiaction *); void unregister_nmi_handler(unsigned int, const char *); diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index e31bf8d5c4d..149b8d9c6ad 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c @@ -42,7 +42,7 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs) static void __init init_nmi_testsuite(void) { /* trap all the unknown NMIs we may generate */ - register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); + register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); } static void __init cleanup_nmi_testsuite(void) @@ -64,7 +64,7 @@ static void __init test_nmi_ipi(struct cpumask *mask) { unsigned long timeout; - if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, + if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback, NMI_FLAG_FIRST, "nmi_selftest")) { nmi_fail = FAILURE; return; From 505cff00de9c303b95c204eb4544066e3e707911 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jun 2012 12:49:17 +0200 Subject: [PATCH 300/475] vga_switcheroo: Fix error without CONFIG_VGA_SWITCHEROO Fix a typo that is built only when CONFIG_VGA_SWITCHEROO=n. Signed-off-by: Takashi Iwai --- include/linux/vga_switcheroo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index b176342ca03..60da41fe9dc 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -67,7 +67,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, int id, bool active) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } -static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_CLIENT_ON; } +static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } #endif From 8393ec4a13889339bacfb8b038fb13716eef7a2b Mon Sep 17 00:00:00 2001 From: Steven Newbury Date: Fri, 8 Jun 2012 13:06:29 +0200 Subject: [PATCH 301/475] ALSA: hda - HDMI Audio init all connectors when VGA-switcheroo is off When VGA_SWITCHEROO support is enabled hda_intel initialises the HDMI audio device on the current VGA device. When it's not enabled it only initialises the HDMI device on the default VGA adaptor, this means secondary cards get no audio support which is very unhelpful for multi-seat! With this patch, when SUPPORT_VGA_SWITCHEROO is disabled hda_intel initialises all HDMI audio devices, not just the default VGA. [minor optimizations by tiwai] Signed-off-by: Steven Newbury Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 5f0375fefc8..e172c5d6855 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2484,9 +2484,9 @@ static void azx_notifier_unregister(struct azx *chip) static int DELAYED_INIT_MARK azx_first_init(struct azx *chip); static int DELAYED_INIT_MARK azx_probe_continue(struct azx *chip); +#ifdef SUPPORT_VGA_SWITCHEROO static struct pci_dev __devinit *get_bound_vga(struct pci_dev *pci); -#ifdef SUPPORT_VGA_SWITCHEROO static void azx_vs_set_state(struct pci_dev *pci, enum vga_switcheroo_state state) { @@ -2578,6 +2578,7 @@ static int __devinit register_vga_switcheroo(struct azx *chip) #else #define init_vga_switcheroo(chip) /* NOP */ #define register_vga_switcheroo(chip) 0 +#define check_hdmi_disabled(pci) false #endif /* SUPPORT_VGA_SWITCHER */ /* @@ -2638,6 +2639,7 @@ static int azx_dev_free(struct snd_device *device) return azx_free(device->device_data); } +#ifdef SUPPORT_VGA_SWITCHEROO /* * Check of disabled HDMI controller by vga-switcheroo */ @@ -2676,6 +2678,7 @@ static bool __devinit check_hdmi_disabled(struct pci_dev *pci) } return vga_inactive; } +#endif /* SUPPORT_VGA_SWITCHEROO */ /* * white/black-listing for position_fix From 2d0dbc6ae8a5194aaecb9cfffb9053f38fce8b86 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 8 Jun 2012 10:58:09 -0400 Subject: [PATCH 302/475] NFSv4: Fix unnecessary delegation returns in nfs4_do_open While nfs4_do_open() expects the fmode argument to be restricted to combinations of FMODE_READ and FMODE_WRITE, both nfs4_atomic_open() and nfs4_proc_create will pass the nfs_open_context->mode, which contains the full fmode_t. This patch ensures that nfs4_do_open strips the other fmode_t bits, fixing a problem in which the nfs4_do_open call would result in an unnecessary delegation return. Reported-by: Fred Isaman Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5881f2cc5d8..f23977e4ac0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1902,6 +1902,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct nfs4_state *res; int status; + fmode &= FMODE_READ|FMODE_WRITE; do { status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res, ctx_th); From 32ba9c3fcab960f0b0d332c86ebcd2c4870d9bb8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 8 Jun 2012 10:34:03 -0700 Subject: [PATCH 303/475] Revert "vfs: stop d_splice_alias creating directory aliases" This reverts commit 7732a557b1342c6e6966efb5f07effcf99f56167 (and commit 3f50fff4dace23d3cfeb195d5cd4ee813cee68b7, which was a follow-up cleanup). We're chasing an elusive bug that Dave Jones can apparently reproduce using his system call fuzzer tool, and that looks like some kind of locking ordering problem on the directory i_mutex chain. Our i_mutex locking is rather complex, and depends on the topological ordering of the directories, which is why we have been very wary of splicing directory entries around. Of course, we really don't want to ever see aliased unconnected directories anyway, so none of this should ever happen, but this revert aims to basically get us back to a known older state. Bruce points to some of the previous discussion at http://marc.info/?i=<20110310105821.GE22723@ZenIV.linux.org.uk> and in particular a long post from Neil: http://marc.info/?i=<20110311150749.2fa2be66@notabene.brown> It should be noted that it's possible that Dave's problems come from other changes altohgether, including possibly just the fact that Dave constantly is teachning his fuzzer new tricks. So what appears to be a new bug could in fact be an old one that just gets newly triggered, but reverting these patches as "still under heavy discussion" is the right thing regardless. Requested-by: Al Viro Acked-by: J. Bruce Fields Signed-off-by: Linus Torvalds --- fs/dcache.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 85c9e2bff8e..40469044088 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -683,6 +683,8 @@ EXPORT_SYMBOL(dget_parent); /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question + * @want_discon: flag, used by d_splice_alias, to request + * that only a DISCONNECTED alias be returned. * * If inode has a hashed alias, or is a directory and has any alias, * acquire the reference to alias and return it. Otherwise return NULL. @@ -691,9 +693,10 @@ EXPORT_SYMBOL(dget_parent); * of a filesystem. * * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer - * any other hashed alias over that. + * any other hashed alias over that one unless @want_discon is set, + * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. */ -static struct dentry *__d_find_alias(struct inode *inode) +static struct dentry *__d_find_alias(struct inode *inode, int want_discon) { struct dentry *alias, *discon_alias; @@ -705,7 +708,7 @@ again: if (IS_ROOT(alias) && (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; - } else { + } else if (!want_discon) { __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; @@ -736,7 +739,7 @@ struct dentry *d_find_alias(struct inode *inode) if (!list_empty(&inode->i_dentry)) { spin_lock(&inode->i_lock); - de = __d_find_alias(inode); + de = __d_find_alias(inode, 0); spin_unlock(&inode->i_lock); } return de; @@ -1647,8 +1650,9 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) if (inode && S_ISDIR(inode->i_mode)) { spin_lock(&inode->i_lock); - new = __d_find_any_alias(inode); + new = __d_find_alias(inode, 1); if (new) { + BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); spin_unlock(&inode->i_lock); security_d_instantiate(new, inode); d_move(new, dentry); @@ -2478,7 +2482,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) struct dentry *alias; /* Does an aliased dentry already exist? */ - alias = __d_find_alias(inode); + alias = __d_find_alias(inode, 0); if (alias) { actual = alias; write_seqlock(&rename_lock); From aac495a8002bbf20e879572c6cf0789ab2ffb32b Mon Sep 17 00:00:00 2001 From: Stanislav Yakovlev Date: Mon, 14 May 2012 19:06:19 -0400 Subject: [PATCH 304/475] net/wireless: ipw2100: Fix WARN_ON occurring in wiphy_register called by ipw2100_pci_init_one The problem was found by Larry Finger: http://marc.info/?l=linux-wireless&m=133702401700614&w=2 The problem is identical to the one for ipw2200 which is already fixed: http://marc.info/?l=linux-wireless&m=133457257407196&w=2 [ 17.766431] ------------[ cut here ]------------ [ 17.766467] WARNING: at net/wireless/core.c:562 wiphy_register+0x34c/0x3c0 [cfg80211]() [ 17.766471] Hardware name: Latitude D600 [ 17.766474] Modules linked in: ipw2100(+) libipw pcmcia cfg80211 ppdev parport_pc yenta_socket sr_mod pcmcia_rsrc parport iTCO_wdt cdrom sg rfkill pcmcia_ core lib80211 tg3 video button battery ac iTCO_vendor_support joydev shpchp pcspkr pciehp pci_hotplug autofs4 radeon ttm drm_kms_helper uhci_hcd ehci_hcd rtc _cmos thermal drm hwmon i2c_algo_bit i2c_core processor usbcore usb_common ata_generic ata_piix ahci libahci libata [ 17.766525] Pid: 474, comm: modprobe Not tainted 3.4.0-rc7-wl+ #6 [ 17.766528] Call Trace: [ 17.766541] [] ? printk+0x28/0x2a [ 17.766552] [] warn_slowpath_common+0x6d/0xa0 [ 17.766563] [] ? wiphy_register+0x34c/0x3c0 [cfg80211] [ 17.766573] [] ? wiphy_register+0x34c/0x3c0 [cfg80211] [ 17.766578] [] warn_slowpath_null+0x1d/0x20 [ 17.766588] [] wiphy_register+0x34c/0x3c0 [cfg80211] [ 17.766605] [] ipw2100_wdev_init+0x196/0x1c0 [ipw2100] [ 17.766616] [] ipw2100_pci_init_one+0x2b2/0x694 [ipw2100] [ 17.766632] [] local_pci_probe+0x42/0xb0 [ 17.766637] [] pci_device_probe+0x60/0x90 [ 17.766645] [] ? sysfs_create_link+0x12/0x20 [ 17.766654] [] really_probe+0x56/0x2e0 [ 17.766659] [] ? create_dir+0x5d/0xa0 [ 17.766667] [] ? pm_runtime_barrier+0x3b/0xa0 [ 17.766672] [] driver_probe_device+0x44/0xa0 [ 17.766677] [] ? pci_match_device+0x97/0xa0 [ 17.766681] [] __driver_attach+0x89/0x90 [ 17.766686] [] ? driver_probe_device+0xa0/0xa0 [ 17.766691] [] bus_for_each_dev+0x3a/0x70 [ 17.766695] [] driver_attach+0x1c/0x30 [ 17.766699] [] ? driver_probe_device+0xa0/0xa0 [ 17.766704] [] bus_add_driver+0x187/0x280 [ 17.766710] [] ? kset_find_obj+0x2d/0x60 [ 17.766715] [] ? pci_device_probe+0x90/0x90 [ 17.766719] [] ? pci_device_probe+0x90/0x90 [ 17.766724] [] driver_register+0x65/0x110 [ 17.766729] [] __pci_register_driver+0x3d/0xa0 [ 17.766738] [] ipw2100_init+0x5c/0x1000 [ipw2100] [ 17.766743] [] do_one_initcall+0x2f/0x170 [ 17.766749] [] ? 0xe09f6fff [ 17.766757] [] sys_init_module+0xa8/0x210 [ 17.766766] [] syscall_call+0x7/0xb [ 17.766769] ---[ end trace 559898c6bb0d1c75 ]--- [ 17.767093] ipw2100: probe of 0000:02:03.0 failed with error -5 This warning appears only if we apply Ben Hutchings' fix http://marc.info/?l=linux-wireless&m=132720204412667&w=2 for the bug reported by Cesare Leonardi http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=656813 with cfg80211 warning during device registration ("cfg80211: failed to add phy80211 symlink to netdev!"). We separate device bring up and registration with network stack to avoid the problem. Signed-off-by: Stanislav Yakovlev Tested-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/ipw2x00/ipw2100.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c index 9cfae0c0870..95aa8e1683e 100644 --- a/drivers/net/wireless/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/ipw2x00/ipw2100.c @@ -1903,14 +1903,6 @@ static void ipw2100_down(struct ipw2100_priv *priv) netif_stop_queue(priv->net_dev); } -/* Called by register_netdev() */ -static int ipw2100_net_init(struct net_device *dev) -{ - struct ipw2100_priv *priv = libipw_priv(dev); - - return ipw2100_up(priv, 1); -} - static int ipw2100_wdev_init(struct net_device *dev) { struct ipw2100_priv *priv = libipw_priv(dev); @@ -6087,7 +6079,6 @@ static const struct net_device_ops ipw2100_netdev_ops = { .ndo_stop = ipw2100_close, .ndo_start_xmit = libipw_xmit, .ndo_change_mtu = libipw_change_mtu, - .ndo_init = ipw2100_net_init, .ndo_tx_timeout = ipw2100_tx_timeout, .ndo_set_mac_address = ipw2100_set_address, .ndo_validate_addr = eth_validate_addr, @@ -6329,6 +6320,10 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev, printk(KERN_INFO DRV_NAME ": Detected Intel PRO/Wireless 2100 Network Connection\n"); + err = ipw2100_up(priv, 1); + if (err) + goto fail; + err = ipw2100_wdev_init(dev); if (err) goto fail; @@ -6338,12 +6333,7 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev, * network device we would call ipw2100_up. This introduced a race * condition with newer hotplug configurations (network was coming * up and making calls before the device was initialized). - * - * If we called ipw2100_up before we registered the device, then the - * device name wasn't registered. So, we instead use the net_dev->init - * member to call a function that then just turns and calls ipw2100_up. - * net_dev->init is called after name allocation but before the - * notifier chain is called */ + */ err = register_netdev(dev); if (err) { printk(KERN_WARNING DRV_NAME From 0fde0a8cfd0ede7f310d6a681c8e5a7cb3e32406 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 16 May 2012 11:06:21 +0200 Subject: [PATCH 305/475] rtl8187: ->brightness_set can not sleep Fix: BUG: sleeping function called from invalid context at kernel/workqueue.c:2547 in_atomic(): 1, irqs_disabled(): 0, pid: 629, name: wpa_supplicant 2 locks held by wpa_supplicant/629: #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x14/0x20 #1: (&trigger->leddev_list_lock){.+.?..}, at: [] led_trigger_event+0x21/0x80 Pid: 629, comm: wpa_supplicant Not tainted 3.3.0-0.rc3.git5.1.fc17.i686 Call Trace: [] __might_sleep+0x126/0x1d0 [] wait_on_work+0x2c/0x1d0 [] __cancel_work_timer+0x6a/0x120 [] cancel_delayed_work_sync+0x10/0x20 [] rtl8187_led_brightness_set+0x82/0xf0 [rtl8187] [] led_trigger_event+0x5c/0x80 [] ieee80211_led_radio+0x1d/0x40 [mac80211] [] ieee80211_stop_device+0x13/0x230 [mac80211] Removing _sync is ok, because if led_on work is currently running it will be finished before led_off work start to perform, since they are always queued on the same mac80211 local->workqueue. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=795176 Signed-off-by: Stanislaw Gruszka Acked-by: Larry Finger Acked-by: Hin-Tak Leung Signed-off-by: John W. Linville --- drivers/net/wireless/rtl818x/rtl8187/leds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/rtl818x/rtl8187/leds.c b/drivers/net/wireless/rtl818x/rtl8187/leds.c index 2e0de2f5f0f..c2d5b495c17 100644 --- a/drivers/net/wireless/rtl818x/rtl8187/leds.c +++ b/drivers/net/wireless/rtl818x/rtl8187/leds.c @@ -117,7 +117,7 @@ static void rtl8187_led_brightness_set(struct led_classdev *led_dev, radio_on = true; } else if (radio_on) { radio_on = false; - cancel_delayed_work_sync(&priv->led_on); + cancel_delayed_work(&priv->led_on); ieee80211_queue_delayed_work(hw, &priv->led_off, 0); } } else if (radio_on) { From 06cf5c4c5bea549235da1aace86b90d4ad0084f8 Mon Sep 17 00:00:00 2001 From: Qasim Javed Date: Tue, 5 Jun 2012 01:25:44 -0500 Subject: [PATCH 306/475] mac80211_hwsim: Set IEEE80211_STAT_ACK flag when userspace indicates that the frame has been acknowledged. The station fail average is not updated correctly since the IEEE80211_STAT_ACK flag is not set when using wmediumd with mac80211_hwsim. Set this flag when wmediumd indicates that the frame was successfully transmitted (eventually). Signed-off-by: Qasim Javed Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- drivers/net/wireless/mac80211_hwsim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 4c9336cee81..a0b7cfd3468 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1555,6 +1555,7 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, hdr = (struct ieee80211_hdr *) skb->data; mac80211_hwsim_monitor_ack(data2->hw, hdr->addr2); } + txi->flags |= IEEE80211_TX_STAT_ACK; } ieee80211_tx_status_irqsafe(data2->hw, skb); return 0; From ce466579b1c92fce78bf5f74a3707f46e228d949 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Jun 2012 15:42:55 +0200 Subject: [PATCH 307/475] wireless: add my new trees to MAINTAINERS Add my new trees to the MAINTAINERS file for the components that I maintain in the new trees. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- MAINTAINERS | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 55f0fda602e..03660de94cf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1800,6 +1800,9 @@ F: include/linux/cfag12864b.h CFG80211 and NL80211 M: Johannes Berg L: linux-wireless@vger.kernel.org +W: http://wireless.kernel.org/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git S: Maintained F: include/linux/nl80211.h F: include/net/cfg80211.h @@ -4340,7 +4343,8 @@ MAC80211 M: Johannes Berg L: linux-wireless@vger.kernel.org W: http://linuxwireless.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git S: Maintained F: Documentation/networking/mac80211-injection.txt F: include/net/mac80211.h @@ -4351,7 +4355,8 @@ M: Stefano Brivio M: Mattias Nissler L: linux-wireless@vger.kernel.org W: http://linuxwireless.org/en/developers/Documentation/mac80211/RateControl/PID -T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git S: Maintained F: net/mac80211/rc80211_pid* @@ -5695,6 +5700,9 @@ F: include/linux/remoteproc.h RFKILL M: Johannes Berg L: linux-wireless@vger.kernel.org +W: http://wireless.kernel.org/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git S: Maintained F: Documentation/rfkill.txt F: net/rfkill/ From d2c8b15d0cb486f4938ba7f2af349d9d1220cb10 Mon Sep 17 00:00:00 2001 From: Meenakshi Venkataraman Date: Tue, 5 Jun 2012 20:24:37 +0200 Subject: [PATCH 308/475] iwlwifi: use correct supported firmware for 6035 and 6000g2 My patch iwlwifi: use correct released ucode version did not correctly report supported firmware for the 6035 device. This patch fixes it. The minimum supported firmware version for 6035 is v6. Also correct the minimum supported firmware version for the 6000g2 series of devices. Cc: stable@kernel.org Signed-off-by: Meenakshi Venkataraman Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-6000.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-6000.c b/drivers/net/wireless/iwlwifi/iwl-6000.c index 19f7ee84ae8..e5e8ada4aaf 100644 --- a/drivers/net/wireless/iwlwifi/iwl-6000.c +++ b/drivers/net/wireless/iwlwifi/iwl-6000.c @@ -35,17 +35,20 @@ #define IWL6000_UCODE_API_MAX 6 #define IWL6050_UCODE_API_MAX 5 #define IWL6000G2_UCODE_API_MAX 6 +#define IWL6035_UCODE_API_MAX 6 /* Oldest version we won't warn about */ #define IWL6000_UCODE_API_OK 4 #define IWL6000G2_UCODE_API_OK 5 #define IWL6050_UCODE_API_OK 5 #define IWL6000G2B_UCODE_API_OK 6 +#define IWL6035_UCODE_API_OK 6 /* Lowest firmware API version supported */ #define IWL6000_UCODE_API_MIN 4 #define IWL6050_UCODE_API_MIN 4 -#define IWL6000G2_UCODE_API_MIN 4 +#define IWL6000G2_UCODE_API_MIN 5 +#define IWL6035_UCODE_API_MIN 6 /* EEPROM versions */ #define EEPROM_6000_TX_POWER_VERSION (4) @@ -227,9 +230,25 @@ const struct iwl_cfg iwl6030_2bg_cfg = { IWL_DEVICE_6030, }; +#define IWL_DEVICE_6035 \ + .fw_name_pre = IWL6030_FW_PRE, \ + .ucode_api_max = IWL6035_UCODE_API_MAX, \ + .ucode_api_ok = IWL6035_UCODE_API_OK, \ + .ucode_api_min = IWL6035_UCODE_API_MIN, \ + .device_family = IWL_DEVICE_FAMILY_6030, \ + .max_inst_size = IWL60_RTC_INST_SIZE, \ + .max_data_size = IWL60_RTC_DATA_SIZE, \ + .eeprom_ver = EEPROM_6030_EEPROM_VERSION, \ + .eeprom_calib_ver = EEPROM_6030_TX_POWER_VERSION, \ + .base_params = &iwl6000_g2_base_params, \ + .bt_params = &iwl6000_bt_params, \ + .need_temp_offset_calib = true, \ + .led_mode = IWL_LED_RF_STATE, \ + .adv_pm = true + const struct iwl_cfg iwl6035_2agn_cfg = { .name = "Intel(R) Centrino(R) Advanced-N 6235 AGN", - IWL_DEVICE_6030, + IWL_DEVICE_6035, .ht_params = &iwl6000_ht_params, }; From e64add27e1b4874f589e550b4e0ca6715070373f Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 5 Jun 2012 20:39:32 +0200 Subject: [PATCH 309/475] b43: do not call ieee80211_unregister_hw if we are not registred this patch fixes kernel Oops on "rmmod b43" if firmware was not loaded: BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 IP: [] drain_workqueue+0x25/0x142 PGD 153ac6067 PUD 153b82067 PMD 0 Oops: 0000 [#1] SMP Signed-off-by: Oleksij Rempel Signed-off-by: John W. Linville --- drivers/net/wireless/b43/b43.h | 4 ++++ drivers/net/wireless/b43/main.c | 19 ++++++++++++------- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/b43/b43.h b/drivers/net/wireless/b43/b43.h index 67c13af6f20..c06b6cb5c91 100644 --- a/drivers/net/wireless/b43/b43.h +++ b/drivers/net/wireless/b43/b43.h @@ -877,6 +877,10 @@ struct b43_wl { * from the mac80211 subsystem. */ u16 mac80211_initially_registered_queues; + /* Set this if we call ieee80211_register_hw() and check if we call + * ieee80211_unregister_hw(). */ + bool hw_registred; + /* We can only have one operating interface (802.11 core) * at a time. General information about this interface follows. */ diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 5a39b226b2e..acd03a4f973 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -2437,6 +2437,7 @@ start_ieee80211: err = ieee80211_register_hw(wl->hw); if (err) goto err_one_core_detach; + wl->hw_registred = true; b43_leds_register(wl->current_dev); goto out; @@ -5299,6 +5300,7 @@ static struct b43_wl *b43_wireless_init(struct b43_bus_dev *dev) hw->queues = modparam_qos ? B43_QOS_QUEUE_NUM : 1; wl->mac80211_initially_registered_queues = hw->queues; + wl->hw_registred = false; hw->max_rates = 2; SET_IEEE80211_DEV(hw, dev->dev); if (is_valid_ether_addr(sprom->et1mac)) @@ -5370,12 +5372,15 @@ static void b43_bcma_remove(struct bcma_device *core) * as the ieee80211 unreg will destroy the workqueue. */ cancel_work_sync(&wldev->restart_work); - /* Restore the queues count before unregistering, because firmware detect - * might have modified it. Restoring is important, so the networking - * stack can properly free resources. */ - wl->hw->queues = wl->mac80211_initially_registered_queues; - b43_leds_stop(wldev); - ieee80211_unregister_hw(wl->hw); + B43_WARN_ON(!wl); + if (wl->current_dev == wldev && wl->hw_registred) { + /* Restore the queues count before unregistering, because firmware detect + * might have modified it. Restoring is important, so the networking + * stack can properly free resources. */ + wl->hw->queues = wl->mac80211_initially_registered_queues; + b43_leds_stop(wldev); + ieee80211_unregister_hw(wl->hw); + } b43_one_core_detach(wldev->dev); @@ -5446,7 +5451,7 @@ static void b43_ssb_remove(struct ssb_device *sdev) cancel_work_sync(&wldev->restart_work); B43_WARN_ON(!wl); - if (wl->current_dev == wldev) { + if (wl->current_dev == wldev && wl->hw_registred) { /* Restore the queues count before unregistering, because firmware detect * might have modified it. Restoring is important, so the networking * stack can properly free resources. */ From e7027075d0d8fe2d1bf7b0db24623328d2601d3c Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 5 Jun 2012 20:58:20 +0200 Subject: [PATCH 310/475] bcma: fix null pointer in bcma_core_pci_irq_ctl pc could be null if hosttype != BCMA_HOSTTYPE_PCI. If we are on a device without a pci core this function is called with pc = null by b43 and brcmsmac. If the host type is PCI we have a pci core as well and pc can not be null. Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville --- drivers/bcma/driver_pci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c index 9a96f14c8f4..c32ebd537ab 100644 --- a/drivers/bcma/driver_pci.c +++ b/drivers/bcma/driver_pci.c @@ -232,17 +232,19 @@ void __devinit bcma_core_pci_init(struct bcma_drv_pci *pc) int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, bool enable) { - struct pci_dev *pdev = pc->core->bus->host_pci; + struct pci_dev *pdev; u32 coremask, tmp; int err = 0; - if (core->bus->hosttype != BCMA_HOSTTYPE_PCI) { + if (!pc || core->bus->hosttype != BCMA_HOSTTYPE_PCI) { /* This bcma device is not on a PCI host-bus. So the IRQs are * not routed through the PCI core. * So we must not enable routing through the PCI core. */ goto out; } + pdev = pc->core->bus->host_pci; + err = pci_read_config_dword(pdev, BCMA_PCI_IRQMASK, &tmp); if (err) goto out; From d6ee27eb13beab94056e0de52d81220058ca2297 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 6 Jun 2012 09:13:36 +0200 Subject: [PATCH 311/475] iwlwifi: don't mess up the SCD when removing a key When we remove a key, we put a key index which was supposed to tell the fw that we are actually removing the key. But instead the fw took that index as a valid index and messed up the SRAM of the device. This memory corruption on the device mangled the data of the SCD. The impact on the user is that SCD queue 2 got stuck after having removed keys. The message is the log that was printed is: Queue 2 stuck for 10000ms This doesn't seem to fix the higher queues that get stuck from time to time. Cc: stable@vger.kernel.org [2.6.27+] Reviewed-by: Meenakshi Venkataraman Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-agn-sta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-sta.c b/drivers/net/wireless/iwlwifi/iwl-agn-sta.c index aea07aab3c9..eb6a8eaf42f 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-sta.c @@ -1267,7 +1267,7 @@ int iwl_remove_dynamic_key(struct iwl_priv *priv, key_flags |= STA_KEY_MULTICAST_MSK; sta_cmd.key.key_flags = key_flags; - sta_cmd.key.key_offset = WEP_INVALID_OFFSET; + sta_cmd.key.key_offset = keyconf->hw_key_idx; sta_cmd.sta.modify_mask = STA_MODIFY_KEY_MASK; sta_cmd.mode = STA_CONTROL_MODIFY_MSK; From d012d04e4d6312ea157b6cf19e9689af934f5aa7 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 6 Jun 2012 13:55:02 +0200 Subject: [PATCH 312/475] iwlwifi: disable the buggy chain extension feature in HW This feature has been reported to be buggy and enabled by default. We therefore need to disable it manually. Cc: stable@vger.kernel.org Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-prph.h | 1 + drivers/net/wireless/iwlwifi/iwl-trans-pcie.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/iwl-prph.h b/drivers/net/wireless/iwlwifi/iwl-prph.h index 3b1069290fa..dfd54662e3e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-prph.h +++ b/drivers/net/wireless/iwlwifi/iwl-prph.h @@ -224,6 +224,7 @@ #define SCD_TXFACT (SCD_BASE + 0x10) #define SCD_ACTIVE (SCD_BASE + 0x14) #define SCD_QUEUECHAIN_SEL (SCD_BASE + 0xe8) +#define SCD_CHAINEXT_EN (SCD_BASE + 0x244) #define SCD_AGGR_SEL (SCD_BASE + 0x248) #define SCD_INTERRUPT_MASK (SCD_BASE + 0x108) diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c b/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c index ec6fb395b84..79c6b91417f 100644 --- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c +++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c @@ -1058,6 +1058,11 @@ static void iwl_tx_start(struct iwl_trans *trans) iwl_write_prph(trans, SCD_DRAM_BASE_ADDR, trans_pcie->scd_bc_tbls.dma >> 10); + /* The chain extension of the SCD doesn't work well. This feature is + * enabled by default by the HW, so we need to disable it manually. + */ + iwl_write_prph(trans, SCD_CHAINEXT_EN, 0); + /* Enable DMA channel */ for (chan = 0; chan < FH_TCSR_CHNL_NUM ; chan++) iwl_write_direct32(trans, FH_TCSR_CHNL_TX_CONFIG_REG(chan), From 58d1eab7ef1d7ff8e448699dfd1a21b7f3303296 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 6 Jun 2012 23:02:55 +0200 Subject: [PATCH 313/475] NFC: Fix possible NULL ptr deref when getting the name of a socket llcp_sock_getname() might get called before the LLCP socket was created. This condition isn't checked, and llcp_sock_getname will simply deref a NULL ptr in that case. This exists starting with d646960 ("NFC: Initial LLCP support"). Signed-off-by: Sasha Levin Signed-off-by: John W. Linville --- net/nfc/llcp/sock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 3f339b19d14..17a707db40e 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -292,6 +292,9 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *addr, pr_debug("%p\n", sk); + if (llcp_sock == NULL) + return -EBADFD; + addr->sa_family = AF_NFC; *len = sizeof(struct sockaddr_nfc_llcp); From 6aee4ca3d2217d3f76469e5ed576d62695f0912a Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 7 Jun 2012 14:47:21 +0200 Subject: [PATCH 314/475] mac80211: add back channel change flag commit 24398e39c8ee4a9d9123eed322b859ece4d16cac Author: Johannes Berg Date: Wed Mar 28 10:58:36 2012 +0200 mac80211: set HT channel before association removed IEEE80211_CONF_CHANGE_CHANNEL argument from ieee80211_hw_config, which is required by iwl4965 driver, otherwise that driver does not configure channel properly and is not able to associate. Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d94627c2929..91d84cc77bb 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3124,7 +3124,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, } local->oper_channel = cbss->channel; - ieee80211_hw_config(local, 0); + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); if (!have_sta) { u32 rates = 0, basic_rates = 0; From 4399a4df98a63e30fd16e9d0cecc46ea92269e8f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Jun 2012 06:25:00 +0000 Subject: [PATCH 315/475] l2tp: fix a race in l2tp_ip_sendmsg() Commit 081b1b1bb27f (l2tp: fix l2tp_ip_sendmsg() route handling) added a race, in case IP route cache is disabled. In this case, we should not do the dst_release(&rt->dst), since it'll free the dst immediately, instead of waiting a RCU grace period. Signed-off-by: Eric Dumazet Cc: James Chapman Cc: Denys Fedoryshchenko Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 70614e7affa..61d8b75d268 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -464,10 +464,12 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m sk->sk_bound_dev_if); if (IS_ERR(rt)) goto no_route; - if (connected) + if (connected) { sk_setup_caps(sk, &rt->dst); - else - dst_release(&rt->dst); /* safe since we hold rcu_read_lock */ + } else { + skb_dst_set(skb, &rt->dst); + goto xmit; + } } /* We dont need to clone dst here, it is guaranteed to not disappear. @@ -475,6 +477,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m */ skb_dst_set_noref(skb, &rt->dst); +xmit: /* Queue the packet to IP for output */ rc = ip_queue_xmit(skb, &inet->cork.fl); rcu_read_unlock(); From cd8f76c0a0c6fce0b2cf23c9bd0123f91453f46d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Jun 2012 22:59:59 +0000 Subject: [PATCH 316/475] be2net: fix a race in be_xmit() As soon as hardware is notified of a transmit, we no longer can assume skb can be dereferenced, as TX completion might have freed the packet. Signed-off-by: Eric Dumazet Cc: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 08efd308d78..fdb50cec6b5 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -736,6 +736,8 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, copied = make_tx_wrbs(adapter, txq, skb, wrb_cnt, dummy_wrb); if (copied) { + int gso_segs = skb_shinfo(skb)->gso_segs; + /* record the sent skb in the sent_skb table */ BUG_ON(txo->sent_skb_list[start]); txo->sent_skb_list[start] = skb; @@ -753,8 +755,7 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, be_txq_notify(adapter, txq->id, wrb_cnt); - be_tx_stats_update(txo, wrb_cnt, copied, - skb_shinfo(skb)->gso_segs, stopped); + be_tx_stats_update(txo, wrb_cnt, copied, gso_segs, stopped); } else { txq->head = start; dev_kfree_skb_any(skb); From 8f53369b753f5f4c7684c2eb0b592152abb1dd00 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 8 Jun 2012 14:53:06 -0700 Subject: [PATCH 317/475] Revert "drm/i915/crt: Do not rely upon the HPD presence pin" This reverts commit 9e612a008fa7fe493a473454def56aa321479495. It incorrectly finds VGA connectors where none are attached, apparently not noticing that nothing replied to the EDID queries, and happily using the default EDID modes that have nothing to do with actual hardware. That in turn then causes X to fall down to the lowest common denominator, which is usually the default 1024x768 mode that is in the default EDID and pretty much anything supports). I'd suggest that if not relying on the HDP pin, the code should at least check whether it gets valid EDID data back, rather than just assume there's something on the VGA connector. Cc: Dave Airlie Cc: Chris Wilson Cc: Daniel Vetter Signed-off-by: Linus Torvalds --- drivers/gpu/drm/i915/intel_crt.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 844e93e1e39..75a70c46ef1 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -453,15 +453,13 @@ intel_crt_detect(struct drm_connector *connector, bool force) struct intel_load_detect_pipe tmp; if (I915_HAS_HOTPLUG(dev)) { - /* We can not rely on the HPD pin always being correctly wired - * up, for example many KVM do not pass it through, and so - * only trust an assertion that the monitor is connected. - */ if (intel_crt_detect_hotplug(connector)) { DRM_DEBUG_KMS("CRT detected via hotplug\n"); return connector_status_connected; - } else + } else { DRM_DEBUG_KMS("CRT not detected via hotplug\n"); + return connector_status_disconnected; + } } if (intel_crt_detect_ddc(connector)) From cd96891d48a945ca2011fbeceda73813d6286195 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 8 Jun 2012 13:18:33 -0700 Subject: [PATCH 318/475] sched/fair: fix lots of kernel-doc warnings Fix lots of new kernel-doc warnings in kernel/sched/fair.c: Warning(kernel/sched/fair.c:3625): No description found for parameter 'env' Warning(kernel/sched/fair.c:3625): Excess function parameter 'sd' description in 'update_sg_lb_stats' Warning(kernel/sched/fair.c:3735): No description found for parameter 'env' Warning(kernel/sched/fair.c:3735): Excess function parameter 'sd' description in 'update_sd_pick_busiest' Warning(kernel/sched/fair.c:3735): Excess function parameter 'this_cpu' description in 'update_sd_pick_busiest' .. more warnings Signed-off-by: Randy Dunlap Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- kernel/sched/fair.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b2a2d236f27..d5583f9588e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3632,7 +3632,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) /** * update_sg_lb_stats - Update sched_group's statistics for load balancing. - * @sd: The sched_domain whose statistics are to be updated. + * @env: The load balancing environment. * @group: sched_group whose statistics are to be updated. * @load_idx: Load index of sched_domain of this_cpu for load calc. * @local_group: Does group contain this_cpu. @@ -3741,11 +3741,10 @@ static inline void update_sg_lb_stats(struct lb_env *env, /** * update_sd_pick_busiest - return 1 on busiest group - * @sd: sched_domain whose statistics are to be checked + * @env: The load balancing environment. * @sds: sched_domain statistics * @sg: sched_group candidate to be checked for being the busiest * @sgs: sched_group statistics - * @this_cpu: the current cpu * * Determine if @sg is a busier group than the previously selected * busiest group. @@ -3783,9 +3782,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, /** * update_sd_lb_stats - Update sched_domain's statistics for load balancing. - * @sd: sched_domain whose statistics are to be updated. - * @this_cpu: Cpu for which load balance is currently performed. - * @idle: Idle status of this_cpu + * @env: The load balancing environment. * @cpus: Set of cpus considered for load balancing. * @balance: Should we balance. * @sds: variable to hold the statistics for this sched_domain. @@ -3874,10 +3871,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, * Returns 1 when packing is required and a task should be moved to * this CPU. The amount of the imbalance is returned in *imbalance. * - * @sd: The sched_domain whose packing is to be checked. + * @env: The load balancing environment. * @sds: Statistics of the sched_domain which is to be packed - * @this_cpu: The cpu at whose sched_domain we're performing load-balance. - * @imbalance: returns amount of imbalanced due to packing. */ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds) { @@ -3903,9 +3898,8 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds) * fix_small_imbalance - Calculate the minor imbalance that exists * amongst the groups of a sched_domain, during * load balancing. + * @env: The load balancing environment. * @sds: Statistics of the sched_domain whose imbalance is to be calculated. - * @this_cpu: The cpu at whose sched_domain we're performing load-balance. - * @imbalance: Variable to store the imbalance. */ static inline void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) @@ -4048,11 +4042,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s * Also calculates the amount of weighted load which should be moved * to restore balance. * - * @sd: The sched_domain whose busiest group is to be returned. - * @this_cpu: The cpu for which load balancing is currently being performed. - * @imbalance: Variable which stores amount of weighted load which should - * be moved to restore balance/put a group to idle. - * @idle: The idle status of this_cpu. + * @env: The load balancing environment. * @cpus: The set of CPUs under consideration for load-balancing. * @balance: Pointer to a variable indicating if this_cpu * is the appropriate cpu to perform load balancing at this_level. From 1e11ad8dc42975d5c2bab7d478f6cd875602eda4 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 8 Jun 2012 13:21:26 -0700 Subject: [PATCH 319/475] mm, oom: fix badness score underflow If the privileges given to root threads (3% of allowable memory) or a negative value of /proc/pid/oom_score_adj happen to exceed the amount of rss of a thread, its badness score overflows as a result of commit a7f638f999ff ("mm, oom: normalize oom scores to oom_score_adj scale only for userspace"). Fix this by making the type signed and return 1, meaning the thread is still eligible for kill, if the value is negative. Reported-by: Dave Jones Acked-by: Oleg Nesterov Signed-off-by: David Rientjes Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ed0e1967736..416637f0e92 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -183,7 +183,7 @@ static bool oom_unkillable_task(struct task_struct *p, unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages) { - unsigned long points; + long points; if (oom_unkillable_task(p, memcg, nodemask)) return 0; @@ -223,7 +223,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, * Never return 0 for an eligible task regardless of the root bonus and * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here). */ - return points ? points : 1; + return points > 0 ? points : 1; } /* From ead188f9f930fb5d7f0c49315a7fce3d8bd16b7e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 8 Jun 2012 17:07:36 +0200 Subject: [PATCH 320/475] writeback: Fix lock imbalance in writeback_sb_inodes() Fix bug introduced by 169ebd90. We have to have wb_list_lock locked when restarting writeback loop after having waited for inode writeback. Bug description by Ted Tso: I can reproduce this fairly easily by using ext4 w/o a journal, running under KVM with 1024megs memory, with fsstress (xfstests #13): [ 45.153294] ===================================== [ 45.154784] [ BUG: bad unlock balance detected! ] [ 45.155591] 3.5.0-rc1-00002-gb22b1f1 #124 Not tainted [ 45.155591] ------------------------------------- [ 45.155591] flush-254:16/2499 is trying to release lock (&(&wb->list_lock)->rlock) at: [ 45.155591] [] writeback_sb_inodes+0x160/0x327 [ 45.155591] but there are no more locks to release! Reported-by: Theodore Ts'o Tested-by: Theodore Ts'o Signed-off-by: Jan Kara Signed-off-by: Fengguang Wu --- fs/fs-writeback.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8d2fb8c88cf..41a3ccff18d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -664,6 +664,7 @@ static long writeback_sb_inodes(struct super_block *sb, /* Wait for I_SYNC. This function drops i_lock... */ inode_sleep_on_writeback(inode); /* Inode may be gone, start again */ + spin_lock(&wb->list_lock); continue; } inode->i_state |= I_SYNC; From cfaf025112d3856637ff34a767ef785ef5cf2ca9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 8 Jun 2012 18:40:09 -0700 Subject: [PATCH 321/475] Linux 3.5-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0d718ede9ea..d845c2a1aa6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 5 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Saber-toothed Squirrel # *DOCUMENTATION* From c6c4b97c6b7003e8082dd43db224c1d1f7a24aa2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 8 Jun 2012 14:01:44 +0000 Subject: [PATCH 322/475] net/core: fix kernel-doc warnings Fix kernel-doc warnings in net/core: Warning(net/core/skbuff.c:3368): No description found for parameter 'delta_truesize' Warning(net/core/filter.c:628): No description found for parameter 'pfp' Warning(net/core/filter.c:628): Excess function parameter 'sk' description in 'sk_unattached_filter_create' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- net/core/filter.c | 4 ++-- net/core/skbuff.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index a3eddb515d1..d4ce2dc712e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -616,9 +616,9 @@ static int __sk_prepare_filter(struct sk_filter *fp) /** * sk_unattached_filter_create - create an unattached filter * @fprog: the filter program - * @sk: the socket to use + * @pfp: the unattached filter that is created * - * Create a filter independent ofr any socket. We first run some + * Create a filter independent of any socket. We first run some * sanity checks on it to make sure it does not explode on us later. * If an error occurs or there is insufficient memory for the filter * a negative errno code is returned. On success the return is zero. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 016694d6248..d78671e9d54 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3361,7 +3361,7 @@ EXPORT_SYMBOL(kfree_skb_partial); * @to: prior buffer * @from: buffer to add * @fragstolen: pointer to boolean - * + * @delta_truesize: how much more was allocated than was requested */ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize) From f41ef2e7dc4515777810016612316c38f84275e7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 8 Jun 2012 14:07:19 +0000 Subject: [PATCH 323/475] netdev: fix drivers/net/phy/ kernel-doc warnings Fix kernel-doc warnings in drivers/net/phy: Warning(drivers/net/phy/mdio_bus.c:109): No description found for parameter 'mdio_bus_np' Warning(drivers/net/phy/mdio_bus.c:109): Excess function parameter 'mdio_np' description in 'of_mdio_find_bus' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 683ef1ce551..5061608f408 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -96,7 +96,7 @@ static int of_mdio_bus_match(struct device *dev, void *mdio_bus_np) } /** * of_mdio_find_bus - Given an mii_bus node, find the mii_bus. - * @mdio_np: Pointer to the mii_bus. + * @mdio_bus_np: Pointer to the mii_bus. * * Returns a pointer to the mii_bus, or NULL if none found. * From c91c3faea500da34d42a14735f1f67bb137b6413 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 9 Jun 2012 08:46:42 +0200 Subject: [PATCH 324/475] vga_switcheroo: Enable/disable audio clients at the right time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The audio clients have to be disabled before disabling the VGA and switching. Similarly, enabling the audio client should be done at last. Otherwise the audio-side operation stalls, eventually leading to Oops or lockups. Tested-by: Jörg-Volker Peetz Acked-by: Dave Airlie Signed-off-by: Takashi Iwai --- drivers/gpu/vga/vga_switcheroo.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index eb4f64f0a56..5b3c7d135dc 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -304,8 +304,6 @@ static int vga_switchto_stage1(struct vga_switcheroo_client *new_client) vga_switchon(new_client); vga_set_default_device(new_client->pdev); - set_audio_state(new_client->id, VGA_SWITCHEROO_ON); - return 0; } @@ -321,6 +319,8 @@ static int vga_switchto_stage2(struct vga_switcheroo_client *new_client) active->active = false; + set_audio_state(active->id, VGA_SWITCHEROO_OFF); + if (new_client->fb_info) { struct fb_event event; event.info = new_client->fb_info; @@ -334,11 +334,11 @@ static int vga_switchto_stage2(struct vga_switcheroo_client *new_client) if (new_client->ops->reprobe) new_client->ops->reprobe(new_client->pdev); - set_audio_state(active->id, VGA_SWITCHEROO_OFF); - if (active->pwr_state == VGA_SWITCHEROO_ON) vga_switchoff(active); + set_audio_state(new_client->id, VGA_SWITCHEROO_ON); + new_client->active = true; return 0; } @@ -384,8 +384,9 @@ vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf, /* pwr off the device not in use */ if (strncmp(usercmd, "OFF", 3) == 0) { list_for_each_entry(client, &vgasr_priv.clients, list) { - if (client->active) + if (client->active || client_is_audio(client)) continue; + set_audio_state(client->id, VGA_SWITCHEROO_OFF); if (client->pwr_state == VGA_SWITCHEROO_ON) vga_switchoff(client); } @@ -394,10 +395,11 @@ vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf, /* pwr on the device not in use */ if (strncmp(usercmd, "ON", 2) == 0) { list_for_each_entry(client, &vgasr_priv.clients, list) { - if (client->active) + if (client->active || client_is_audio(client)) continue; if (client->pwr_state == VGA_SWITCHEROO_OFF) vga_switchon(client); + set_audio_state(client->id, VGA_SWITCHEROO_ON); } goto out; } From 6f32d03bc63ab27f5e21d40c4ec81e18d991f424 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 15 May 2012 15:02:57 -0600 Subject: [PATCH 325/475] drivers/video: use correct __devexit_p annotation __devexit functions are discarded when CONFIG_HOTPLUG is not set, so the symbol needs to be referenced carefully. Signed-off-by: Arnd Bergmann Signed-off-by: Mathieu Poirier Signed-off-by: Florian Tobias Schandinat --- drivers/video/broadsheetfb.c | 2 +- drivers/video/mbx/mbxfb.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/broadsheetfb.c b/drivers/video/broadsheetfb.c index 377dde3d5bf..c95b417d0d4 100644 --- a/drivers/video/broadsheetfb.c +++ b/drivers/video/broadsheetfb.c @@ -1211,7 +1211,7 @@ static int __devexit broadsheetfb_remove(struct platform_device *dev) static struct platform_driver broadsheetfb_driver = { .probe = broadsheetfb_probe, - .remove = broadsheetfb_remove, + .remove = __devexit_p(broadsheetfb_remove), .driver = { .owner = THIS_MODULE, .name = "broadsheetfb", diff --git a/drivers/video/mbx/mbxfb.c b/drivers/video/mbx/mbxfb.c index ab0a8e52733..85e4f44bfa6 100644 --- a/drivers/video/mbx/mbxfb.c +++ b/drivers/video/mbx/mbxfb.c @@ -1045,7 +1045,7 @@ static int __devexit mbxfb_remove(struct platform_device *dev) static struct platform_driver mbxfb_driver = { .probe = mbxfb_probe, - .remove = mbxfb_remove, + .remove = __devexit_p(mbxfb_remove), .suspend = mbxfb_suspend, .resume = mbxfb_resume, .driver = { From ff9440465c82099ba3365930a97f90b3acfcc980 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 15 May 2012 15:02:58 -0600 Subject: [PATCH 326/475] video/ili9320: do not mark exported functions __devexit No symbol can be exported when the section is discarded - the only solution I could think of is not to mark symbols as __devexit when they are exported. Signed-off-by: Arnd Bergmann Signed-off-by: Mathieu Poirier Signed-off-by: Florian Tobias Schandinat --- drivers/video/backlight/ili9320.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/backlight/ili9320.c b/drivers/video/backlight/ili9320.c index 5118a9f029a..b67485a595a 100644 --- a/drivers/video/backlight/ili9320.c +++ b/drivers/video/backlight/ili9320.c @@ -267,7 +267,7 @@ int __devinit ili9320_probe_spi(struct spi_device *spi, EXPORT_SYMBOL_GPL(ili9320_probe_spi); -int __devexit ili9320_remove(struct ili9320 *ili) +int ili9320_remove(struct ili9320 *ili) { ili9320_power(ili, FB_BLANK_POWERDOWN); From d399099cbba92b247ac323a99ac4c5fa5b0ca68d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 15 May 2012 15:02:59 -0600 Subject: [PATCH 327/475] video/console: automatically select a font The frame buffer console needs at least one font to be built into the kernel, so add the necessary Kconfig magic to guarantee that one of the available font is always on. If a user accidentally disables all fonts manually, the 8x16 font will be selected anyway. Signed-off-by: Arnd Bergmann Signed-off-by: Mathieu Poirier Signed-off-by: Florian Tobias Schandinat --- drivers/video/console/Kconfig | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig index c2d11fef114..e2c96d01d8f 100644 --- a/drivers/video/console/Kconfig +++ b/drivers/video/console/Kconfig @@ -224,5 +224,19 @@ config FONT_10x18 big letters. It fits between the sun 12x22 and the normal 8x16 font. If other fonts are too big or too small for you, say Y, otherwise say N. +config FONT_AUTOSELECT + def_bool y + depends on FRAMEBUFFER_CONSOLE || SGI_NEWPORT_CONSOLE || STI_CONSOLE || USB_SISUSBVGA_CON + depends on !FONT_8x8 + depends on !FONT_6x11 + depends on !FONT_7x14 + depends on !FONT_PEARL_8x8 + depends on !FONT_ACORN_8x8 + depends on !FONT_MINI_4x6 + depends on !FONT_SUN8x16 + depends on !FONT_SUN12x22 + depends on !FONT_10x18 + select FONT_8x16 + endmenu From f5c3ac242d05c07f646824aba25a541af7464c82 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 15 May 2012 15:03:00 -0600 Subject: [PATCH 328/475] drivers/savagefb: use mdelay instead of udelay Long delays need to use mdelay on architectures such as ARM that cannot compute long timeouts in udelay. Signed-off-by: Arnd Bergmann Signed-off-by: Mathieu Poirier Signed-off-by: Florian Tobias Schandinat --- drivers/video/savage/savagefb_driver.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/video/savage/savagefb_driver.c b/drivers/video/savage/savagefb_driver.c index cee7803a0a1..f3d3b9ce475 100644 --- a/drivers/video/savage/savagefb_driver.c +++ b/drivers/video/savage/savagefb_driver.c @@ -1351,7 +1351,7 @@ static void savagefb_set_par_int(struct savagefb_par *par, struct savage_reg *r /* following part not present in X11 driver */ cr67 = vga_in8(0x3d5, par) & 0xf; vga_out8(0x3d5, 0x50 | cr67, par); - udelay(10000); + mdelay(10); vga_out8(0x3d4, 0x67, par); /* end of part */ vga_out8(0x3d5, reg->CR67 & ~0x0c, par); @@ -1904,11 +1904,11 @@ static int savage_init_hw(struct savagefb_par *par) vga_out8(0x3d4, 0x66, par); cr66 = vga_in8(0x3d5, par); vga_out8(0x3d5, cr66 | 0x02, par); - udelay(10000); + mdelay(10); vga_out8(0x3d4, 0x66, par); vga_out8(0x3d5, cr66 & ~0x02, par); /* clear reset flag */ - udelay(10000); + mdelay(10); /* @@ -1918,11 +1918,11 @@ static int savage_init_hw(struct savagefb_par *par) vga_out8(0x3d4, 0x3f, par); cr3f = vga_in8(0x3d5, par); vga_out8(0x3d5, cr3f | 0x08, par); - udelay(10000); + mdelay(10); vga_out8(0x3d4, 0x3f, par); vga_out8(0x3d5, cr3f & ~0x08, par); /* clear reset flags */ - udelay(10000); + mdelay(10); /* Savage ramdac speeds */ par->numClocks = 4; From 5fc05780b84d5227e6c15986f1bde488e52752bd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 15 May 2012 15:03:01 -0600 Subject: [PATCH 329/475] drivers/tosa: driver needs I2C and SPI to compile This driver requires both SPI and I2C to build. Signed-off-by: Arnd Bergmann Signed-off-by: Mathieu Poirier Signed-off-by: Florian Tobias Schandinat --- drivers/video/backlight/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index af16884491e..d7487d23f58 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -88,7 +88,7 @@ config LCD_PLATFORM config LCD_TOSA tristate "Sharp SL-6000 LCD Driver" - depends on SPI && MACH_TOSA + depends on I2C && SPI && MACH_TOSA help If you have an Sharp SL-6000 Zaurus say Y to enable a driver for its LCD. From 906369e43c29001c39c7dfed8a01b9dff24ace75 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 8 Jun 2012 16:48:33 -0400 Subject: [PATCH 330/475] NFS: fix directio refcount bug on commit This reverts a hunk from commit 04277086577 "NFS: Clean up - Simplify reference counting in fs/nfs/direct.c" The cleanups in that patch affect the write path, but by the time processing hits commit the removed reference has been added back by nfs_scan_commit_list(). Without this reversion, any page that is sent to commit holds on to an unbalanced reference that is never freed. The immediate effect is an imbalance over the wire between OPENs and CLOSEs. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b5385a7efd5..05099890a92 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -517,9 +517,9 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_list_remove_request(req); if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { /* Note the rewrite will go through mds */ - kref_get(&req->wb_kref); nfs_mark_request_commit(req, NULL, &cinfo); - } + } else + nfs_release_request(req); nfs_unlock_and_release_request(req); } From c5afc8da5b881633717bfc0510792428aa01fa3f Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Fri, 8 Jun 2012 11:32:56 -0400 Subject: [PATCH 331/475] NFS: Use the NFS_DEFAULT_VERSION for v2 and v3 mounts Older versions of nfs utils don't always pass a "vers=" mount option for NFS. This chould lead to attempts at using NFS v0 due to a zeroed out nfs_parsed_mount_data struct. I solve this by setting the default NFS version to NFS_DEFAULT_VERSION in the v2 and v3 cases (v4 has already been taken care of by a similar patch). Reported-by: Joerg Roedel Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index bdd673141e9..906f09c7d84 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1867,6 +1867,7 @@ static int nfs23_validate_mount_data(void *options, if (data == NULL) goto out_no_data; + args->version = NFS_DEFAULT_VERSION; switch (data->version) { case 1: data->namlen = 0; From 9271b0b4b2044c6db06051fe60bc58cdd4f17c7c Mon Sep 17 00:00:00 2001 From: Martin Pelikan Date: Sat, 9 Jun 2012 21:22:11 +0200 Subject: [PATCH 332/475] x86, um: Correct syscall table type attributes breaking gcc 4.8 The latest GCC 4.8 does some more checking on type attributes that break the build for ARCH=um -> fill them in. Specifically, the "asmlinkage" attributes is now tested for consistency. Signed-off-by: Martin Pelikan Link: http://lkml.kernel.org/r/1339269731-10772-1-git-send-email-pelikan@storkhole.cz Acked-by: Richard Weinberger Signed-off-by: H. Peter Anvin --- arch/x86/um/sys_call_table_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 416bd40c0eb..68d1dc91b37 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -39,9 +39,9 @@ #undef __SYSCALL_I386 #define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym, -typedef void (*sys_call_ptr_t)(void); +typedef asmlinkage void (*sys_call_ptr_t)(void); -extern void sys_ni_syscall(void); +extern asmlinkage void sys_ni_syscall(void); const sys_call_ptr_t sys_call_table[] __cacheline_aligned = { /* From 8876d6b5f81f4e242a6660da22bbd92f17a8d058 Mon Sep 17 00:00:00 2001 From: Paul Pluzhnikov Date: Sat, 9 Jun 2012 07:53:03 -0700 Subject: [PATCH 333/475] net: Make linux/tcp.h C++ friendly (trivial) I originally sent this patch to , but Jiri Kosina did not feel that this is fully appropriate for the trivial tree. Using linux/tcp.h from C++ results in: cat t.cc #include int main() { } g++ -c t.cc In file included from t.cc:1: /usr/include/linux/tcp.h:72: error: '__u32 __fswab32(__u32)' cannot appear in a constant-expression /usr/include/linux/tcp.h:72: error: a function call cannot appear in a constant-expression ... Attached trivial patch fixes this problem. Tested: - the t.cc above compiles with g++ and - the following program generates the same output before/after the patch: #include #include int main () { #define P(a) printf("%s: %08x\n", #a, (int)a) P(TCP_FLAG_CWR); P(TCP_FLAG_ECE); P(TCP_FLAG_URG); P(TCP_FLAG_ACK); P(TCP_FLAG_PSH); P(TCP_FLAG_RST); P(TCP_FLAG_SYN); P(TCP_FLAG_FIN); P(TCP_RESERVED_BITS); P(TCP_DATA_OFFSET); #undef P return 0; } Signed-off-by: Paul Pluzhnikov Signed-off-by: David S. Miller --- include/linux/tcp.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4c5b6328337..5f359dbfcdc 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -69,16 +69,16 @@ union tcp_word_hdr { #define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) enum { - TCP_FLAG_CWR = __cpu_to_be32(0x00800000), - TCP_FLAG_ECE = __cpu_to_be32(0x00400000), - TCP_FLAG_URG = __cpu_to_be32(0x00200000), - TCP_FLAG_ACK = __cpu_to_be32(0x00100000), - TCP_FLAG_PSH = __cpu_to_be32(0x00080000), - TCP_FLAG_RST = __cpu_to_be32(0x00040000), - TCP_FLAG_SYN = __cpu_to_be32(0x00020000), - TCP_FLAG_FIN = __cpu_to_be32(0x00010000), - TCP_RESERVED_BITS = __cpu_to_be32(0x0F000000), - TCP_DATA_OFFSET = __cpu_to_be32(0xF0000000) + TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000), + TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000), + TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000), + TCP_FLAG_ACK = __constant_cpu_to_be32(0x00100000), + TCP_FLAG_PSH = __constant_cpu_to_be32(0x00080000), + TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000), + TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000), + TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000), + TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000), + TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000) }; /* From d26098759cf6d32148649c165f87a7590bc25b89 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Sat, 9 Jun 2012 10:57:41 -0400 Subject: [PATCH 334/475] drm/radeon: fix tiling and command stream checking on evergreen v3 Fix regresson since the introduction of command stream checking on evergreen (thread referenced below). Issue is cause by ddx allocating bo with formula width*height*bpp while programming the GPU command stream with ALIGN(height, 8). In some case (where page alignment does not hide the extra size bo should be according to height alignment) the kernel will reject the command stream. This patch reprogram the command stream to slice - 1 (slice is a derivative value from height) which avoid rejecting the command stream while keeping the value of command stream checking from a security point of view. This patch also fix wrong computation of layer size for 2D tiled surface. Which should fix issue when 2D color tiling is enabled. This dump the radeon KMS_DRIVER_MINOR so userspace can know if they are on a fixed kernel or not. https://lkml.org/lkml/2012/6/3/80 https://bugs.freedesktop.org/show_bug.cgi?id=50892 https://bugs.freedesktop.org/show_bug.cgi?id=50857 !!! STABLE need a custom version of this patch for 3.4 !!! v2: actually bump the minor version and add comment about stable v3: do compute the height the ddx was trying to use [airlied: drop left over debug] Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/evergreen_cs.c | 49 ++++++++++++++++++++++++--- drivers/gpu/drm/radeon/radeon_drv.c | 3 +- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 4e7dd2b4843..c16554122cc 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -52,6 +52,7 @@ struct evergreen_cs_track { u32 cb_color_view[12]; u32 cb_color_pitch[12]; u32 cb_color_slice[12]; + u32 cb_color_slice_idx[12]; u32 cb_color_attrib[12]; u32 cb_color_cmask_slice[8];/* unused */ u32 cb_color_fmask_slice[8];/* unused */ @@ -127,12 +128,14 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track) track->cb_color_info[i] = 0; track->cb_color_view[i] = 0xFFFFFFFF; track->cb_color_pitch[i] = 0; - track->cb_color_slice[i] = 0; + track->cb_color_slice[i] = 0xfffffff; + track->cb_color_slice_idx[i] = 0; } track->cb_target_mask = 0xFFFFFFFF; track->cb_shader_mask = 0xFFFFFFFF; track->cb_dirty = true; + track->db_depth_slice = 0xffffffff; track->db_depth_view = 0xFFFFC000; track->db_depth_size = 0xFFFFFFFF; track->db_depth_control = 0xFFFFFFFF; @@ -250,10 +253,9 @@ static int evergreen_surface_check_2d(struct radeon_cs_parser *p, { struct evergreen_cs_track *track = p->track; unsigned palign, halign, tileb, slice_pt; + unsigned mtile_pr, mtile_ps, mtileb; tileb = 64 * surf->bpe * surf->nsamples; - palign = track->group_size / (8 * surf->bpe * surf->nsamples); - palign = MAX(8, palign); slice_pt = 1; if (tileb > surf->tsplit) { slice_pt = tileb / surf->tsplit; @@ -262,7 +264,10 @@ static int evergreen_surface_check_2d(struct radeon_cs_parser *p, /* macro tile width & height */ palign = (8 * surf->bankw * track->npipes) * surf->mtilea; halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea; - surf->layer_size = surf->nbx * surf->nby * surf->bpe * slice_pt; + mtileb = (palign / 8) * (halign / 8) * tileb;; + mtile_pr = surf->nbx / palign; + mtile_ps = (mtile_pr * surf->nby) / halign; + surf->layer_size = mtile_ps * mtileb * slice_pt; surf->base_align = (palign / 8) * (halign / 8) * tileb; surf->palign = palign; surf->halign = halign; @@ -434,6 +439,39 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i offset += surf.layer_size * mslice; if (offset > radeon_bo_size(track->cb_color_bo[id])) { + /* old ddx are broken they allocate bo with w*h*bpp but + * program slice with ALIGN(h, 8), catch this and patch + * command stream. + */ + if (!surf.mode) { + volatile u32 *ib = p->ib.ptr; + unsigned long tmp, nby, bsize, size, min = 0; + + /* find the height the ddx wants */ + if (surf.nby > 8) { + min = surf.nby - 8; + } + bsize = radeon_bo_size(track->cb_color_bo[id]); + tmp = track->cb_color_bo_offset[id] << 8; + for (nby = surf.nby; nby > min; nby--) { + size = nby * surf.nbx * surf.bpe * surf.nsamples; + if ((tmp + size * mslice) <= bsize) { + break; + } + } + if (nby > min) { + surf.nby = nby; + slice = ((nby * surf.nbx) / 64) - 1; + if (!evergreen_surface_check(p, &surf, "cb")) { + /* check if this one works */ + tmp += surf.layer_size * mslice; + if (tmp <= bsize) { + ib[track->cb_color_slice_idx[id]] = slice; + goto old_ddx_ok; + } + } + } + } dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, " "offset %d, max layer %d, bo size %ld, slice %d)\n", __func__, __LINE__, id, surf.layer_size, @@ -446,6 +484,7 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i surf.tsplit, surf.mtilea); return -EINVAL; } +old_ddx_ok: return 0; } @@ -1532,6 +1571,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_SLICE: tmp = (reg - CB_COLOR0_SLICE) / 0x3c; track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx); + track->cb_color_slice_idx[tmp] = idx; track->cb_dirty = true; break; case CB_COLOR8_SLICE: @@ -1540,6 +1580,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR11_SLICE: tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8; track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx); + track->cb_color_slice_idx[tmp] = idx; track->cb_dirty = true; break; case CB_COLOR0_ATTRIB: diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index f0bb2b543b1..03e5f5df40f 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -57,9 +57,10 @@ * 2.13.0 - virtual memory support, streamout * 2.14.0 - add evergreen tiling informations * 2.15.0 - add max_pipes query + * 2.16.0 - fix evergreen 2D tiled surface calculation */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 15 +#define KMS_DRIVER_MINOR 16 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); From 7dbb491878a2c51d372a8890fa45a8ff80358af1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?fran=C3=A7ois=20romieu?= Date: Sat, 9 Jun 2012 10:53:16 +0000 Subject: [PATCH 335/475] r8169: avoid NAPI scheduling delay. While reworking the r8169 driver a few months ago to perform the smallest amount of work in the irq handler, I took care of avoiding any irq mask register operation in the slow work dedicated user context thread. The slow work thread scheduled an extra round of NAPI work which would ultimately set the irq mask register as required, thus keeping such irq mask operations in the NAPI handler. It would eventually race with the irq handler and delay NAPI execution for - assuming no further irq - a whole ksoftirqd period. Mildly a problem for rare link changes or corner case PCI events. The race was always lost after the last bh disabling lock had been removed from the work thread and people started wondering where those pesky "NOHZ: local_softirq_pending 08" messages came from. Actually the irq mask register _can_ be set up directly in the slow work thread. Signed-off-by: Francois Romieu Reported-by: Dave Jones Tested-by: Marc Dionne Cc: Thomas Gleixner Cc: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 9757ce3543a..7260aa79466 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -5889,11 +5889,7 @@ static void rtl_slow_event_work(struct rtl8169_private *tp) if (status & LinkChg) __rtl8169_check_link_status(dev, tp, tp->mmio_addr, true); - napi_disable(&tp->napi); - rtl_irq_disable(tp); - - napi_enable(&tp->napi); - napi_schedule(&tp->napi); + rtl_irq_enable_all(tp); } static void rtl_task(struct work_struct *work) From 83a27052c3376793bc879e00e6e6805d6fb7aab9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Jun 2012 22:35:24 +0000 Subject: [PATCH 336/475] virtio-net: fix a race on 32bit arches commit 3fa2a1df909 (virtio-net: per cpu 64 bit stats (v2)) added a race on 32bit arches. We must use separate syncp for rx and tx path as they can be run at the same time on different cpus. Thus one sequence increment can be lost and readers spin forever. Signed-off-by: Eric Dumazet Cc: Stephen Hemminger Cc: Michael S. Tsirkin Cc: Jason Wang Acked-by: Rusty Russell Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5214b1eceb9..f18149ae258 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -42,7 +42,8 @@ module_param(gso, bool, 0444); #define VIRTNET_DRIVER_VERSION "1.0.0" struct virtnet_stats { - struct u64_stats_sync syncp; + struct u64_stats_sync tx_syncp; + struct u64_stats_sync rx_syncp; u64 tx_bytes; u64 tx_packets; @@ -300,10 +301,10 @@ static void receive_buf(struct net_device *dev, void *buf, unsigned int len) hdr = skb_vnet_hdr(skb); - u64_stats_update_begin(&stats->syncp); + u64_stats_update_begin(&stats->rx_syncp); stats->rx_bytes += skb->len; stats->rx_packets++; - u64_stats_update_end(&stats->syncp); + u64_stats_update_end(&stats->rx_syncp); if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { pr_debug("Needs csum!\n"); @@ -565,10 +566,10 @@ static unsigned int free_old_xmit_skbs(struct virtnet_info *vi) while ((skb = virtqueue_get_buf(vi->svq, &len)) != NULL) { pr_debug("Sent skb %p\n", skb); - u64_stats_update_begin(&stats->syncp); + u64_stats_update_begin(&stats->tx_syncp); stats->tx_bytes += skb->len; stats->tx_packets++; - u64_stats_update_end(&stats->syncp); + u64_stats_update_end(&stats->tx_syncp); tot_sgs += skb_vnet_hdr(skb)->num_sg; dev_kfree_skb_any(skb); @@ -703,12 +704,16 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev, u64 tpackets, tbytes, rpackets, rbytes; do { - start = u64_stats_fetch_begin(&stats->syncp); + start = u64_stats_fetch_begin(&stats->tx_syncp); tpackets = stats->tx_packets; tbytes = stats->tx_bytes; + } while (u64_stats_fetch_retry(&stats->tx_syncp, start)); + + do { + start = u64_stats_fetch_begin(&stats->rx_syncp); rpackets = stats->rx_packets; rbytes = stats->rx_bytes; - } while (u64_stats_fetch_retry(&stats->syncp, start)); + } while (u64_stats_fetch_retry(&stats->rx_syncp, start)); tot->rx_packets += rpackets; tot->tx_packets += tpackets; From 3777808873b0c49c5cf27e44c948dfb02675d578 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 11 Jun 2012 14:29:58 +0900 Subject: [PATCH 337/475] bug.h: need linux/kernel.h for TAINT_WARN. asm-generic/bug.h uses taint flags that are only defined in linux/kernel.h, resulting in build failures on platforms that don't include linux/kernel.h some other way: arch/sh/include/asm/thread_info.h:172:2: error: 'TAINT_WARN' undeclared (first use in this function) Caused by commit edd63a2763bd ("set_restore_sigmask() is never called without SIGPENDING (and never should be)"). Reported-by: Stephen Rothwell Cc: Al Viro Signed-off-by: Paul Mundt --- include/asm-generic/bug.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 2520a6e241d..9f02005f217 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -2,6 +2,7 @@ #define _ASM_GENERIC_BUG_H #include +#include #ifdef CONFIG_BUG From 7d0c399fe94d4fe572eadc7405654a282e5df63d Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 25 May 2012 13:36:43 +0900 Subject: [PATCH 338/475] clocksource: sh_cmt: Convert timer lock to raw spinlock. Signed-off-by: Paul Mundt --- drivers/clocksource/sh_cmt.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 32fe9ef5cc5..98b06baafcc 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -48,13 +48,13 @@ struct sh_cmt_priv { unsigned long next_match_value; unsigned long max_match_value; unsigned long rate; - spinlock_t lock; + raw_spinlock_t lock; struct clock_event_device ced; struct clocksource cs; unsigned long total_cycles; }; -static DEFINE_SPINLOCK(sh_cmt_lock); +static DEFINE_RAW_SPINLOCK(sh_cmt_lock); #define CMSTR -1 /* shared register */ #define CMCSR 0 /* channel register */ @@ -139,7 +139,7 @@ static void sh_cmt_start_stop_ch(struct sh_cmt_priv *p, int start) unsigned long flags, value; /* start stop register shared by multiple timer channels */ - spin_lock_irqsave(&sh_cmt_lock, flags); + raw_spin_lock_irqsave(&sh_cmt_lock, flags); value = sh_cmt_read(p, CMSTR); if (start) @@ -148,7 +148,7 @@ static void sh_cmt_start_stop_ch(struct sh_cmt_priv *p, int start) value &= ~(1 << cfg->timer_bit); sh_cmt_write(p, CMSTR, value); - spin_unlock_irqrestore(&sh_cmt_lock, flags); + raw_spin_unlock_irqrestore(&sh_cmt_lock, flags); } static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate) @@ -328,9 +328,9 @@ static void sh_cmt_set_next(struct sh_cmt_priv *p, unsigned long delta) { unsigned long flags; - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); __sh_cmt_set_next(p, delta); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static irqreturn_t sh_cmt_interrupt(int irq, void *dev_id) @@ -385,7 +385,7 @@ static int sh_cmt_start(struct sh_cmt_priv *p, unsigned long flag) int ret = 0; unsigned long flags; - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); if (!(p->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE))) ret = sh_cmt_enable(p, &p->rate); @@ -398,7 +398,7 @@ static int sh_cmt_start(struct sh_cmt_priv *p, unsigned long flag) if ((flag == FLAG_CLOCKSOURCE) && (!(p->flags & FLAG_CLOCKEVENT))) __sh_cmt_set_next(p, p->max_match_value); out: - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); return ret; } @@ -408,7 +408,7 @@ static void sh_cmt_stop(struct sh_cmt_priv *p, unsigned long flag) unsigned long flags; unsigned long f; - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); f = p->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE); p->flags &= ~flag; @@ -420,7 +420,7 @@ static void sh_cmt_stop(struct sh_cmt_priv *p, unsigned long flag) if ((flag == FLAG_CLOCKEVENT) && (p->flags & FLAG_CLOCKSOURCE)) __sh_cmt_set_next(p, p->max_match_value); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static struct sh_cmt_priv *cs_to_sh_cmt(struct clocksource *cs) @@ -435,13 +435,13 @@ static cycle_t sh_cmt_clocksource_read(struct clocksource *cs) unsigned long value; int has_wrapped; - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); value = p->total_cycles; raw = sh_cmt_get_counter(p, &has_wrapped); if (unlikely(has_wrapped)) raw += p->match_value + 1; - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); return value + raw; } @@ -591,7 +591,7 @@ static int sh_cmt_register(struct sh_cmt_priv *p, char *name, p->max_match_value = (1 << p->width) - 1; p->match_value = p->max_match_value; - spin_lock_init(&p->lock); + raw_spin_lock_init(&p->lock); if (clockevent_rating) sh_cmt_register_clockevent(p, name, clockevent_rating); From 50393a92c89c603e2d043c9f0212d3bd66701c86 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 25 May 2012 13:38:54 +0900 Subject: [PATCH 339/475] clocksource: sh_mtu2: Convert timer lock to raw spinlock. Signed-off-by: Paul Mundt --- drivers/clocksource/sh_mtu2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c index a2172f69041..d9b76ca64a6 100644 --- a/drivers/clocksource/sh_mtu2.c +++ b/drivers/clocksource/sh_mtu2.c @@ -43,7 +43,7 @@ struct sh_mtu2_priv { struct clock_event_device ced; }; -static DEFINE_SPINLOCK(sh_mtu2_lock); +static DEFINE_RAW_SPINLOCK(sh_mtu2_lock); #define TSTR -1 /* shared register */ #define TCR 0 /* channel register */ @@ -107,7 +107,7 @@ static void sh_mtu2_start_stop_ch(struct sh_mtu2_priv *p, int start) unsigned long flags, value; /* start stop register shared by multiple timer channels */ - spin_lock_irqsave(&sh_mtu2_lock, flags); + raw_spin_lock_irqsave(&sh_mtu2_lock, flags); value = sh_mtu2_read(p, TSTR); if (start) @@ -116,7 +116,7 @@ static void sh_mtu2_start_stop_ch(struct sh_mtu2_priv *p, int start) value &= ~(1 << cfg->timer_bit); sh_mtu2_write(p, TSTR, value); - spin_unlock_irqrestore(&sh_mtu2_lock, flags); + raw_spin_unlock_irqrestore(&sh_mtu2_lock, flags); } static int sh_mtu2_enable(struct sh_mtu2_priv *p) From c2225a57e596a308424e59abc7e864f866fe4493 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 25 May 2012 13:39:09 +0900 Subject: [PATCH 340/475] clocksource: sh_tmu: Convert timer lock to raw spinlock. Signed-off-by: Paul Mundt --- drivers/clocksource/sh_tmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c index 97f54b634be..852b3f19a55 100644 --- a/drivers/clocksource/sh_tmu.c +++ b/drivers/clocksource/sh_tmu.c @@ -45,7 +45,7 @@ struct sh_tmu_priv { struct clocksource cs; }; -static DEFINE_SPINLOCK(sh_tmu_lock); +static DEFINE_RAW_SPINLOCK(sh_tmu_lock); #define TSTR -1 /* shared register */ #define TCOR 0 /* channel register */ @@ -95,7 +95,7 @@ static void sh_tmu_start_stop_ch(struct sh_tmu_priv *p, int start) unsigned long flags, value; /* start stop register shared by multiple timer channels */ - spin_lock_irqsave(&sh_tmu_lock, flags); + raw_spin_lock_irqsave(&sh_tmu_lock, flags); value = sh_tmu_read(p, TSTR); if (start) @@ -104,7 +104,7 @@ static void sh_tmu_start_stop_ch(struct sh_tmu_priv *p, int start) value &= ~(1 << cfg->timer_bit); sh_tmu_write(p, TSTR, value); - spin_unlock_irqrestore(&sh_tmu_lock, flags); + raw_spin_unlock_irqrestore(&sh_tmu_lock, flags); } static int sh_tmu_enable(struct sh_tmu_priv *p) From c5d21c4b2a7765ab0600c8426374b50eb9f4a36f Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sun, 10 Jun 2012 20:05:24 +0000 Subject: [PATCH 341/475] net: Reorder initialization in ip_route_output to fix gcc warning If I build with W=1, for every file that includes , I get the warning include/net/route.h: In function 'ip_route_output': include/net/route.h:135:3: warning: initialized field overwritten [-Woverride-init] include/net/route.h:135:3: warning: (near initialization for 'fl4') [-Woverride-init] (This is with "gcc (Debian 4.6.3-1) 4.6.3") A fix seems pretty trivial: move the initialization of .flowi4_tos earlier. As far as I can tell, this has no effect on code generation. Signed-off-by: Roland Dreier Signed-off-by: David S. Miller --- include/net/route.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/route.h b/include/net/route.h index ed2b78e2375..98705468ac0 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -130,9 +130,9 @@ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr, { struct flowi4 fl4 = { .flowi4_oif = oif, + .flowi4_tos = tos, .daddr = daddr, .saddr = saddr, - .flowi4_tos = tos, }; return ip_route_output_key(net, &fl4); } From 3977407e83129f53e43d3ac44be8702f59fa3f77 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 11 Jun 2012 17:10:16 +0900 Subject: [PATCH 342/475] clocksource: sh_tmu: Use clockevents_config_and_register(). This switches over to the now exported clockevents_config() and clockevents_config_and_register() helpers. This knocks off a long-standing TMU TODO item. Signed-off-by: Paul Mundt --- drivers/clocksource/sh_tmu.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c index 852b3f19a55..c1b51d49d10 100644 --- a/drivers/clocksource/sh_tmu.c +++ b/drivers/clocksource/sh_tmu.c @@ -245,12 +245,7 @@ static void sh_tmu_clock_event_start(struct sh_tmu_priv *p, int periodic) sh_tmu_enable(p); - /* TODO: calculate good shift from rate and counter bit width */ - - ced->shift = 32; - ced->mult = div_sc(p->rate, NSEC_PER_SEC, ced->shift); - ced->max_delta_ns = clockevent_delta2ns(0xffffffff, ced); - ced->min_delta_ns = 5000; + clockevents_config(ced, p->rate); if (periodic) { p->periodic = (p->rate + HZ/2) / HZ; @@ -323,7 +318,8 @@ static void sh_tmu_register_clockevent(struct sh_tmu_priv *p, ced->set_mode = sh_tmu_clock_event_mode; dev_info(&p->pdev->dev, "used for clock events\n"); - clockevents_register_device(ced); + + clockevents_config_and_register(ced, 1, 0x300, 0xffffffff); ret = setup_irq(p->irqaction.irq, &p->irqaction); if (ret) { From 16b0dc29c1af9df341428f4c49ada4f626258082 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 10 Jun 2012 21:11:57 +0000 Subject: [PATCH 343/475] dummy: fix rcu_sched self-detected stalls Trying to "modprobe dummy numdummies=30000" triggers : INFO: rcu_sched self-detected stall on CPU { 8} (t=60000 jiffies) After this splat, RTNL is locked and reboot is needed. We must call cond_resched() to avoid this, even holding RTNL. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/dummy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index 442d91a2747..bab0158f1cc 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -187,8 +187,10 @@ static int __init dummy_init_module(void) rtnl_lock(); err = __rtnl_link_register(&dummy_link_ops); - for (i = 0; i < numdummies && !err; i++) + for (i = 0; i < numdummies && !err; i++) { err = dummy_init_one(); + cond_resched(); + } if (err < 0) __rtnl_link_unregister(&dummy_link_ops); rtnl_unlock(); From 9efc31b81d740487fd204fade0913ffa8cc97fc3 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 10 Jun 2012 10:50:52 +0800 Subject: [PATCH 344/475] x86/mm: Fix some kernel-doc warnings Fix kernel-doc warnings in arch/x86/mm/ioremap.c and arch/x86/mm/pageattr.c, just like this one: Warning(arch/x86/mm/ioremap.c:204): No description found for parameter 'phys_addr' Warning(arch/x86/mm/ioremap.c:204): Excess function parameter 'offset' description in 'ioremap_nocache' Signed-off-by: Wanpeng Li Cc: Gavin Shan Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1339296652-2935-1-git-send-email-liwp.linux@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 4 ++-- arch/x86/mm/pageattr.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index be1ef574ce9..78fe3f1ac49 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -180,7 +180,7 @@ err_free_memtype: /** * ioremap_nocache - map bus memory into CPU space - * @offset: bus address of the memory + * @phys_addr: bus address of the memory * @size: size of the resource to map * * ioremap_nocache performs a platform specific sequence of operations to @@ -217,7 +217,7 @@ EXPORT_SYMBOL(ioremap_nocache); /** * ioremap_wc - map memory into CPU space write combined - * @offset: bus address of the memory + * @phys_addr: bus address of the memory * @size: size of the resource to map * * This version of ioremap ensures that the memory is marked write combining. diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e1ebde31521..a718e0d2350 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -122,7 +122,7 @@ within(unsigned long addr, unsigned long start, unsigned long end) /** * clflush_cache_range - flush a cache range with clflush - * @addr: virtual start address + * @vaddr: virtual start address * @size: number of bytes to flush * * clflush is an unordered instruction which needs fencing with mfence From e53f517ff236a0ec5413ff3935c53406b69bc1e2 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 6 Jun 2012 08:03:35 +0200 Subject: [PATCH 345/475] ARM: dma-mapping: Add missing static storage class specifier Fixes the following sparse warnings: arch/arm/mm/dma-mapping.c:231:15: warning: symbol 'consistent_base' was not declared. Should it be static? arch/arm/mm/dma-mapping.c:326:8: warning: symbol 'coherent_pool_size' was not declared. Should it be static? Signed-off-by: Sachin Kamat Signed-off-by: Marek Szyprowski --- arch/arm/mm/dma-mapping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 106c4c0ebcc..d766e4256b7 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -228,7 +228,7 @@ static pte_t **consistent_pte; #define DEFAULT_CONSISTENT_DMA_SIZE SZ_2M -unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE; +static unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE; void __init init_consistent_dma_size(unsigned long size) { @@ -321,7 +321,7 @@ static struct arm_vmregion_head coherent_head = { .vm_list = LIST_HEAD_INIT(coherent_head.vm_list), }; -size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8; +static size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8; static int __init early_coherent_pool(char *p) { From 4986e5c7cd91817d0f58dd15073c9080d47980cf Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 6 Jun 2012 12:05:01 +0200 Subject: [PATCH 346/475] ARM: mm: fix type of the arm_dma_limit global variable arm_dma_limit stores physical address of maximal address accessible by DMA, so the phys_addr_t type makes much more sense for it instead of u32. This patch fixes the following build warning: arch/arm/mm/init.c:380: warning: comparison of distinct pointer types lacks a cast Reported-by: Russell King Signed-off-by: Marek Szyprowski --- arch/arm/mm/init.c | 2 +- arch/arm/mm/mm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index c21d06c7dd7..f54d5921976 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -212,7 +212,7 @@ EXPORT_SYMBOL(arm_dma_zone_size); * allocations. This must be the smallest DMA mask in the system, * so a successful GFP_DMA allocation will always satisfy this. */ -u32 arm_dma_limit; +phys_addr_t arm_dma_limit; static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole, unsigned long dma_size) diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 93dc0c17cdc..c471436c795 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -62,7 +62,7 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page #endif #ifdef CONFIG_ZONE_DMA -extern u32 arm_dma_limit; +extern phys_addr_t arm_dma_limit; #else #define arm_dma_limit ((u32)~0) #endif From f2a8ecaf6d7259cc70fb5898823eab9a2dd86570 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 11 Jun 2012 15:51:54 +0200 Subject: [PATCH 347/475] ALSA: hda - Fix detection of Creative SoundCore3D controllers The PCI ID entries of Creative SoundCore3D HD-audio controllers should be before the wildcard for vendor = Creative. Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index e172c5d6855..02763827dde 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -3354,6 +3354,11 @@ static DEFINE_PCI_DEVICE_TABLE(azx_ids) = { { PCI_DEVICE(0x6549, 0x1200), .driver_data = AZX_DRIVER_TERA | AZX_DCAPS_NO_64BIT }, /* Creative X-Fi (CA0110-IBG) */ + /* CTHDA chips */ + { PCI_DEVICE(0x1102, 0x0010), + .driver_data = AZX_DRIVER_CTHDA | AZX_DCAPS_PRESET_CTHDA }, + { PCI_DEVICE(0x1102, 0x0012), + .driver_data = AZX_DRIVER_CTHDA | AZX_DCAPS_PRESET_CTHDA }, #if !defined(CONFIG_SND_CTXFI) && !defined(CONFIG_SND_CTXFI_MODULE) /* the following entry conflicts with snd-ctxfi driver, * as ctxfi driver mutates from HD-audio to native mode with @@ -3370,11 +3375,6 @@ static DEFINE_PCI_DEVICE_TABLE(azx_ids) = { .driver_data = AZX_DRIVER_CTX | AZX_DCAPS_CTX_WORKAROUND | AZX_DCAPS_RIRB_PRE_DELAY | AZX_DCAPS_POSFIX_LPIB }, #endif - /* CTHDA chips */ - { PCI_DEVICE(0x1102, 0x0010), - .driver_data = AZX_DRIVER_CTHDA | AZX_DCAPS_PRESET_CTHDA }, - { PCI_DEVICE(0x1102, 0x0012), - .driver_data = AZX_DRIVER_CTHDA | AZX_DCAPS_PRESET_CTHDA }, /* Vortex86MX */ { PCI_DEVICE(0x17f3, 0x3010), .driver_data = AZX_DRIVER_GENERIC }, /* VMware HDAudio */ From e30478598a8476d02e3b00caa89ce1a3b1dad54b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 10 Jun 2012 23:24:00 +0000 Subject: [PATCH 348/475] lpc_eth: add missing ndo_change_mtu() lpc_eth does a copy of transmitted skbs to DMA area, without checking skb lengths, so can trigger buffer overflows : memcpy(pldat->tx_buff_v + txidx * ENET_MAXF_SIZE, skb->data, len); One way to get bigger skbs is to allow MTU changes above the 1500 limit. Calling eth_change_mtu() in ndo_change_mtu() makes sure this cannot happen. Signed-off-by: Eric Dumazet Cc: Roland Stigge Cc: Kevin Wells Acked-by: Roland Stigge Signed-off-by: David S. Miller --- drivers/net/ethernet/nxp/lpc_eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 8d2666fcffd..10febdc2bd1 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1320,6 +1320,7 @@ static const struct net_device_ops lpc_netdev_ops = { .ndo_set_rx_mode = lpc_eth_set_multicast_list, .ndo_do_ioctl = lpc_eth_ioctl, .ndo_set_mac_address = lpc_set_mac_address, + .ndo_change_mtu = eth_change_mtu, }; static int lpc_eth_drv_probe(struct platform_device *pdev) From 3f16da51b0e533871d22a29682f3c3969d4f7e22 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Jun 2012 07:21:36 +0000 Subject: [PATCH 349/475] lpc_eth: fix tx completion __lpc_handle_xmit() has two bugs : 1) It can leak skbs in case TXSTATUS_ERROR is set 2) It can wake up txqueue while no slot was freed. Signed-off-by: Eric Dumazet Reported-by: Roland Stigge Tested-by: Roland Stigge Cc: Kevin Wells Signed-off-by: David S. Miller --- drivers/net/ethernet/nxp/lpc_eth.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 10febdc2bd1..083d6715335 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -946,16 +946,16 @@ static void __lpc_handle_xmit(struct net_device *ndev) /* Update stats */ ndev->stats.tx_packets++; ndev->stats.tx_bytes += skb->len; - - /* Free buffer */ - dev_kfree_skb_irq(skb); } + dev_kfree_skb_irq(skb); txcidx = readl(LPC_ENET_TXCONSUMEINDEX(pldat->net_base)); } - if (netif_queue_stopped(ndev)) - netif_wake_queue(ndev); + if (pldat->num_used_tx_buffs <= ENET_TX_DESC/2) { + if (netif_queue_stopped(ndev)) + netif_wake_queue(ndev); + } } static int __lpc_handle_recv(struct net_device *ndev, int budget) From 64f9a83665e4f168ff50ce1db9eb175f954048fa Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Thu, 31 May 2012 17:45:00 +0100 Subject: [PATCH 350/475] NFSv2: EOF incorrectly set on short read In cases where the server returns fewer bytes then those requested, we can incorrectly set the eof flag for the file. Fixing this allows the request to be retried with updated offset and count arguments. Signed-off-by: Sachin Prabhu Signed-off-by: Trond Myklebust --- fs/nfs/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index a706b6bcc28..617c7419a08 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -651,7 +651,7 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) /* Emulate the eof flag, which isn't normally needed in NFSv2 * as it is guaranteed to always return the file attributes */ - if (data->args.offset + data->args.count >= data->res.fattr->size) + if (data->args.offset + data->res.count >= data->res.fattr->size) data->res.eof = 1; } return 0; From 2669940db8d02147181e338bb6db98394569f31a Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 30 May 2012 16:12:24 -0400 Subject: [PATCH 351/475] NFSv4 do not send an empty SETATTR compound Commit 536e43d12b9517bbbf6114cd1a12be27857a4d7a ATTR_OPEN check can result in an ia_valid with only ATTR_FILE set, and no NFS_VALID_ATTRS attributes to request from the server. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f23977e4ac0..15fc7e4664e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2549,6 +2549,14 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, nfs_fattr_init(fattr); + /* Deal with open(O_TRUNC) */ + if (sattr->ia_valid & ATTR_OPEN) + sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); + + /* Optimization: if the end result is no change, don't RPC */ + if ((sattr->ia_valid & ~(ATTR_FILE)) == 0) + return 0; + /* Search for an existing open(O_WRITE) file */ if (sattr->ia_valid & ATTR_FILE) { struct nfs_open_context *ctx; @@ -2560,10 +2568,6 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } } - /* Deal with open(O_TRUNC) */ - if (sattr->ia_valid & ATTR_OPEN) - sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); - status = nfs4_do_setattr(inode, cred, fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(inode, sattr); From 92123e068efa310b09e9943ac1cfd10ff6b6d2e4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Jun 2012 10:03:42 -0400 Subject: [PATCH 352/475] rpc_pipefs: allow rpc_purge_list to take a NULL waitq pointer In the event that we don't have a dentry for a rpc_pipefs pipe, we still need to allow the queue_timeout job to clean out the queue. There's just no waitq to wake up in that event. Cc: stable@kernel.org Reported-by: Hans de Bruin Reported-by: Joerg Platte Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 04040476082..21fde99e5c5 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -71,7 +71,9 @@ static void rpc_purge_list(wait_queue_head_t *waitq, struct list_head *head, msg->errno = err; destroy_msg(msg); } while (!list_empty(head)); - wake_up(waitq); + + if (waitq) + wake_up(waitq); } static void @@ -91,11 +93,9 @@ rpc_timeout_upcall_queue(struct work_struct *work) } dentry = dget(pipe->dentry); spin_unlock(&pipe->lock); - if (dentry) { - rpc_purge_list(&RPC_I(dentry->d_inode)->waitq, - &free_list, destroy_msg, -ETIMEDOUT); - dput(dentry); - } + rpc_purge_list(dentry ? &RPC_I(dentry->d_inode)->waitq : NULL, + &free_list, destroy_msg, -ETIMEDOUT); + dput(dentry); } ssize_t rpc_pipe_generic_upcall(struct file *filp, struct rpc_pipe_msg *msg, From 883ffd6e64114495f68652721065149a2bbd9de6 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Thu, 7 Jun 2012 23:21:05 +0000 Subject: [PATCH 353/475] net: stmmac: Fix clock en-/disable calls clk_{un}prepare is mandatory for platforms using common clock framework. Since these drivers are used by SPEAr platform, which supports common clock framework, add clk_{un}prepare() support for them. Otherwise the clocks are not correctly en-/disabled and ethernet support doesn't work. Signed-off-by: Stefan Roese Cc: Viresh Kumar Cc: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 94b21b409d8..dc20c56efc9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -109,7 +109,7 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device, static inline int stmmac_clk_enable(struct stmmac_priv *priv) { if (!IS_ERR(priv->stmmac_clk)) - return clk_enable(priv->stmmac_clk); + return clk_prepare_enable(priv->stmmac_clk); return 0; } @@ -119,7 +119,7 @@ static inline void stmmac_clk_disable(struct stmmac_priv *priv) if (IS_ERR(priv->stmmac_clk)) return; - clk_disable(priv->stmmac_clk); + clk_disable_unprepare(priv->stmmac_clk); } static inline int stmmac_clk_get(struct stmmac_priv *priv) { From b7abee6ef888117f92db370620ebf116a38e3f4d Mon Sep 17 00:00:00 2001 From: Matt Carlson Date: Thu, 7 Jun 2012 12:56:54 +0000 Subject: [PATCH 354/475] tg3: Apply short DMA frag workaround to 5906 5906 devices also need the short DMA fragment workaround. This patch makes the necessary change. Signed-off-by: Matt Carlson Tested-by: Christian Kujau Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index edeeb516807..e47ff8be1d7 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14275,7 +14275,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) } } - if (tg3_flag(tp, 5755_PLUS)) + if (tg3_flag(tp, 5755_PLUS) || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) tg3_flag_set(tp, SHORT_DMA_BUG); if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719) From e3d62d7e8e05a6a4b08f4672385ae58fc0f132c4 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 7 Jun 2012 10:45:02 +0000 Subject: [PATCH 355/475] tilegx network driver: initial support This change adds support for the tilegx network driver based on the GXIO IORPC support in the tilegx software stack, using the on-chip mPIPE packet processing engine. Signed-off-by: Chris Metcalf Signed-off-by: David S. Miller --- drivers/net/ethernet/tile/Kconfig | 2 + drivers/net/ethernet/tile/Makefile | 4 +- drivers/net/ethernet/tile/tilegx.c | 1898 ++++++++++++++++++++++++++++ 3 files changed, 1902 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/tile/tilegx.c diff --git a/drivers/net/ethernet/tile/Kconfig b/drivers/net/ethernet/tile/Kconfig index 2d9218f86bc..098b1c42b39 100644 --- a/drivers/net/ethernet/tile/Kconfig +++ b/drivers/net/ethernet/tile/Kconfig @@ -7,6 +7,8 @@ config TILE_NET depends on TILE default y select CRC32 + select TILE_GXIO_MPIPE if TILEGX + select HIGH_RES_TIMERS if TILEGX ---help--- This is a standard Linux network device driver for the on-chip Tilera Gigabit Ethernet and XAUI interfaces. diff --git a/drivers/net/ethernet/tile/Makefile b/drivers/net/ethernet/tile/Makefile index f634f142cab..0ef9eefd321 100644 --- a/drivers/net/ethernet/tile/Makefile +++ b/drivers/net/ethernet/tile/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_TILE_NET) += tile_net.o ifdef CONFIG_TILEGX -tile_net-objs := tilegx.o mpipe.o iorpc_mpipe.o dma_queue.o +tile_net-y := tilegx.o else -tile_net-objs := tilepro.o +tile_net-y := tilepro.o endif diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c new file mode 100644 index 00000000000..83b4b388ad4 --- /dev/null +++ b/drivers/net/ethernet/tile/tilegx.c @@ -0,0 +1,1898 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include /* printk() */ +#include /* kmalloc() */ +#include /* error codes */ +#include /* size_t */ +#include +#include +#include +#include /* struct device, and other headers */ +#include /* eth_type_trans */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* Default transmit lockup timeout period, in jiffies. */ +#define TILE_NET_TIMEOUT (5 * HZ) + +/* The maximum number of distinct channels (idesc.channel is 5 bits). */ +#define TILE_NET_CHANNELS 32 + +/* Maximum number of idescs to handle per "poll". */ +#define TILE_NET_BATCH 128 + +/* Maximum number of packets to handle per "poll". */ +#define TILE_NET_WEIGHT 64 + +/* Number of entries in each iqueue. */ +#define IQUEUE_ENTRIES 512 + +/* Number of entries in each equeue. */ +#define EQUEUE_ENTRIES 2048 + +/* Total header bytes per equeue slot. Must be big enough for 2 bytes + * of NET_IP_ALIGN alignment, plus 14 bytes (?) of L2 header, plus up to + * 60 bytes of actual TCP header. We round up to align to cache lines. + */ +#define HEADER_BYTES 128 + +/* Maximum completions per cpu per device (must be a power of two). + * ISSUE: What is the right number here? If this is too small, then + * egress might block waiting for free space in a completions array. + * ISSUE: At the least, allocate these only for initialized echannels. + */ +#define TILE_NET_MAX_COMPS 64 + +#define MAX_FRAGS (MAX_SKB_FRAGS + 1) + +/* Size of completions data to allocate. + * ISSUE: Probably more than needed since we don't use all the channels. + */ +#define COMPS_SIZE (TILE_NET_CHANNELS * sizeof(struct tile_net_comps)) + +/* Size of NotifRing data to allocate. */ +#define NOTIF_RING_SIZE (IQUEUE_ENTRIES * sizeof(gxio_mpipe_idesc_t)) + +/* Timeout to wake the per-device TX timer after we stop the queue. + * We don't want the timeout too short (adds overhead, and might end + * up causing stop/wake/stop/wake cycles) or too long (affects performance). + * For the 10 Gb NIC, 30 usec means roughly 30+ 1500-byte packets. + */ +#define TX_TIMER_DELAY_USEC 30 + +/* Timeout to wake the per-cpu egress timer to free completions. */ +#define EGRESS_TIMER_DELAY_USEC 1000 + +MODULE_AUTHOR("Tilera Corporation"); +MODULE_LICENSE("GPL"); + +/* A "packet fragment" (a chunk of memory). */ +struct frag { + void *buf; + size_t length; +}; + +/* A single completion. */ +struct tile_net_comp { + /* The "complete_count" when the completion will be complete. */ + s64 when; + /* The buffer to be freed when the completion is complete. */ + struct sk_buff *skb; +}; + +/* The completions for a given cpu and echannel. */ +struct tile_net_comps { + /* The completions. */ + struct tile_net_comp comp_queue[TILE_NET_MAX_COMPS]; + /* The number of completions used. */ + unsigned long comp_next; + /* The number of completions freed. */ + unsigned long comp_last; +}; + +/* The transmit wake timer for a given cpu and echannel. */ +struct tile_net_tx_wake { + struct hrtimer timer; + struct net_device *dev; +}; + +/* Info for a specific cpu. */ +struct tile_net_info { + /* The NAPI struct. */ + struct napi_struct napi; + /* Packet queue. */ + gxio_mpipe_iqueue_t iqueue; + /* Our cpu. */ + int my_cpu; + /* True if iqueue is valid. */ + bool has_iqueue; + /* NAPI flags. */ + bool napi_added; + bool napi_enabled; + /* Number of small sk_buffs which must still be provided. */ + unsigned int num_needed_small_buffers; + /* Number of large sk_buffs which must still be provided. */ + unsigned int num_needed_large_buffers; + /* A timer for handling egress completions. */ + struct hrtimer egress_timer; + /* True if "egress_timer" is scheduled. */ + bool egress_timer_scheduled; + /* Comps for each egress channel. */ + struct tile_net_comps *comps_for_echannel[TILE_NET_CHANNELS]; + /* Transmit wake timer for each egress channel. */ + struct tile_net_tx_wake tx_wake[TILE_NET_CHANNELS]; +}; + +/* Info for egress on a particular egress channel. */ +struct tile_net_egress { + /* The "equeue". */ + gxio_mpipe_equeue_t *equeue; + /* The headers for TSO. */ + unsigned char *headers; +}; + +/* Info for a specific device. */ +struct tile_net_priv { + /* Our network device. */ + struct net_device *dev; + /* The primary link. */ + gxio_mpipe_link_t link; + /* The primary channel, if open, else -1. */ + int channel; + /* The "loopify" egress link, if needed. */ + gxio_mpipe_link_t loopify_link; + /* The "loopify" egress channel, if open, else -1. */ + int loopify_channel; + /* The egress channel (channel or loopify_channel). */ + int echannel; + /* Total stats. */ + struct net_device_stats stats; +}; + +/* Egress info, indexed by "priv->echannel" (lazily created as needed). */ +static struct tile_net_egress egress_for_echannel[TILE_NET_CHANNELS]; + +/* Devices currently associated with each channel. + * NOTE: The array entry can become NULL after ifconfig down, but + * we do not free the underlying net_device structures, so it is + * safe to use a pointer after reading it from this array. + */ +static struct net_device *tile_net_devs_for_channel[TILE_NET_CHANNELS]; + +/* A mutex for "tile_net_devs_for_channel". */ +static DEFINE_MUTEX(tile_net_devs_for_channel_mutex); + +/* The per-cpu info. */ +static DEFINE_PER_CPU(struct tile_net_info, per_cpu_info); + +/* The "context" for all devices. */ +static gxio_mpipe_context_t context; + +/* Buffer sizes and mpipe enum codes for buffer stacks. + * See arch/tile/include/gxio/mpipe.h for the set of possible values. + */ +#define BUFFER_SIZE_SMALL_ENUM GXIO_MPIPE_BUFFER_SIZE_128 +#define BUFFER_SIZE_SMALL 128 +#define BUFFER_SIZE_LARGE_ENUM GXIO_MPIPE_BUFFER_SIZE_1664 +#define BUFFER_SIZE_LARGE 1664 + +/* The small/large "buffer stacks". */ +static int small_buffer_stack = -1; +static int large_buffer_stack = -1; + +/* Amount of memory allocated for each buffer stack. */ +static size_t buffer_stack_size; + +/* The actual memory allocated for the buffer stacks. */ +static void *small_buffer_stack_va; +static void *large_buffer_stack_va; + +/* The buckets. */ +static int first_bucket = -1; +static int num_buckets = 1; + +/* The ingress irq. */ +static int ingress_irq = -1; + +/* Text value of tile_net.cpus if passed as a module parameter. */ +static char *network_cpus_string; + +/* The actual cpus in "network_cpus". */ +static struct cpumask network_cpus_map; + +/* If "loopify=LINK" was specified, this is "LINK". */ +static char *loopify_link_name; + +/* If "tile_net.custom" was specified, this is non-NULL. */ +static char *custom_str; + +/* The "tile_net.cpus" argument specifies the cpus that are dedicated + * to handle ingress packets. + * + * The parameter should be in the form "tile_net.cpus=m-n[,x-y]", where + * m, n, x, y are integer numbers that represent the cpus that can be + * neither a dedicated cpu nor a dataplane cpu. + */ +static bool network_cpus_init(void) +{ + char buf[1024]; + int rc; + + if (network_cpus_string == NULL) + return false; + + rc = cpulist_parse_crop(network_cpus_string, &network_cpus_map); + if (rc != 0) { + pr_warn("tile_net.cpus=%s: malformed cpu list\n", + network_cpus_string); + return false; + } + + /* Remove dedicated cpus. */ + cpumask_and(&network_cpus_map, &network_cpus_map, cpu_possible_mask); + + if (cpumask_empty(&network_cpus_map)) { + pr_warn("Ignoring empty tile_net.cpus='%s'.\n", + network_cpus_string); + return false; + } + + cpulist_scnprintf(buf, sizeof(buf), &network_cpus_map); + pr_info("Linux network CPUs: %s\n", buf); + return true; +} + +module_param_named(cpus, network_cpus_string, charp, 0444); +MODULE_PARM_DESC(cpus, "cpulist of cores that handle network interrupts"); + +/* The "tile_net.loopify=LINK" argument causes the named device to + * actually use "loop0" for ingress, and "loop1" for egress. This + * allows an app to sit between the actual link and linux, passing + * (some) packets along to linux, and forwarding (some) packets sent + * out by linux. + */ +module_param_named(loopify, loopify_link_name, charp, 0444); +MODULE_PARM_DESC(loopify, "name the device to use loop0/1 for ingress/egress"); + +/* The "tile_net.custom" argument causes us to ignore the "conventional" + * classifier metadata, in particular, the "l2_offset". + */ +module_param_named(custom, custom_str, charp, 0444); +MODULE_PARM_DESC(custom, "indicates a (heavily) customized classifier"); + +/* Atomically update a statistics field. + * Note that on TILE-Gx, this operation is fire-and-forget on the + * issuing core (single-cycle dispatch) and takes only a few cycles + * longer than a regular store when the request reaches the home cache. + * No expensive bus management overhead is required. + */ +static void tile_net_stats_add(unsigned long value, unsigned long *field) +{ + BUILD_BUG_ON(sizeof(atomic_long_t) != sizeof(unsigned long)); + atomic_long_add(value, (atomic_long_t *)field); +} + +/* Allocate and push a buffer. */ +static bool tile_net_provide_buffer(bool small) +{ + int stack = small ? small_buffer_stack : large_buffer_stack; + const unsigned long buffer_alignment = 128; + struct sk_buff *skb; + int len; + + len = sizeof(struct sk_buff **) + buffer_alignment; + len += (small ? BUFFER_SIZE_SMALL : BUFFER_SIZE_LARGE); + skb = dev_alloc_skb(len); + if (skb == NULL) + return false; + + /* Make room for a back-pointer to 'skb' and guarantee alignment. */ + skb_reserve(skb, sizeof(struct sk_buff **)); + skb_reserve(skb, -(long)skb->data & (buffer_alignment - 1)); + + /* Save a back-pointer to 'skb'. */ + *(struct sk_buff **)(skb->data - sizeof(struct sk_buff **)) = skb; + + /* Make sure "skb" and the back-pointer have been flushed. */ + wmb(); + + gxio_mpipe_push_buffer(&context, stack, + (void *)va_to_tile_io_addr(skb->data)); + + return true; +} + +/* Convert a raw mpipe buffer to its matching skb pointer. */ +static struct sk_buff *mpipe_buf_to_skb(void *va) +{ + /* Acquire the associated "skb". */ + struct sk_buff **skb_ptr = va - sizeof(*skb_ptr); + struct sk_buff *skb = *skb_ptr; + + /* Paranoia. */ + if (skb->data != va) { + /* Panic here since there's a reasonable chance + * that corrupt buffers means generic memory + * corruption, with unpredictable system effects. + */ + panic("Corrupt linux buffer! va=%p, skb=%p, skb->data=%p", + va, skb, skb->data); + } + + return skb; +} + +static void tile_net_pop_all_buffers(int stack) +{ + for (;;) { + tile_io_addr_t addr = + (tile_io_addr_t)gxio_mpipe_pop_buffer(&context, stack); + if (addr == 0) + break; + dev_kfree_skb_irq(mpipe_buf_to_skb(tile_io_addr_to_va(addr))); + } +} + +/* Provide linux buffers to mPIPE. */ +static void tile_net_provide_needed_buffers(void) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + + while (info->num_needed_small_buffers != 0) { + if (!tile_net_provide_buffer(true)) + goto oops; + info->num_needed_small_buffers--; + } + + while (info->num_needed_large_buffers != 0) { + if (!tile_net_provide_buffer(false)) + goto oops; + info->num_needed_large_buffers--; + } + + return; + +oops: + /* Add a description to the page allocation failure dump. */ + pr_notice("Tile %d still needs some buffers\n", info->my_cpu); +} + +static inline bool filter_packet(struct net_device *dev, void *buf) +{ + /* Filter packets received before we're up. */ + if (dev == NULL || !(dev->flags & IFF_UP)) + return true; + + /* Filter out packets that aren't for us. */ + if (!(dev->flags & IFF_PROMISC) && + !is_multicast_ether_addr(buf) && + compare_ether_addr(dev->dev_addr, buf) != 0) + return true; + + return false; +} + +static void tile_net_receive_skb(struct net_device *dev, struct sk_buff *skb, + gxio_mpipe_idesc_t *idesc, unsigned long len) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_priv *priv = netdev_priv(dev); + + /* Encode the actual packet length. */ + skb_put(skb, len); + + skb->protocol = eth_type_trans(skb, dev); + + /* Acknowledge "good" hardware checksums. */ + if (idesc->cs && idesc->csum_seed_val == 0xFFFF) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + netif_receive_skb(skb); + + /* Update stats. */ + tile_net_stats_add(1, &priv->stats.rx_packets); + tile_net_stats_add(len, &priv->stats.rx_bytes); + + /* Need a new buffer. */ + if (idesc->size == BUFFER_SIZE_SMALL_ENUM) + info->num_needed_small_buffers++; + else + info->num_needed_large_buffers++; +} + +/* Handle a packet. Return true if "processed", false if "filtered". */ +static bool tile_net_handle_packet(gxio_mpipe_idesc_t *idesc) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct net_device *dev = tile_net_devs_for_channel[idesc->channel]; + uint8_t l2_offset; + void *va; + void *buf; + unsigned long len; + bool filter; + + /* Drop packets for which no buffer was available. + * NOTE: This happens under heavy load. + */ + if (idesc->be) { + struct tile_net_priv *priv = netdev_priv(dev); + tile_net_stats_add(1, &priv->stats.rx_dropped); + gxio_mpipe_iqueue_consume(&info->iqueue, idesc); + if (net_ratelimit()) + pr_info("Dropping packet (insufficient buffers).\n"); + return false; + } + + /* Get the "l2_offset", if allowed. */ + l2_offset = custom_str ? 0 : gxio_mpipe_idesc_get_l2_offset(idesc); + + /* Get the raw buffer VA (includes "headroom"). */ + va = tile_io_addr_to_va((unsigned long)(long)idesc->va); + + /* Get the actual packet start/length. */ + buf = va + l2_offset; + len = idesc->l2_size - l2_offset; + + /* Point "va" at the raw buffer. */ + va -= NET_IP_ALIGN; + + filter = filter_packet(dev, buf); + if (filter) { + gxio_mpipe_iqueue_drop(&info->iqueue, idesc); + } else { + struct sk_buff *skb = mpipe_buf_to_skb(va); + + /* Skip headroom, and any custom header. */ + skb_reserve(skb, NET_IP_ALIGN + l2_offset); + + tile_net_receive_skb(dev, skb, idesc, len); + } + + gxio_mpipe_iqueue_consume(&info->iqueue, idesc); + return !filter; +} + +/* Handle some packets for the current CPU. + * + * This function handles up to TILE_NET_BATCH idescs per call. + * + * ISSUE: Since we do not provide new buffers until this function is + * complete, we must initially provide enough buffers for each network + * cpu to fill its iqueue and also its batched idescs. + * + * ISSUE: The "rotting packet" race condition occurs if a packet + * arrives after the queue appears to be empty, and before the + * hypervisor interrupt is re-enabled. + */ +static int tile_net_poll(struct napi_struct *napi, int budget) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + unsigned int work = 0; + gxio_mpipe_idesc_t *idesc; + int i, n; + + /* Process packets. */ + while ((n = gxio_mpipe_iqueue_try_peek(&info->iqueue, &idesc)) > 0) { + for (i = 0; i < n; i++) { + if (i == TILE_NET_BATCH) + goto done; + if (tile_net_handle_packet(idesc + i)) { + if (++work >= budget) + goto done; + } + } + } + + /* There are no packets left. */ + napi_complete(&info->napi); + + /* Re-enable hypervisor interrupts. */ + gxio_mpipe_enable_notif_ring_interrupt(&context, info->iqueue.ring); + + /* HACK: Avoid the "rotting packet" problem. */ + if (gxio_mpipe_iqueue_try_peek(&info->iqueue, &idesc) > 0) + napi_schedule(&info->napi); + + /* ISSUE: Handle completions? */ + +done: + tile_net_provide_needed_buffers(); + + return work; +} + +/* Handle an ingress interrupt on the current cpu. */ +static irqreturn_t tile_net_handle_ingress_irq(int irq, void *unused) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + napi_schedule(&info->napi); + return IRQ_HANDLED; +} + +/* Free some completions. This must be called with interrupts blocked. */ +static int tile_net_free_comps(gxio_mpipe_equeue_t *equeue, + struct tile_net_comps *comps, + int limit, bool force_update) +{ + int n = 0; + while (comps->comp_last < comps->comp_next) { + unsigned int cid = comps->comp_last % TILE_NET_MAX_COMPS; + struct tile_net_comp *comp = &comps->comp_queue[cid]; + if (!gxio_mpipe_equeue_is_complete(equeue, comp->when, + force_update || n == 0)) + break; + dev_kfree_skb_irq(comp->skb); + comps->comp_last++; + if (++n == limit) + break; + } + return n; +} + +/* Add a completion. This must be called with interrupts blocked. + * tile_net_equeue_try_reserve() will have ensured a free completion entry. + */ +static void add_comp(gxio_mpipe_equeue_t *equeue, + struct tile_net_comps *comps, + uint64_t when, struct sk_buff *skb) +{ + int cid = comps->comp_next % TILE_NET_MAX_COMPS; + comps->comp_queue[cid].when = when; + comps->comp_queue[cid].skb = skb; + comps->comp_next++; +} + +static void tile_net_schedule_tx_wake_timer(struct net_device *dev) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_priv *priv = netdev_priv(dev); + + hrtimer_start(&info->tx_wake[priv->echannel].timer, + ktime_set(0, TX_TIMER_DELAY_USEC * 1000UL), + HRTIMER_MODE_REL_PINNED); +} + +static enum hrtimer_restart tile_net_handle_tx_wake_timer(struct hrtimer *t) +{ + struct tile_net_tx_wake *tx_wake = + container_of(t, struct tile_net_tx_wake, timer); + netif_wake_subqueue(tx_wake->dev, smp_processor_id()); + return HRTIMER_NORESTART; +} + +/* Make sure the egress timer is scheduled. */ +static void tile_net_schedule_egress_timer(void) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + + if (!info->egress_timer_scheduled) { + hrtimer_start(&info->egress_timer, + ktime_set(0, EGRESS_TIMER_DELAY_USEC * 1000UL), + HRTIMER_MODE_REL_PINNED); + info->egress_timer_scheduled = true; + } +} + +/* The "function" for "info->egress_timer". + * + * This timer will reschedule itself as long as there are any pending + * completions expected for this tile. + */ +static enum hrtimer_restart tile_net_handle_egress_timer(struct hrtimer *t) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + unsigned long irqflags; + bool pending = false; + int i; + + local_irq_save(irqflags); + + /* The timer is no longer scheduled. */ + info->egress_timer_scheduled = false; + + /* Free all possible comps for this tile. */ + for (i = 0; i < TILE_NET_CHANNELS; i++) { + struct tile_net_egress *egress = &egress_for_echannel[i]; + struct tile_net_comps *comps = info->comps_for_echannel[i]; + if (comps->comp_last >= comps->comp_next) + continue; + tile_net_free_comps(egress->equeue, comps, -1, true); + pending = pending || (comps->comp_last < comps->comp_next); + } + + /* Reschedule timer if needed. */ + if (pending) + tile_net_schedule_egress_timer(); + + local_irq_restore(irqflags); + + return HRTIMER_NORESTART; +} + +/* Helper function for "tile_net_update()". + * "dev" (i.e. arg) is the device being brought up or down, + * or NULL if all devices are now down. + */ +static void tile_net_update_cpu(void *arg) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct net_device *dev = arg; + + if (!info->has_iqueue) + return; + + if (dev != NULL) { + if (!info->napi_added) { + netif_napi_add(dev, &info->napi, + tile_net_poll, TILE_NET_WEIGHT); + info->napi_added = true; + } + if (!info->napi_enabled) { + napi_enable(&info->napi); + info->napi_enabled = true; + } + enable_percpu_irq(ingress_irq, 0); + } else { + disable_percpu_irq(ingress_irq); + if (info->napi_enabled) { + napi_disable(&info->napi); + info->napi_enabled = false; + } + /* FIXME: Drain the iqueue. */ + } +} + +/* Helper function for tile_net_open() and tile_net_stop(). + * Always called under tile_net_devs_for_channel_mutex. + */ +static int tile_net_update(struct net_device *dev) +{ + static gxio_mpipe_rules_t rules; /* too big to fit on the stack */ + bool saw_channel = false; + int channel; + int rc; + int cpu; + + gxio_mpipe_rules_init(&rules, &context); + + for (channel = 0; channel < TILE_NET_CHANNELS; channel++) { + if (tile_net_devs_for_channel[channel] == NULL) + continue; + if (!saw_channel) { + saw_channel = true; + gxio_mpipe_rules_begin(&rules, first_bucket, + num_buckets, NULL); + gxio_mpipe_rules_set_headroom(&rules, NET_IP_ALIGN); + } + gxio_mpipe_rules_add_channel(&rules, channel); + } + + /* NOTE: This can fail if there is no classifier. + * ISSUE: Can anything else cause it to fail? + */ + rc = gxio_mpipe_rules_commit(&rules); + if (rc != 0) { + netdev_warn(dev, "gxio_mpipe_rules_commit failed: %d\n", rc); + return -EIO; + } + + /* Update all cpus, sequentially (to protect "netif_napi_add()"). */ + for_each_online_cpu(cpu) + smp_call_function_single(cpu, tile_net_update_cpu, + (saw_channel ? dev : NULL), 1); + + /* HACK: Allow packets to flow in the simulator. */ + if (saw_channel) + sim_enable_mpipe_links(0, -1); + + return 0; +} + +/* Allocate and initialize mpipe buffer stacks, and register them in + * the mPIPE TLBs, for both small and large packet sizes. + * This routine supports tile_net_init_mpipe(), below. + */ +static int init_buffer_stacks(struct net_device *dev, int num_buffers) +{ + pte_t hash_pte = pte_set_home((pte_t) { 0 }, PAGE_HOME_HASH); + int rc; + + /* Compute stack bytes; we round up to 64KB and then use + * alloc_pages() so we get the required 64KB alignment as well. + */ + buffer_stack_size = + ALIGN(gxio_mpipe_calc_buffer_stack_bytes(num_buffers), + 64 * 1024); + + /* Allocate two buffer stack indices. */ + rc = gxio_mpipe_alloc_buffer_stacks(&context, 2, 0, 0); + if (rc < 0) { + netdev_err(dev, "gxio_mpipe_alloc_buffer_stacks failed: %d\n", + rc); + return rc; + } + small_buffer_stack = rc; + large_buffer_stack = rc + 1; + + /* Allocate the small memory stack. */ + small_buffer_stack_va = + alloc_pages_exact(buffer_stack_size, GFP_KERNEL); + if (small_buffer_stack_va == NULL) { + netdev_err(dev, + "Could not alloc %zd bytes for buffer stacks\n", + buffer_stack_size); + return -ENOMEM; + } + rc = gxio_mpipe_init_buffer_stack(&context, small_buffer_stack, + BUFFER_SIZE_SMALL_ENUM, + small_buffer_stack_va, + buffer_stack_size, 0); + if (rc != 0) { + netdev_err(dev, "gxio_mpipe_init_buffer_stack: %d\n", rc); + return rc; + } + rc = gxio_mpipe_register_client_memory(&context, small_buffer_stack, + hash_pte, 0); + if (rc != 0) { + netdev_err(dev, + "gxio_mpipe_register_buffer_memory failed: %d\n", + rc); + return rc; + } + + /* Allocate the large buffer stack. */ + large_buffer_stack_va = + alloc_pages_exact(buffer_stack_size, GFP_KERNEL); + if (large_buffer_stack_va == NULL) { + netdev_err(dev, + "Could not alloc %zd bytes for buffer stacks\n", + buffer_stack_size); + return -ENOMEM; + } + rc = gxio_mpipe_init_buffer_stack(&context, large_buffer_stack, + BUFFER_SIZE_LARGE_ENUM, + large_buffer_stack_va, + buffer_stack_size, 0); + if (rc != 0) { + netdev_err(dev, "gxio_mpipe_init_buffer_stack failed: %d\n", + rc); + return rc; + } + rc = gxio_mpipe_register_client_memory(&context, large_buffer_stack, + hash_pte, 0); + if (rc != 0) { + netdev_err(dev, + "gxio_mpipe_register_buffer_memory failed: %d\n", + rc); + return rc; + } + + return 0; +} + +/* Allocate per-cpu resources (memory for completions and idescs). + * This routine supports tile_net_init_mpipe(), below. + */ +static int alloc_percpu_mpipe_resources(struct net_device *dev, + int cpu, int ring) +{ + struct tile_net_info *info = &per_cpu(per_cpu_info, cpu); + int order, i, rc; + struct page *page; + void *addr; + + /* Allocate the "comps". */ + order = get_order(COMPS_SIZE); + page = homecache_alloc_pages(GFP_KERNEL, order, cpu); + if (page == NULL) { + netdev_err(dev, "Failed to alloc %zd bytes comps memory\n", + COMPS_SIZE); + return -ENOMEM; + } + addr = pfn_to_kaddr(page_to_pfn(page)); + memset(addr, 0, COMPS_SIZE); + for (i = 0; i < TILE_NET_CHANNELS; i++) + info->comps_for_echannel[i] = + addr + i * sizeof(struct tile_net_comps); + + /* If this is a network cpu, create an iqueue. */ + if (cpu_isset(cpu, network_cpus_map)) { + order = get_order(NOTIF_RING_SIZE); + page = homecache_alloc_pages(GFP_KERNEL, order, cpu); + if (page == NULL) { + netdev_err(dev, + "Failed to alloc %zd bytes iqueue memory\n", + NOTIF_RING_SIZE); + return -ENOMEM; + } + addr = pfn_to_kaddr(page_to_pfn(page)); + rc = gxio_mpipe_iqueue_init(&info->iqueue, &context, ring++, + addr, NOTIF_RING_SIZE, 0); + if (rc < 0) { + netdev_err(dev, + "gxio_mpipe_iqueue_init failed: %d\n", rc); + return rc; + } + info->has_iqueue = true; + } + + return ring; +} + +/* Initialize NotifGroup and buckets. + * This routine supports tile_net_init_mpipe(), below. + */ +static int init_notif_group_and_buckets(struct net_device *dev, + int ring, int network_cpus_count) +{ + int group, rc; + + /* Allocate one NotifGroup. */ + rc = gxio_mpipe_alloc_notif_groups(&context, 1, 0, 0); + if (rc < 0) { + netdev_err(dev, "gxio_mpipe_alloc_notif_groups failed: %d\n", + rc); + return rc; + } + group = rc; + + /* Initialize global num_buckets value. */ + if (network_cpus_count > 4) + num_buckets = 256; + else if (network_cpus_count > 1) + num_buckets = 16; + + /* Allocate some buckets, and set global first_bucket value. */ + rc = gxio_mpipe_alloc_buckets(&context, num_buckets, 0, 0); + if (rc < 0) { + netdev_err(dev, "gxio_mpipe_alloc_buckets failed: %d\n", rc); + return rc; + } + first_bucket = rc; + + /* Init group and buckets. */ + rc = gxio_mpipe_init_notif_group_and_buckets( + &context, group, ring, network_cpus_count, + first_bucket, num_buckets, + GXIO_MPIPE_BUCKET_STICKY_FLOW_LOCALITY); + if (rc != 0) { + netdev_err( + dev, + "gxio_mpipe_init_notif_group_and_buckets failed: %d\n", + rc); + return rc; + } + + return 0; +} + +/* Create an irq and register it, then activate the irq and request + * interrupts on all cores. Note that "ingress_irq" being initialized + * is how we know not to call tile_net_init_mpipe() again. + * This routine supports tile_net_init_mpipe(), below. + */ +static int tile_net_setup_interrupts(struct net_device *dev) +{ + int cpu, rc; + + rc = create_irq(); + if (rc < 0) { + netdev_err(dev, "create_irq failed: %d\n", rc); + return rc; + } + ingress_irq = rc; + tile_irq_activate(ingress_irq, TILE_IRQ_PERCPU); + rc = request_irq(ingress_irq, tile_net_handle_ingress_irq, + 0, NULL, NULL); + if (rc != 0) { + netdev_err(dev, "request_irq failed: %d\n", rc); + destroy_irq(ingress_irq); + ingress_irq = -1; + return rc; + } + + for_each_online_cpu(cpu) { + struct tile_net_info *info = &per_cpu(per_cpu_info, cpu); + if (info->has_iqueue) { + gxio_mpipe_request_notif_ring_interrupt( + &context, cpu_x(cpu), cpu_y(cpu), + 1, ingress_irq, info->iqueue.ring); + } + } + + return 0; +} + +/* Undo any state set up partially by a failed call to tile_net_init_mpipe. */ +static void tile_net_init_mpipe_fail(void) +{ + int cpu; + + /* Do cleanups that require the mpipe context first. */ + if (small_buffer_stack >= 0) + tile_net_pop_all_buffers(small_buffer_stack); + if (large_buffer_stack >= 0) + tile_net_pop_all_buffers(large_buffer_stack); + + /* Destroy mpipe context so the hardware no longer owns any memory. */ + gxio_mpipe_destroy(&context); + + for_each_online_cpu(cpu) { + struct tile_net_info *info = &per_cpu(per_cpu_info, cpu); + free_pages((unsigned long)(info->comps_for_echannel[0]), + get_order(COMPS_SIZE)); + info->comps_for_echannel[0] = NULL; + free_pages((unsigned long)(info->iqueue.idescs), + get_order(NOTIF_RING_SIZE)); + info->iqueue.idescs = NULL; + } + + if (small_buffer_stack_va) + free_pages_exact(small_buffer_stack_va, buffer_stack_size); + if (large_buffer_stack_va) + free_pages_exact(large_buffer_stack_va, buffer_stack_size); + + small_buffer_stack_va = NULL; + large_buffer_stack_va = NULL; + large_buffer_stack = -1; + small_buffer_stack = -1; + first_bucket = -1; +} + +/* The first time any tilegx network device is opened, we initialize + * the global mpipe state. If this step fails, we fail to open the + * device, but if it succeeds, we never need to do it again, and since + * tile_net can't be unloaded, we never undo it. + * + * Note that some resources in this path (buffer stack indices, + * bindings from init_buffer_stack, etc.) are hypervisor resources + * that are freed implicitly by gxio_mpipe_destroy(). + */ +static int tile_net_init_mpipe(struct net_device *dev) +{ + int i, num_buffers, rc; + int cpu; + int first_ring, ring; + int network_cpus_count = cpus_weight(network_cpus_map); + + if (!hash_default) { + netdev_err(dev, "Networking requires hash_default!\n"); + return -EIO; + } + + rc = gxio_mpipe_init(&context, 0); + if (rc != 0) { + netdev_err(dev, "gxio_mpipe_init failed: %d\n", rc); + return -EIO; + } + + /* Set up the buffer stacks. */ + num_buffers = + network_cpus_count * (IQUEUE_ENTRIES + TILE_NET_BATCH); + rc = init_buffer_stacks(dev, num_buffers); + if (rc != 0) + goto fail; + + /* Provide initial buffers. */ + rc = -ENOMEM; + for (i = 0; i < num_buffers; i++) { + if (!tile_net_provide_buffer(true)) { + netdev_err(dev, "Cannot allocate initial sk_bufs!\n"); + goto fail; + } + } + for (i = 0; i < num_buffers; i++) { + if (!tile_net_provide_buffer(false)) { + netdev_err(dev, "Cannot allocate initial sk_bufs!\n"); + goto fail; + } + } + + /* Allocate one NotifRing for each network cpu. */ + rc = gxio_mpipe_alloc_notif_rings(&context, network_cpus_count, 0, 0); + if (rc < 0) { + netdev_err(dev, "gxio_mpipe_alloc_notif_rings failed %d\n", + rc); + goto fail; + } + + /* Init NotifRings per-cpu. */ + first_ring = rc; + ring = first_ring; + for_each_online_cpu(cpu) { + rc = alloc_percpu_mpipe_resources(dev, cpu, ring); + if (rc < 0) + goto fail; + ring = rc; + } + + /* Initialize NotifGroup and buckets. */ + rc = init_notif_group_and_buckets(dev, first_ring, network_cpus_count); + if (rc != 0) + goto fail; + + /* Create and enable interrupts. */ + rc = tile_net_setup_interrupts(dev); + if (rc != 0) + goto fail; + + return 0; + +fail: + tile_net_init_mpipe_fail(); + return rc; +} + +/* Create persistent egress info for a given egress channel. + * Note that this may be shared between, say, "gbe0" and "xgbe0". + * ISSUE: Defer header allocation until TSO is actually needed? + */ +static int tile_net_init_egress(struct net_device *dev, int echannel) +{ + struct page *headers_page, *edescs_page, *equeue_page; + gxio_mpipe_edesc_t *edescs; + gxio_mpipe_equeue_t *equeue; + unsigned char *headers; + int headers_order, edescs_order, equeue_order; + size_t edescs_size; + int edma; + int rc = -ENOMEM; + + /* Only initialize once. */ + if (egress_for_echannel[echannel].equeue != NULL) + return 0; + + /* Allocate memory for the "headers". */ + headers_order = get_order(EQUEUE_ENTRIES * HEADER_BYTES); + headers_page = alloc_pages(GFP_KERNEL, headers_order); + if (headers_page == NULL) { + netdev_warn(dev, + "Could not alloc %zd bytes for TSO headers.\n", + PAGE_SIZE << headers_order); + goto fail; + } + headers = pfn_to_kaddr(page_to_pfn(headers_page)); + + /* Allocate memory for the "edescs". */ + edescs_size = EQUEUE_ENTRIES * sizeof(*edescs); + edescs_order = get_order(edescs_size); + edescs_page = alloc_pages(GFP_KERNEL, edescs_order); + if (edescs_page == NULL) { + netdev_warn(dev, + "Could not alloc %zd bytes for eDMA ring.\n", + edescs_size); + goto fail_headers; + } + edescs = pfn_to_kaddr(page_to_pfn(edescs_page)); + + /* Allocate memory for the "equeue". */ + equeue_order = get_order(sizeof(*equeue)); + equeue_page = alloc_pages(GFP_KERNEL, equeue_order); + if (equeue_page == NULL) { + netdev_warn(dev, + "Could not alloc %zd bytes for equeue info.\n", + PAGE_SIZE << equeue_order); + goto fail_edescs; + } + equeue = pfn_to_kaddr(page_to_pfn(equeue_page)); + + /* Allocate an edma ring. Note that in practice this can't + * fail, which is good, because we will leak an edma ring if so. + */ + rc = gxio_mpipe_alloc_edma_rings(&context, 1, 0, 0); + if (rc < 0) { + netdev_warn(dev, "gxio_mpipe_alloc_edma_rings failed: %d\n", + rc); + goto fail_equeue; + } + edma = rc; + + /* Initialize the equeue. */ + rc = gxio_mpipe_equeue_init(equeue, &context, edma, echannel, + edescs, edescs_size, 0); + if (rc != 0) { + netdev_err(dev, "gxio_mpipe_equeue_init failed: %d\n", rc); + goto fail_equeue; + } + + /* Done. */ + egress_for_echannel[echannel].equeue = equeue; + egress_for_echannel[echannel].headers = headers; + return 0; + +fail_equeue: + __free_pages(equeue_page, equeue_order); + +fail_edescs: + __free_pages(edescs_page, edescs_order); + +fail_headers: + __free_pages(headers_page, headers_order); + +fail: + return rc; +} + +/* Return channel number for a newly-opened link. */ +static int tile_net_link_open(struct net_device *dev, gxio_mpipe_link_t *link, + const char *link_name) +{ + int rc = gxio_mpipe_link_open(link, &context, link_name, 0); + if (rc < 0) { + netdev_err(dev, "Failed to open '%s'\n", link_name); + return rc; + } + rc = gxio_mpipe_link_channel(link); + if (rc < 0 || rc >= TILE_NET_CHANNELS) { + netdev_err(dev, "gxio_mpipe_link_channel bad value: %d\n", rc); + gxio_mpipe_link_close(link); + return -EINVAL; + } + return rc; +} + +/* Help the kernel activate the given network interface. */ +static int tile_net_open(struct net_device *dev) +{ + struct tile_net_priv *priv = netdev_priv(dev); + int cpu, rc; + + mutex_lock(&tile_net_devs_for_channel_mutex); + + /* Do one-time initialization the first time any device is opened. */ + if (ingress_irq < 0) { + rc = tile_net_init_mpipe(dev); + if (rc != 0) + goto fail; + } + + /* Determine if this is the "loopify" device. */ + if (unlikely((loopify_link_name != NULL) && + !strcmp(dev->name, loopify_link_name))) { + rc = tile_net_link_open(dev, &priv->link, "loop0"); + if (rc < 0) + goto fail; + priv->channel = rc; + rc = tile_net_link_open(dev, &priv->loopify_link, "loop1"); + if (rc < 0) + goto fail; + priv->loopify_channel = rc; + priv->echannel = rc; + } else { + rc = tile_net_link_open(dev, &priv->link, dev->name); + if (rc < 0) + goto fail; + priv->channel = rc; + priv->echannel = rc; + } + + /* Initialize egress info (if needed). Once ever, per echannel. */ + rc = tile_net_init_egress(dev, priv->echannel); + if (rc != 0) + goto fail; + + tile_net_devs_for_channel[priv->channel] = dev; + + rc = tile_net_update(dev); + if (rc != 0) + goto fail; + + mutex_unlock(&tile_net_devs_for_channel_mutex); + + /* Initialize the transmit wake timer for this device for each cpu. */ + for_each_online_cpu(cpu) { + struct tile_net_info *info = &per_cpu(per_cpu_info, cpu); + struct tile_net_tx_wake *tx_wake = + &info->tx_wake[priv->echannel]; + + hrtimer_init(&tx_wake->timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + tx_wake->timer.function = tile_net_handle_tx_wake_timer; + tx_wake->dev = dev; + } + + for_each_online_cpu(cpu) + netif_start_subqueue(dev, cpu); + netif_carrier_on(dev); + return 0; + +fail: + if (priv->loopify_channel >= 0) { + if (gxio_mpipe_link_close(&priv->loopify_link) != 0) + netdev_warn(dev, "Failed to close loopify link!\n"); + priv->loopify_channel = -1; + } + if (priv->channel >= 0) { + if (gxio_mpipe_link_close(&priv->link) != 0) + netdev_warn(dev, "Failed to close link!\n"); + priv->channel = -1; + } + priv->echannel = -1; + tile_net_devs_for_channel[priv->channel] = NULL; + mutex_unlock(&tile_net_devs_for_channel_mutex); + + /* Don't return raw gxio error codes to generic Linux. */ + return (rc > -512) ? rc : -EIO; +} + +/* Help the kernel deactivate the given network interface. */ +static int tile_net_stop(struct net_device *dev) +{ + struct tile_net_priv *priv = netdev_priv(dev); + int cpu; + + for_each_online_cpu(cpu) { + struct tile_net_info *info = &per_cpu(per_cpu_info, cpu); + struct tile_net_tx_wake *tx_wake = + &info->tx_wake[priv->echannel]; + + hrtimer_cancel(&tx_wake->timer); + netif_stop_subqueue(dev, cpu); + } + + mutex_lock(&tile_net_devs_for_channel_mutex); + tile_net_devs_for_channel[priv->channel] = NULL; + (void)tile_net_update(dev); + if (priv->loopify_channel >= 0) { + if (gxio_mpipe_link_close(&priv->loopify_link) != 0) + netdev_warn(dev, "Failed to close loopify link!\n"); + priv->loopify_channel = -1; + } + if (priv->channel >= 0) { + if (gxio_mpipe_link_close(&priv->link) != 0) + netdev_warn(dev, "Failed to close link!\n"); + priv->channel = -1; + } + priv->echannel = -1; + mutex_unlock(&tile_net_devs_for_channel_mutex); + + return 0; +} + +/* Determine the VA for a fragment. */ +static inline void *tile_net_frag_buf(skb_frag_t *f) +{ + unsigned long pfn = page_to_pfn(skb_frag_page(f)); + return pfn_to_kaddr(pfn) + f->page_offset; +} + +/* Acquire a completion entry and an egress slot, or if we can't, + * stop the queue and schedule the tx_wake timer. + */ +static s64 tile_net_equeue_try_reserve(struct net_device *dev, + struct tile_net_comps *comps, + gxio_mpipe_equeue_t *equeue, + int num_edescs) +{ + /* Try to acquire a completion entry. */ + if (comps->comp_next - comps->comp_last < TILE_NET_MAX_COMPS - 1 || + tile_net_free_comps(equeue, comps, 32, false) != 0) { + + /* Try to acquire an egress slot. */ + s64 slot = gxio_mpipe_equeue_try_reserve(equeue, num_edescs); + if (slot >= 0) + return slot; + + /* Freeing some completions gives the equeue time to drain. */ + tile_net_free_comps(equeue, comps, TILE_NET_MAX_COMPS, false); + + slot = gxio_mpipe_equeue_try_reserve(equeue, num_edescs); + if (slot >= 0) + return slot; + } + + /* Still nothing; give up and stop the queue for a short while. */ + netif_stop_subqueue(dev, smp_processor_id()); + tile_net_schedule_tx_wake_timer(dev); + return -1; +} + +/* Determine how many edesc's are needed for TSO. + * + * Sometimes, if "sendfile()" requires copying, we will be called with + * "data" containing the header and payload, with "frags" being empty. + * Sometimes, for example when using NFS over TCP, a single segment can + * span 3 fragments. This requires special care. + */ +static int tso_count_edescs(struct sk_buff *skb) +{ + struct skb_shared_info *sh = skb_shinfo(skb); + unsigned int data_len = skb->data_len; + unsigned int p_len = sh->gso_size; + long f_id = -1; /* id of the current fragment */ + long f_size = -1; /* size of the current fragment */ + long f_used = -1; /* bytes used from the current fragment */ + long n; /* size of the current piece of payload */ + int num_edescs = 0; + int segment; + + for (segment = 0; segment < sh->gso_segs; segment++) { + + unsigned int p_used = 0; + + /* One edesc for header and for each piece of the payload. */ + for (num_edescs++; p_used < p_len; num_edescs++) { + + /* Advance as needed. */ + while (f_used >= f_size) { + f_id++; + f_size = sh->frags[f_id].size; + f_used = 0; + } + + /* Use bytes from the current fragment. */ + n = p_len - p_used; + if (n > f_size - f_used) + n = f_size - f_used; + f_used += n; + p_used += n; + } + + /* The last segment may be less than gso_size. */ + data_len -= p_len; + if (data_len < p_len) + p_len = data_len; + } + + return num_edescs; +} + +/* Prepare modified copies of the skbuff headers. + * FIXME: add support for IPv6. + */ +static void tso_headers_prepare(struct sk_buff *skb, unsigned char *headers, + s64 slot) +{ + struct skb_shared_info *sh = skb_shinfo(skb); + struct iphdr *ih; + struct tcphdr *th; + unsigned int data_len = skb->data_len; + unsigned char *data = skb->data; + unsigned int ih_off, th_off, sh_len, p_len; + unsigned int isum_seed, tsum_seed, id, seq; + long f_id = -1; /* id of the current fragment */ + long f_size = -1; /* size of the current fragment */ + long f_used = -1; /* bytes used from the current fragment */ + long n; /* size of the current piece of payload */ + int segment; + + /* Locate original headers and compute various lengths. */ + ih = ip_hdr(skb); + th = tcp_hdr(skb); + ih_off = skb_network_offset(skb); + th_off = skb_transport_offset(skb); + sh_len = th_off + tcp_hdrlen(skb); + p_len = sh->gso_size; + + /* Set up seed values for IP and TCP csum and initialize id and seq. */ + isum_seed = ((0xFFFF - ih->check) + + (0xFFFF - ih->tot_len) + + (0xFFFF - ih->id)); + tsum_seed = th->check + (0xFFFF ^ htons(skb->len)); + id = ntohs(ih->id); + seq = ntohl(th->seq); + + /* Prepare all the headers. */ + for (segment = 0; segment < sh->gso_segs; segment++) { + unsigned char *buf; + unsigned int p_used = 0; + + /* Copy to the header memory for this segment. */ + buf = headers + (slot % EQUEUE_ENTRIES) * HEADER_BYTES + + NET_IP_ALIGN; + memcpy(buf, data, sh_len); + + /* Update copied ip header. */ + ih = (struct iphdr *)(buf + ih_off); + ih->tot_len = htons(sh_len + p_len - ih_off); + ih->id = htons(id); + ih->check = csum_long(isum_seed + ih->tot_len + + ih->id) ^ 0xffff; + + /* Update copied tcp header. */ + th = (struct tcphdr *)(buf + th_off); + th->seq = htonl(seq); + th->check = csum_long(tsum_seed + htons(sh_len + p_len)); + if (segment != sh->gso_segs - 1) { + th->fin = 0; + th->psh = 0; + } + + /* Skip past the header. */ + slot++; + + /* Skip past the payload. */ + while (p_used < p_len) { + + /* Advance as needed. */ + while (f_used >= f_size) { + f_id++; + f_size = sh->frags[f_id].size; + f_used = 0; + } + + /* Use bytes from the current fragment. */ + n = p_len - p_used; + if (n > f_size - f_used) + n = f_size - f_used; + f_used += n; + p_used += n; + + slot++; + } + + id++; + seq += p_len; + + /* The last segment may be less than gso_size. */ + data_len -= p_len; + if (data_len < p_len) + p_len = data_len; + } + + /* Flush the headers so they are ready for hardware DMA. */ + wmb(); +} + +/* Pass all the data to mpipe for egress. */ +static void tso_egress(struct net_device *dev, gxio_mpipe_equeue_t *equeue, + struct sk_buff *skb, unsigned char *headers, s64 slot) +{ + struct tile_net_priv *priv = netdev_priv(dev); + struct skb_shared_info *sh = skb_shinfo(skb); + unsigned int data_len = skb->data_len; + unsigned int p_len = sh->gso_size; + gxio_mpipe_edesc_t edesc_head = { { 0 } }; + gxio_mpipe_edesc_t edesc_body = { { 0 } }; + long f_id = -1; /* id of the current fragment */ + long f_size = -1; /* size of the current fragment */ + long f_used = -1; /* bytes used from the current fragment */ + long n; /* size of the current piece of payload */ + unsigned long tx_packets = 0, tx_bytes = 0; + unsigned int csum_start, sh_len; + int segment; + + /* Prepare to egress the headers: set up header edesc. */ + csum_start = skb_checksum_start_offset(skb); + sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + edesc_head.csum = 1; + edesc_head.csum_start = csum_start; + edesc_head.csum_dest = csum_start + skb->csum_offset; + edesc_head.xfer_size = sh_len; + + /* This is only used to specify the TLB. */ + edesc_head.stack_idx = large_buffer_stack; + edesc_body.stack_idx = large_buffer_stack; + + /* Egress all the edescs. */ + for (segment = 0; segment < sh->gso_segs; segment++) { + void *va; + unsigned char *buf; + unsigned int p_used = 0; + + /* Egress the header. */ + buf = headers + (slot % EQUEUE_ENTRIES) * HEADER_BYTES + + NET_IP_ALIGN; + edesc_head.va = va_to_tile_io_addr(buf); + gxio_mpipe_equeue_put_at(equeue, edesc_head, slot); + slot++; + + /* Egress the payload. */ + while (p_used < p_len) { + + /* Advance as needed. */ + while (f_used >= f_size) { + f_id++; + f_size = sh->frags[f_id].size; + f_used = 0; + } + + va = tile_net_frag_buf(&sh->frags[f_id]) + f_used; + + /* Use bytes from the current fragment. */ + n = p_len - p_used; + if (n > f_size - f_used) + n = f_size - f_used; + f_used += n; + p_used += n; + + /* Egress a piece of the payload. */ + edesc_body.va = va_to_tile_io_addr(va); + edesc_body.xfer_size = n; + edesc_body.bound = !(p_used < p_len); + gxio_mpipe_equeue_put_at(equeue, edesc_body, slot); + slot++; + } + + tx_packets++; + tx_bytes += sh_len + p_len; + + /* The last segment may be less than gso_size. */ + data_len -= p_len; + if (data_len < p_len) + p_len = data_len; + } + + /* Update stats. */ + tile_net_stats_add(tx_packets, &priv->stats.tx_packets); + tile_net_stats_add(tx_bytes, &priv->stats.tx_bytes); +} + +/* Do "TSO" handling for egress. + * + * Normally drivers set NETIF_F_TSO only to support hardware TSO; + * otherwise the stack uses scatter-gather to implement GSO in software. + * On our testing, enabling GSO support (via NETIF_F_SG) drops network + * performance down to around 7.5 Gbps on the 10G interfaces, although + * also dropping cpu utilization way down, to under 8%. But + * implementing "TSO" in the driver brings performance back up to line + * rate, while dropping cpu usage even further, to less than 4%. In + * practice, profiling of GSO shows that skb_segment() is what causes + * the performance overheads; we benefit in the driver from using + * preallocated memory to duplicate the TCP/IP headers. + */ +static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_priv *priv = netdev_priv(dev); + int channel = priv->echannel; + struct tile_net_egress *egress = &egress_for_echannel[channel]; + struct tile_net_comps *comps = info->comps_for_echannel[channel]; + gxio_mpipe_equeue_t *equeue = egress->equeue; + unsigned long irqflags; + int num_edescs; + s64 slot; + + /* Determine how many mpipe edesc's are needed. */ + num_edescs = tso_count_edescs(skb); + + local_irq_save(irqflags); + + /* Try to acquire a completion entry and an egress slot. */ + slot = tile_net_equeue_try_reserve(dev, comps, equeue, num_edescs); + if (slot < 0) { + local_irq_restore(irqflags); + return NETDEV_TX_BUSY; + } + + /* Set up copies of header data properly. */ + tso_headers_prepare(skb, egress->headers, slot); + + /* Actually pass the data to the network hardware. */ + tso_egress(dev, equeue, skb, egress->headers, slot); + + /* Add a completion record. */ + add_comp(equeue, comps, slot + num_edescs - 1, skb); + + local_irq_restore(irqflags); + + /* Make sure the egress timer is scheduled. */ + tile_net_schedule_egress_timer(); + + return NETDEV_TX_OK; +} + +/* Analyze the body and frags for a transmit request. */ +static unsigned int tile_net_tx_frags(struct frag *frags, + struct sk_buff *skb, + void *b_data, unsigned int b_len) +{ + unsigned int i, n = 0; + + struct skb_shared_info *sh = skb_shinfo(skb); + + if (b_len != 0) { + frags[n].buf = b_data; + frags[n++].length = b_len; + } + + for (i = 0; i < sh->nr_frags; i++) { + skb_frag_t *f = &sh->frags[i]; + frags[n].buf = tile_net_frag_buf(f); + frags[n++].length = skb_frag_size(f); + } + + return n; +} + +/* Help the kernel transmit a packet. */ +static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_priv *priv = netdev_priv(dev); + struct tile_net_egress *egress = &egress_for_echannel[priv->echannel]; + gxio_mpipe_equeue_t *equeue = egress->equeue; + struct tile_net_comps *comps = + info->comps_for_echannel[priv->echannel]; + unsigned int len = skb->len; + unsigned char *data = skb->data; + unsigned int num_edescs; + struct frag frags[MAX_FRAGS]; + gxio_mpipe_edesc_t edescs[MAX_FRAGS]; + unsigned long irqflags; + gxio_mpipe_edesc_t edesc = { { 0 } }; + unsigned int i; + s64 slot; + + if (skb_is_gso(skb)) + return tile_net_tx_tso(skb, dev); + + num_edescs = tile_net_tx_frags(frags, skb, data, skb_headlen(skb)); + + /* This is only used to specify the TLB. */ + edesc.stack_idx = large_buffer_stack; + + /* Prepare the edescs. */ + for (i = 0; i < num_edescs; i++) { + edesc.xfer_size = frags[i].length; + edesc.va = va_to_tile_io_addr(frags[i].buf); + edescs[i] = edesc; + } + + /* Mark the final edesc. */ + edescs[num_edescs - 1].bound = 1; + + /* Add checksum info to the initial edesc, if needed. */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + unsigned int csum_start = skb_checksum_start_offset(skb); + edescs[0].csum = 1; + edescs[0].csum_start = csum_start; + edescs[0].csum_dest = csum_start + skb->csum_offset; + } + + local_irq_save(irqflags); + + /* Try to acquire a completion entry and an egress slot. */ + slot = tile_net_equeue_try_reserve(dev, comps, equeue, num_edescs); + if (slot < 0) { + local_irq_restore(irqflags); + return NETDEV_TX_BUSY; + } + + for (i = 0; i < num_edescs; i++) + gxio_mpipe_equeue_put_at(equeue, edescs[i], slot++); + + /* Add a completion record. */ + add_comp(equeue, comps, slot - 1, skb); + + /* NOTE: Use ETH_ZLEN for short packets (e.g. 42 < 60). */ + tile_net_stats_add(1, &priv->stats.tx_packets); + tile_net_stats_add(max_t(unsigned int, len, ETH_ZLEN), + &priv->stats.tx_bytes); + + local_irq_restore(irqflags); + + /* Make sure the egress timer is scheduled. */ + tile_net_schedule_egress_timer(); + + return NETDEV_TX_OK; +} + +/* Return subqueue id on this core (one per core). */ +static u16 tile_net_select_queue(struct net_device *dev, struct sk_buff *skb) +{ + return smp_processor_id(); +} + +/* Deal with a transmit timeout. */ +static void tile_net_tx_timeout(struct net_device *dev) +{ + int cpu; + + for_each_online_cpu(cpu) + netif_wake_subqueue(dev, cpu); +} + +/* Ioctl commands. */ +static int tile_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + return -EOPNOTSUPP; +} + +/* Get system network statistics for device. */ +static struct net_device_stats *tile_net_get_stats(struct net_device *dev) +{ + struct tile_net_priv *priv = netdev_priv(dev); + return &priv->stats; +} + +/* Change the MTU. */ +static int tile_net_change_mtu(struct net_device *dev, int new_mtu) +{ + if ((new_mtu < 68) || (new_mtu > 1500)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +/* Change the Ethernet address of the NIC. + * + * The hypervisor driver does not support changing MAC address. However, + * the hardware does not do anything with the MAC address, so the address + * which gets used on outgoing packets, and which is accepted on incoming + * packets, is completely up to us. + * + * Returns 0 on success, negative on failure. + */ +static int tile_net_set_mac_address(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EINVAL; + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + return 0; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +/* Polling 'interrupt' - used by things like netconsole to send skbs + * without having to re-enable interrupts. It's not called while + * the interrupt routine is executing. + */ +static void tile_net_netpoll(struct net_device *dev) +{ + disable_percpu_irq(ingress_irq); + tile_net_handle_ingress_irq(ingress_irq, NULL); + enable_percpu_irq(ingress_irq, 0); +} +#endif + +static const struct net_device_ops tile_net_ops = { + .ndo_open = tile_net_open, + .ndo_stop = tile_net_stop, + .ndo_start_xmit = tile_net_tx, + .ndo_select_queue = tile_net_select_queue, + .ndo_do_ioctl = tile_net_ioctl, + .ndo_get_stats = tile_net_get_stats, + .ndo_change_mtu = tile_net_change_mtu, + .ndo_tx_timeout = tile_net_tx_timeout, + .ndo_set_mac_address = tile_net_set_mac_address, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = tile_net_netpoll, +#endif +}; + +/* The setup function. + * + * This uses ether_setup() to assign various fields in dev, including + * setting IFF_BROADCAST and IFF_MULTICAST, then sets some extra fields. + */ +static void tile_net_setup(struct net_device *dev) +{ + ether_setup(dev); + dev->netdev_ops = &tile_net_ops; + dev->watchdog_timeo = TILE_NET_TIMEOUT; + dev->features |= NETIF_F_LLTX; + dev->features |= NETIF_F_HW_CSUM; + dev->features |= NETIF_F_SG; + dev->features |= NETIF_F_TSO; + dev->mtu = 1500; +} + +/* Allocate the device structure, register the device, and obtain the + * MAC address from the hypervisor. + */ +static void tile_net_dev_init(const char *name, const uint8_t *mac) +{ + int ret; + int i; + int nz_addr = 0; + struct net_device *dev; + struct tile_net_priv *priv; + + /* HACK: Ignore "loop" links. */ + if (strncmp(name, "loop", 4) == 0) + return; + + /* Allocate the device structure. Normally, "name" is a + * template, instantiated by register_netdev(), but not for us. + */ + dev = alloc_netdev_mqs(sizeof(*priv), name, tile_net_setup, + NR_CPUS, 1); + if (!dev) { + pr_err("alloc_netdev_mqs(%s) failed\n", name); + return; + } + + /* Initialize "priv". */ + priv = netdev_priv(dev); + memset(priv, 0, sizeof(*priv)); + priv->dev = dev; + priv->channel = -1; + priv->loopify_channel = -1; + priv->echannel = -1; + + /* Get the MAC address and set it in the device struct; this must + * be done before the device is opened. If the MAC is all zeroes, + * we use a random address, since we're probably on the simulator. + */ + for (i = 0; i < 6; i++) + nz_addr |= mac[i]; + + if (nz_addr) { + memcpy(dev->dev_addr, mac, 6); + dev->addr_len = 6; + } else { + random_ether_addr(dev->dev_addr); + } + + /* Register the network device. */ + ret = register_netdev(dev); + if (ret) { + netdev_err(dev, "register_netdev failed %d\n", ret); + free_netdev(dev); + return; + } +} + +/* Per-cpu module initialization. */ +static void tile_net_init_module_percpu(void *unused) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + int my_cpu = smp_processor_id(); + + info->has_iqueue = false; + + info->my_cpu = my_cpu; + + /* Initialize the egress timer. */ + hrtimer_init(&info->egress_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + info->egress_timer.function = tile_net_handle_egress_timer; +} + +/* Module initialization. */ +static int __init tile_net_init_module(void) +{ + int i; + char name[GXIO_MPIPE_LINK_NAME_LEN]; + uint8_t mac[6]; + + pr_info("Tilera Network Driver\n"); + + mutex_init(&tile_net_devs_for_channel_mutex); + + /* Initialize each CPU. */ + on_each_cpu(tile_net_init_module_percpu, NULL, 1); + + /* Find out what devices we have, and initialize them. */ + for (i = 0; gxio_mpipe_link_enumerate_mac(i, name, mac) >= 0; i++) + tile_net_dev_init(name, mac); + + if (!network_cpus_init()) + network_cpus_map = *cpu_online_mask; + + return 0; +} + +module_init(tile_net_init_module); From 8a51178ed64dca418478996697611ef087fa22df Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sat, 9 Jun 2012 22:51:47 +0000 Subject: [PATCH 356/475] sparc: remove two unused headers Nothing includes these two headers. None of the macros they define are used anywhere in the tree. This was also the case in v2.6.12-rc2 and, presumably, every release in between. These two headers can safely be removed. Signed-off-by: Paul Bolle Signed-off-by: David S. Miller --- arch/sparc/include/asm/cmt.h | 59 ----------------------------- arch/sparc/include/asm/mpmbox.h | 67 --------------------------------- 2 files changed, 126 deletions(-) delete mode 100644 arch/sparc/include/asm/cmt.h delete mode 100644 arch/sparc/include/asm/mpmbox.h diff --git a/arch/sparc/include/asm/cmt.h b/arch/sparc/include/asm/cmt.h deleted file mode 100644 index 870db592857..00000000000 --- a/arch/sparc/include/asm/cmt.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef _SPARC64_CMT_H -#define _SPARC64_CMT_H - -/* cmt.h: Chip Multi-Threading register definitions - * - * Copyright (C) 2004 David S. Miller (davem@redhat.com) - */ - -/* ASI_CORE_ID - private */ -#define LP_ID 0x0000000000000010UL -#define LP_ID_MAX 0x00000000003f0000UL -#define LP_ID_ID 0x000000000000003fUL - -/* ASI_INTR_ID - private */ -#define LP_INTR_ID 0x0000000000000000UL -#define LP_INTR_ID_ID 0x00000000000003ffUL - -/* ASI_CESR_ID - private */ -#define CESR_ID 0x0000000000000040UL -#define CESR_ID_ID 0x00000000000000ffUL - -/* ASI_CORE_AVAILABLE - shared */ -#define LP_AVAIL 0x0000000000000000UL -#define LP_AVAIL_1 0x0000000000000002UL -#define LP_AVAIL_0 0x0000000000000001UL - -/* ASI_CORE_ENABLE_STATUS - shared */ -#define LP_ENAB_STAT 0x0000000000000010UL -#define LP_ENAB_STAT_1 0x0000000000000002UL -#define LP_ENAB_STAT_0 0x0000000000000001UL - -/* ASI_CORE_ENABLE - shared */ -#define LP_ENAB 0x0000000000000020UL -#define LP_ENAB_1 0x0000000000000002UL -#define LP_ENAB_0 0x0000000000000001UL - -/* ASI_CORE_RUNNING - shared */ -#define LP_RUNNING_RW 0x0000000000000050UL -#define LP_RUNNING_W1S 0x0000000000000060UL -#define LP_RUNNING_W1C 0x0000000000000068UL -#define LP_RUNNING_1 0x0000000000000002UL -#define LP_RUNNING_0 0x0000000000000001UL - -/* ASI_CORE_RUNNING_STAT - shared */ -#define LP_RUN_STAT 0x0000000000000058UL -#define LP_RUN_STAT_1 0x0000000000000002UL -#define LP_RUN_STAT_0 0x0000000000000001UL - -/* ASI_XIR_STEERING - shared */ -#define LP_XIR_STEER 0x0000000000000030UL -#define LP_XIR_STEER_1 0x0000000000000002UL -#define LP_XIR_STEER_0 0x0000000000000001UL - -/* ASI_CMT_ERROR_STEERING - shared */ -#define CMT_ER_STEER 0x0000000000000040UL -#define CMT_ER_STEER_1 0x0000000000000002UL -#define CMT_ER_STEER_0 0x0000000000000001UL - -#endif /* _SPARC64_CMT_H */ diff --git a/arch/sparc/include/asm/mpmbox.h b/arch/sparc/include/asm/mpmbox.h deleted file mode 100644 index f8423039b24..00000000000 --- a/arch/sparc/include/asm/mpmbox.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * mpmbox.h: Interface and defines for the OpenProm mailbox - * facilities for MP machines under Linux. - * - * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) - */ - -#ifndef _SPARC_MPMBOX_H -#define _SPARC_MPMBOX_H - -/* The prom allocates, for each CPU on the machine an unsigned - * byte in physical ram. You probe the device tree prom nodes - * for these values. The purpose of this byte is to be able to - * pass messages from one cpu to another. - */ - -/* These are the main message types we have to look for in our - * Cpu mailboxes, based upon these values we decide what course - * of action to take. - */ - -/* The CPU is executing code in the kernel. */ -#define MAILBOX_ISRUNNING 0xf0 - -/* Another CPU called romvec->pv_exit(), you should call - * prom_stopcpu() when you see this in your mailbox. - */ -#define MAILBOX_EXIT 0xfb - -/* Another CPU called romvec->pv_enter(), you should call - * prom_cpuidle() when this is seen. - */ -#define MAILBOX_GOSPIN 0xfc - -/* Another CPU has hit a breakpoint either into kadb or the prom - * itself. Just like MAILBOX_GOSPIN, you should call prom_cpuidle() - * at this point. - */ -#define MAILBOX_BPT_SPIN 0xfd - -/* Oh geese, some other nitwit got a damn watchdog reset. The party's - * over so go call prom_stopcpu(). - */ -#define MAILBOX_WDOG_STOP 0xfe - -#ifndef __ASSEMBLY__ - -/* Handy macro's to determine a cpu's state. */ - -/* Is the cpu still in Power On Self Test? */ -#define MBOX_POST_P(letter) ((letter) >= 0x00 && (letter) <= 0x7f) - -/* Is the cpu at the 'ok' prompt of the PROM? */ -#define MBOX_PROMPROMPT_P(letter) ((letter) >= 0x80 && (letter) <= 0x8f) - -/* Is the cpu spinning in the PROM? */ -#define MBOX_PROMSPIN_P(letter) ((letter) >= 0x90 && (letter) <= 0xef) - -/* Sanity check... This is junk mail, throw it out. */ -#define MBOX_BOGON_P(letter) ((letter) >= 0xf1 && (letter) <= 0xfa) - -/* Is the cpu actively running an application/kernel-code? */ -#define MBOX_RUNNING_P(letter) ((letter) == MAILBOX_ISRUNNING) - -#endif /* !(__ASSEMBLY__) */ - -#endif /* !(_SPARC_MPMBOX_H) */ From f75b0d07da12e67559d51071391feaf0c265d292 Mon Sep 17 00:00:00 2001 From: Steven King Date: Sat, 5 May 2012 13:40:44 -0700 Subject: [PATCH 357/475] m68knommu: m528x qspi definition fix The consolidation of the qspi code missed a definition for 528x. Signed-off-by: Steven King Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/m528xsim.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/m528xsim.h b/arch/m68k/include/asm/m528xsim.h index d63b99ff7ff..497c31c803f 100644 --- a/arch/m68k/include/asm/m528xsim.h +++ b/arch/m68k/include/asm/m528xsim.h @@ -86,7 +86,7 @@ /* * QSPI module. */ -#define MCFQSPI_IOBASE (MCF_IPSBAR + 0x340) +#define MCFQSPI_BASE (MCF_IPSBAR + 0x340) #define MCFQSPI_SIZE 0x40 #define MCFQSPI_CS0 147 From b13b3f51ff7bec19ac1ba66e3623bfaa484af124 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 23 May 2012 11:32:45 +1000 Subject: [PATCH 358/475] m68k: fix inclusion of arch_gettimeoffset for non-MMU 68k classic CPU types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building for non-MMU based classic 68k CPU types (like the 68328 for example) you get a compilation error: CC arch/m68k/kernel/time.o arch/m68k/kernel/time.c:91:5: error: redefinition of ‘arch_gettimeoffset’ include/linux/time.h:145:19: note: previous definition of ‘arch_gettimeoffset’ was here The arch_gettimeoffset() code is included when building for these CPU types, but it shouldn't be. Those machine types do not have CONFIG_ARCH_USES_GETTIMEOFFSET set. The fix is simply to conditionally include the arch_gettimeoffset() code on that same config setting that specifies its use or not. Signed-off-by: Greg Ungerer Acked-by: Geert Uytterhoeven --- arch/m68k/kernel/time.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c index d7deb7fc7eb..707f0573ec6 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -85,7 +85,7 @@ void __init time_init(void) mach_sched_init(timer_interrupt); } -#ifdef CONFIG_M68KCLASSIC +#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET u32 arch_gettimeoffset(void) { @@ -108,4 +108,4 @@ static int __init rtc_init(void) module_init(rtc_init); -#endif /* CONFIG_M68KCLASSIC */ +#endif /* CONFIG_ARCH_USES_GETTIMEOFFSET */ From dc5588ae3c597006095926c25634408257457a8f Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 23 May 2012 13:27:18 +1000 Subject: [PATCH 359/475] m68knommu: fix 68328 local setting of timer interrupt handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling for 68328 based targets fails with: arch/m68k/platform/68328/timers.c: In function ‘hw_tick’: arch/m68k/platform/68328/timers.c:65:2: error: implicit declaration of function ‘arch_timer_interrupt’ arch/m68k/platform/68328/timers.c: At top level: arch/m68k/platform/68328/timers.c:102:6: error: conflicting types for ‘hw_timer_init’ arch/m68k/include/asm/machdep.h:36:13: note: previous declaration of ‘hw_timer_init’ was here Changes made to hw_timer_init() didn't get updated in the 68328 timer code. So process and call the "handler" arg that is now passed into that hw_timer_init() function. Signed-off-by: Greg Ungerer --- arch/m68k/platform/68328/timers.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/m68k/platform/68328/timers.c b/arch/m68k/platform/68328/timers.c index c801c172b82..f4dc9b29560 100644 --- a/arch/m68k/platform/68328/timers.c +++ b/arch/m68k/platform/68328/timers.c @@ -53,6 +53,7 @@ #endif static u32 m68328_tick_cnt; +static irq_handler_t timer_interrupt; /***************************************************************************/ @@ -62,7 +63,7 @@ static irqreturn_t hw_tick(int irq, void *dummy) TSTAT &= 0; m68328_tick_cnt += TICKS_PER_JIFFY; - return arch_timer_interrupt(irq, dummy); + return timer_interrupt(irq, dummy); } /***************************************************************************/ @@ -99,7 +100,7 @@ static struct clocksource m68328_clk = { /***************************************************************************/ -void hw_timer_init(void) +void hw_timer_init(irq_handler_t handler) { /* disable timer 1 */ TCTL = 0; @@ -115,6 +116,7 @@ void hw_timer_init(void) /* Enable timer 1 */ TCTL |= TCTL_TEN; clocksource_register_hz(&m68328_clk, TICKS_PER_JIFFY*HZ); + timer_interrupt = handler; } /***************************************************************************/ From 1b461d7631f17804257ab1734bd95c05ab655f8b Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 23 May 2012 13:32:55 +1000 Subject: [PATCH 360/475] m68knommu: fix 68360 local setting of timer interrupt handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling for 68360 based targets fails with: arch/m68k/platform/68360/config.c: In function ‘hw_tick’: arch/m68k/platform/68360/config.c:55:2: error: implicit declaration of function ‘arch_timer_interrupt’ arch/m68k/platform/68360/config.c: At top level: arch/m68k/platform/68360/config.c:64:6: error: conflicting types for ‘hw_timer_init’ arch/m68k/include/asm/machdep.h:36:13: note: previous declaration of ‘hw_timer_init’ was here Changes made to hw_timer_init() didn't get updated in the 68328 timer code. So process and call the "handler" arg that is now passed into that hw_timer_init() function. Signed-off-by: Greg Ungerer --- arch/m68k/platform/68360/config.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/m68k/platform/68360/config.c b/arch/m68k/platform/68360/config.c index 255fc03913e..9877cefad1e 100644 --- a/arch/m68k/platform/68360/config.c +++ b/arch/m68k/platform/68360/config.c @@ -35,6 +35,7 @@ extern void m360_cpm_reset(void); #define OSCILLATOR (unsigned long int)33000000 #endif +static irq_handler_t timer_interrupt; unsigned long int system_clock; extern QUICC *pquicc; @@ -52,7 +53,7 @@ static irqreturn_t hw_tick(int irq, void *dummy) pquicc->timer_ter1 = 0x0002; /* clear timer event */ - return arch_timer_interrupt(irq, dummy); + return timer_interrupt(irq, dummy); } static struct irqaction m68360_timer_irq = { @@ -61,7 +62,7 @@ static struct irqaction m68360_timer_irq = { .handler = hw_tick, }; -void hw_timer_init(void) +void hw_timer_init(irq_handler_t handler) { unsigned char prescaler; unsigned short tgcr_save; @@ -94,6 +95,8 @@ void hw_timer_init(void) pquicc->timer_ter1 = 0x0003; /* clear timer events */ + timer_interrupt = handler; + /* enable timer 1 interrupt in CIMR */ setup_irq(CPMVEC_TIMER1, &m68360_timer_irq); From 70c778f7a1075cf688ef50ca52c464e7ea60b506 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 23 May 2012 13:43:52 +1000 Subject: [PATCH 361/475] m68k: make syscall_trace_enter/leave exist for non-MMU classic m68k types The assembler entry code calls directly to the syscall_trace_enter() and syscall_trace_leave() functions. But currently they are conditionaly compiled out for the non-MMU classic m68k CPU types (so 68328 for example), resulting in a link error: LD vmlinux arch/m68k/platform/68328/built-in.o: In function `do_trace': (.text+0x1c): undefined reference to `syscall_trace_enter' arch/m68k/platform/68328/built-in.o: In function `do_trace': (.text+0x4c): undefined reference to `syscall_trace_leave' Change the conditional check that includes these functions to be true for the !defined(CONFIG_MMU) case as well. Signed-off-by: Greg Ungerer Acked-by: Geert Uytterhoeven --- arch/m68k/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c index 8b4a2222e65..1bc10e62b9a 100644 --- a/arch/m68k/kernel/ptrace.c +++ b/arch/m68k/kernel/ptrace.c @@ -286,7 +286,7 @@ asmlinkage void syscall_trace(void) } } -#ifdef CONFIG_COLDFIRE +#if defined(CONFIG_COLDFIRE) || !defined(CONFIG_MMU) asmlinkage int syscall_trace_enter(void) { int ret = 0; From e32025a56403df4386cd61a741c0a36afe79ae8a Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 11 Jun 2012 23:18:33 -0300 Subject: [PATCH 362/475] x86: kvmclock: remove check_and_clear_guest_paused warning CPU offline path calls the hrtimer interrupt handler with interrupts disabled, without touching preempt_count, triggering this warning. Remove the warning since it is supposed to be used from hrtimer interrupt context only. Signed-off-by: Marcelo Tosatti --- arch/x86/kernel/kvmclock.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 086eb58c6e8..f1b42b3a186 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -120,11 +120,6 @@ bool kvm_check_and_clear_guest_paused(void) bool ret = false; struct pvclock_vcpu_time_info *src; - /* - * per_cpu() is safe here because this function is only called from - * timer functions where preemption is already disabled. - */ - WARN_ON(!in_atomic()); src = &__get_cpu_var(hv_clock); if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { __this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED); From 3fbd8716da4c69ddbb76c022f3f4a0d50723c68f Mon Sep 17 00:00:00 2001 From: Rafal Prylowski Date: Mon, 28 May 2012 23:35:54 +0800 Subject: [PATCH 363/475] leds: don't disable blinking when writing the same value to delay_on or delay_off Function led_set_software_blink() assumes that blink timer is still running, but commit 488bc35bf40df89d37486c1826b178a2fba36ce7 introduced disabling of blink timer before each call to led_set_software_blink(). Correct led_software_blink(): 1) remove protection against reprogramming blink timer to the same values, because it only disables blinking now, 2) remove unnecessary call to led_stop_software_blink(). Signed-off-by: Rafal Prylowski Cc: Richard Purdie Signed-off-by: Bryan Wu --- drivers/leds/led-core.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index d6860043f6f..d65353d8d3f 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -44,13 +44,6 @@ static void led_set_software_blink(struct led_classdev *led_cdev, if (!led_cdev->blink_brightness) led_cdev->blink_brightness = led_cdev->max_brightness; - if (led_get_trigger_data(led_cdev) && - delay_on == led_cdev->blink_delay_on && - delay_off == led_cdev->blink_delay_off) - return; - - led_stop_software_blink(led_cdev); - led_cdev->blink_delay_on = delay_on; led_cdev->blink_delay_off = delay_off; From 9cd5ec5e5447f99ffb33ce39c1fcd0e3306e11d1 Mon Sep 17 00:00:00 2001 From: Jeffrin Jose Date: Sun, 3 Jun 2012 20:11:52 +0800 Subject: [PATCH 364/475] leds: fixed a coding style issue. Fixed a coding style issue relating to trailing white space error found by checkpatch.pl tool in drivers/leds/led-class.c Signed-off-by: Jeffrin Jose Signed-off-by: Bryan Wu --- drivers/leds/led-class.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 8ee92c81aec..e663e6f413e 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -29,7 +29,7 @@ static void led_update_brightness(struct led_classdev *led_cdev) led_cdev->brightness = led_cdev->brightness_get(led_cdev); } -static ssize_t led_brightness_show(struct device *dev, +static ssize_t led_brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct led_classdev *led_cdev = dev_get_drvdata(dev); From 9473c4c16700ec965904f23234fd0f8677ce6231 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 9 Jun 2012 07:41:42 +0800 Subject: [PATCH 365/475] leds: Make LEDS_ASIC3 and LEDS_RENESAS_TPU depend on LEDS_CLASS=y Otherwise, I got below build error when CONFIG_LEDS_CLASS=m. LD init/built-in.o drivers/built-in.o: In function `asic3_led_probe': clkdev.c:(.devinit.text+0x4680): undefined reference to `led_classdev_register' drivers/built-in.o: In function `r_tpu_probe': clkdev.c:(.devinit.text+0x4838): undefined reference to `led_classdev_register' drivers/built-in.o: In function `asic3_led_remove': clkdev.c:(.devexit.text+0x564): undefined reference to `led_classdev_unregister' drivers/built-in.o: In function `r_tpu_remove': clkdev.c:(.devexit.text+0x5a0): undefined reference to `led_classdev_unregister' make: *** [vmlinux] Error 1 Signed-off-by: Axel Lin Signed-off-by: Bryan Wu --- drivers/leds/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 04cb8c88d74..12b2b55c519 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -379,7 +379,7 @@ config LEDS_NETXBIG config LEDS_ASIC3 bool "LED support for the HTC ASIC3" - depends on LEDS_CLASS + depends on LEDS_CLASS=y depends on MFD_ASIC3 default y help @@ -390,7 +390,7 @@ config LEDS_ASIC3 config LEDS_RENESAS_TPU bool "LED support for Renesas TPU" - depends on LEDS_CLASS && HAVE_CLK && GENERIC_GPIO + depends on LEDS_CLASS=y && HAVE_CLK && GENERIC_GPIO help This option enables build of the LED TPU platform driver, suitable to drive any TPU channel on newer Renesas SoCs. From b84297197ce60bb5da14cbd5516cf5130b0cd380 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 11 Jun 2012 16:46:55 -0700 Subject: [PATCH 366/475] exofs: fix sparse non-ANSI function warning Fix sparse non-ANSI function warning: fs/exofs/sys.c:112:28: warning: non-ANSI function declaration of function 'exofs_sysfs_dbg_print' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- fs/exofs/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c index e32bc919e4e..5a7b691e748 100644 --- a/fs/exofs/sys.c +++ b/fs/exofs/sys.c @@ -109,7 +109,7 @@ static struct kobj_type odev_ktype = { static struct kobj_type uuid_ktype = { }; -void exofs_sysfs_dbg_print() +void exofs_sysfs_dbg_print(void) { #ifdef CONFIG_EXOFS_DEBUG struct kobject *k_name, *k_tmp; From efec381ced2c0416c3765abce076896cf464dc0c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 6 Jun 2012 22:50:41 +0200 Subject: [PATCH 367/475] pinctrl: nomadik: add clk_prepare() call We now strictly require clk_prepare() calls to be issued before any clk_enable() calls. Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-nomadik.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/pinctrl-nomadik.c b/drivers/pinctrl/pinctrl-nomadik.c index b26395d1634..ba7122ff480 100644 --- a/drivers/pinctrl/pinctrl-nomadik.c +++ b/drivers/pinctrl/pinctrl-nomadik.c @@ -1246,6 +1246,7 @@ static int __devinit nmk_gpio_probe(struct platform_device *dev) ret = PTR_ERR(clk); goto out_unmap; } + clk_prepare(clk); nmk_chip = kzalloc(sizeof(*nmk_chip), GFP_KERNEL); if (!nmk_chip) { From e85bbc19d5a9ad2b48bf8f2fd3277d6fe58f23ce Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 12 Jun 2012 12:43:06 +0200 Subject: [PATCH 368/475] pinctrl: nomadik: fix up typo Commit a60b57eddaa8af6c02cf7bbeb58ebf82881f08ac "drivers/gpio: gpio-nomadik: Add support for irqdomains" changed GPIO offset calculations to have this form: (gpio % NMK_GPIO_PER_CHIP) except in this one place for setting sleep mode, where the conversion was all wrong, and instead mod:ing the GPIO with the IRQ base which does not make any sense. So fix this up so we can use sleepmode. Reviewed-by: Lee Jones Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-nomadik.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-nomadik.c b/drivers/pinctrl/pinctrl-nomadik.c index ba7122ff480..e8937e7e499 100644 --- a/drivers/pinctrl/pinctrl-nomadik.c +++ b/drivers/pinctrl/pinctrl-nomadik.c @@ -673,7 +673,7 @@ static void __nmk_gpio_set_wake(struct nmk_gpio_chip *nmk_chip, * wakeup is anyhow controlled by the RIMSC and FIMSC registers. */ if (nmk_chip->sleepmode && on) { - __nmk_gpio_set_slpm(nmk_chip, gpio % nmk_chip->chip.base, + __nmk_gpio_set_slpm(nmk_chip, gpio % NMK_GPIO_PER_CHIP, NMK_GPIO_SLPM_WAKEUP_ENABLE); } From 3a86a5f8abb33956446ae31b1e9c149d7b2d1d21 Mon Sep 17 00:00:00 2001 From: Devendra Naga Date: Sat, 9 Jun 2012 00:52:11 +0530 Subject: [PATCH 369/475] pinctrl: pinctrl-imx: free allocated pinctrl_map structure only once and use kernel facilities for IMX_PMX_DUMP a) as we allocate the pinctrl_map structure at imx_dt_node_to_map at line 167, anyway if its an element, or a num_elements * (sizeof(type)) elements allocated to one single pointer must be freed only once. CASE. A) as new_map is not moved and allocated like, for (i = 0; i < MAX_ELEMS; i++) { new_map[i] = kmalloc(numelems * size, GFP_KERNEL); } its freed as for (i = 0; i < MAX_ELEMS; i++) { kfree(new_map[i]); } CASE. B) and its allocated like new_map = kmalloc(numelems * size, GFP_KERNEL); it just needs kfree not as case A's. b) use KERN_DEBUG facility for the IMX_PMX_DUMP macro. Signed-off-by: Devendra Naga Acked-by: Dong Aisheng Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-imx.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/pinctrl/pinctrl-imx.c b/drivers/pinctrl/pinctrl-imx.c index f6e7c670906..09f3a308354 100644 --- a/drivers/pinctrl/pinctrl-imx.c +++ b/drivers/pinctrl/pinctrl-imx.c @@ -27,16 +27,16 @@ #include "core.h" #include "pinctrl-imx.h" -#define IMX_PMX_DUMP(info, p, m, c, n) \ -{ \ - int i, j; \ - printk("Format: Pin Mux Config\n"); \ - for (i = 0; i < n; i++) { \ - j = p[i]; \ - printk("%s %d 0x%lx\n", \ - info->pins[j].name, \ - m[i], c[i]); \ - } \ +#define IMX_PMX_DUMP(info, p, m, c, n) \ +{ \ + int i, j; \ + printk(KERN_DEBUG "Format: Pin Mux Config\n"); \ + for (i = 0; i < n; i++) { \ + j = p[i]; \ + printk(KERN_DEBUG "%s %d 0x%lx\n", \ + info->pins[j].name, \ + m[i], c[i]); \ + } \ } /* The bits in CONFIG cell defined in binding doc*/ @@ -201,10 +201,7 @@ static int imx_dt_node_to_map(struct pinctrl_dev *pctldev, static void imx_dt_free_map(struct pinctrl_dev *pctldev, struct pinctrl_map *map, unsigned num_maps) { - int i; - - for (i = 0; i < num_maps; i++) - kfree(map); + kfree(map); } static struct pinctrl_ops imx_pctrl_ops = { @@ -475,9 +472,8 @@ static int __devinit imx_pinctrl_parse_groups(struct device_node *np, grp->configs[j] = config & ~IMX_PAD_SION; } -#ifdef DEBUG IMX_PMX_DUMP(info, grp->pins, grp->mux_mode, grp->configs, grp->npins); -#endif + return 0; } From c71157c54a4f86e4f355dc6952268e8536013502 Mon Sep 17 00:00:00 2001 From: Devendra Naga Date: Thu, 7 Jun 2012 22:19:26 +0530 Subject: [PATCH 370/475] pinctrl: pinctrl-imx: free if of_get_parent fails to get the parent node of_get_parent can return null if no parent node found, so the allocated new_map should be freed. Signed-off-by: Devendra Naga Acked-by: Dong Aisheng Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-imx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-imx.c b/drivers/pinctrl/pinctrl-imx.c index 09f3a308354..542b01b1f81 100644 --- a/drivers/pinctrl/pinctrl-imx.c +++ b/drivers/pinctrl/pinctrl-imx.c @@ -173,8 +173,10 @@ static int imx_dt_node_to_map(struct pinctrl_dev *pctldev, /* create mux map */ parent = of_get_parent(np); - if (!parent) + if (!parent) { + kfree(new_map); return -EINVAL; + } new_map[0].type = PIN_MAP_TYPE_MUX_GROUP; new_map[0].data.mux.function = parent->name; new_map[0].data.mux.group = np->name; From 67695f2eae210b0631fb92cf5649d0454953e230 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 8 Jun 2012 21:33:12 +0800 Subject: [PATCH 371/475] pinctrl: pinctrl-imx: fix incorrect debug message of maps After create config map, the new_map pointer becomes point to PIN_MAP_TYPE_CONFIGS_PIN map rather than PIN_MAP_TYPE_MUX_GROUP map any more. Thus using new_map pointer to display the MUX_GROUP info is not correct. Using map pointer instead to show the correct MUX_GROUP map info. Original the debug message is: imx6q-pinctrl 20e0000.iomuxc: maps: function Yp group MX6Q_PAD_SD3_CMD num 12 After fix it is: imx6q-pinctrl 20e0000.iomuxc: maps: function usdhc3 group usdhc3grp-1 num 11 Reported-by: Richard Zhao Signed-off-by: Dong Aisheng Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-imx.c b/drivers/pinctrl/pinctrl-imx.c index 542b01b1f81..dd6d93aa533 100644 --- a/drivers/pinctrl/pinctrl-imx.c +++ b/drivers/pinctrl/pinctrl-imx.c @@ -195,7 +195,7 @@ static int imx_dt_node_to_map(struct pinctrl_dev *pctldev, } dev_dbg(pctldev->dev, "maps: function %s group %s num %d\n", - new_map->data.mux.function, new_map->data.mux.group, map_num); + (*map)->data.mux.function, (*map)->data.mux.group, map_num); return 0; } From 0439f31c35d1da0b28988b308ea455e38e6a350d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 12 Jun 2012 10:37:08 +0300 Subject: [PATCH 372/475] NFSv4.1: integer overflow in decode_cb_sequence_args() This seems like it could overflow on 32 bits. Use kmalloc_array() which has overflow protection built in. Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 95bfc243992..27c2969a9d0 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -455,9 +455,9 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp, args->csa_nrclists = ntohl(*p++); args->csa_rclists = NULL; if (args->csa_nrclists) { - args->csa_rclists = kmalloc(args->csa_nrclists * - sizeof(*args->csa_rclists), - GFP_KERNEL); + args->csa_rclists = kmalloc_array(args->csa_nrclists, + sizeof(*args->csa_rclists), + GFP_KERNEL); if (unlikely(args->csa_rclists == NULL)) goto out; From e216c8c771c9a77f14d7e8b4131846b038f6c145 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 12 Jun 2012 10:37:39 +0300 Subject: [PATCH 373/475] NFS: add an endian notation for sparse This is supposed to be a __be32 value. Sparse complains a lot: fs/nfs/callback_xdr.c:699:30: warning: incorrect type in initializer (different base types) fs/nfs/callback_xdr.c:699:30: expected unsigned int [unsigned] status fs/nfs/callback_xdr.c:699:30: got restricted __be32 const [usertype] csr_status fs/nfs/callback_xdr.c:715:9: warning: cast to restricted __be32 fs/nfs/callback_xdr.c:716:16: warning: incorrect type in return expression (different base types) fs/nfs/callback_xdr.c:716:16: expected restricted __be32 fs/nfs/callback_xdr.c:716:16: got unsigned int [unsigned] status Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 27c2969a9d0..e64b01d2a33 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -696,7 +696,7 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp, const struct cb_sequenceres *res) { __be32 *p; - unsigned status = res->csr_status; + __be32 status = res->csr_status; if (unlikely(status != 0)) goto out; From 0bf7481852c8ece4888c299ba0259b789c3dbde3 Mon Sep 17 00:00:00 2001 From: Devendra Naga Date: Sat, 9 Jun 2012 17:31:23 +0530 Subject: [PATCH 374/475] pinctrl: pinctrl-mxs: Take care of frees if the kzalloc fails if there is no purecfg , the group pointer is allocated using kzalloc and if it fails to allocate, we wont free the new_map, if config is true, we call kmemdup and if it fails to do so we wont free the allocated group if there is no purecfg. fix this by doing the frees of new_map pointer and group pointers. Acked-by: Dong Aisheng Signed-off-by: Devendra Naga Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-mxs.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/pinctrl-mxs.c b/drivers/pinctrl/pinctrl-mxs.c index 556e45a213e..9d46303a84e 100644 --- a/drivers/pinctrl/pinctrl-mxs.c +++ b/drivers/pinctrl/pinctrl-mxs.c @@ -107,8 +107,10 @@ static int mxs_dt_node_to_map(struct pinctrl_dev *pctldev, /* Compose group name */ group = kzalloc(length, GFP_KERNEL); - if (!group) - return -ENOMEM; + if (!group) { + ret = -ENOMEM; + goto free; + } snprintf(group, length, "%s.%d", np->name, reg); new_map[i].data.mux.group = group; i++; @@ -118,7 +120,7 @@ static int mxs_dt_node_to_map(struct pinctrl_dev *pctldev, pconfig = kmemdup(&config, sizeof(config), GFP_KERNEL); if (!pconfig) { ret = -ENOMEM; - goto free; + goto free_group; } new_map[i].type = PIN_MAP_TYPE_CONFIGS_GROUP; @@ -133,6 +135,9 @@ static int mxs_dt_node_to_map(struct pinctrl_dev *pctldev, return 0; +free_group: + if (!purecfg) + free(group); free: kfree(new_map); return ret; From 149ff9e1f240e7b57a02e0be5f74f21d0307d1d3 Mon Sep 17 00:00:00 2001 From: Devendra Naga Date: Sat, 9 Jun 2012 17:32:31 +0530 Subject: [PATCH 375/475] pinctrl: pinctrl-mxs: set platform driver data to NULL at errpath and at unregister clear the platform data pointer when mxs_pinctrl_probe_dt fails, and also before the unregistering with pinctrl subsystem. Signed-off-by: Devendra Naga Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-mxs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/pinctrl-mxs.c b/drivers/pinctrl/pinctrl-mxs.c index 9d46303a84e..afb50ee6459 100644 --- a/drivers/pinctrl/pinctrl-mxs.c +++ b/drivers/pinctrl/pinctrl-mxs.c @@ -516,6 +516,7 @@ int __devinit mxs_pinctrl_probe(struct platform_device *pdev, return 0; err: + platform_set_drvdata(pdev, NULL); iounmap(d->base); return ret; } @@ -525,6 +526,7 @@ int __devexit mxs_pinctrl_remove(struct platform_device *pdev) { struct mxs_pinctrl_data *d = platform_get_drvdata(pdev); + platform_set_drvdata(pdev, NULL); pinctrl_unregister(d->pctl); iounmap(d->base); From fe4561680519019cc15d660862dce513ded2f3a7 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Tue, 12 Jun 2012 11:27:01 -0300 Subject: [PATCH 376/475] drm: increase DRM_OBJECT_MAX_PROPERTY to 24 Before Kernel 3.5, no one was checking for the return value of drm_connector_attach_property, so we never noticed that we were unable to create some properties. Commit "drm: WARN() when drm_connector_attach_property fails" added a WARN when we fail to create a property, and the transition from "connector properties" to "object properties" changed the warning message a little bit. On i915 machines with many TV connectors we hit the maximum number of properties (since each TV connector uses a lot of properties), so we get a few backtraces in our logs. This commit increases the maximum number of properties to 24 hoping we'll have enough room for everybody. Chris suggested that we convert this code to "lists", but I believe this conversion can come after we make sure people's dmesgs are not spammed by our driver. Signed-off-by: Paulo Zanoni Reported-by: Dave Jones Tested-by: Daniel Vetter Signed-off-by: Dave Airlie --- include/drm/drm_crtc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 73e45600f95..bac55c21511 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -54,7 +54,7 @@ struct drm_mode_object { struct drm_object_properties *properties; }; -#define DRM_OBJECT_MAX_PROPERTY 16 +#define DRM_OBJECT_MAX_PROPERTY 24 struct drm_object_properties { int count; uint32_t ids[DRM_OBJECT_MAX_PROPERTY]; From a393c730ab69617c3291a3b0b2a228c9be2fc28c Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 12 Jun 2012 13:28:42 +0200 Subject: [PATCH 377/475] drm/ttm: Fix buffer object metadata accounting regression v2 A regression was introduced in the 3.3 rc series, commit "drm/ttm: simplify memory accounting for ttm user v2", causing the metadata of buffer objects created using the ttm_bo_create() function to be accounted twice. That causes massive leaks with the vmwgfx driver running for example SpecViewperf Catia-03 test 2, eventually killing the app. Furthermore, the same commit introduces a regression where metadata accounting is leaked if a buffer object is initialized with an illegal size. This is also fixed with this commit. v2: Fixed an error path and removed an unused variable. Signed-off-by: Thomas Hellstrom Reviewed-by: Konrad Rzeszutek Wilk Cc: Jerome Glisse Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index b67cfcaa661..36f4b28c1b9 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1204,6 +1204,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev, (*destroy)(bo); else kfree(bo); + ttm_mem_global_free(mem_glob, acc_size); return -EINVAL; } bo->destroy = destroy; @@ -1307,22 +1308,14 @@ int ttm_bo_create(struct ttm_bo_device *bdev, struct ttm_buffer_object **p_bo) { struct ttm_buffer_object *bo; - struct ttm_mem_global *mem_glob = bdev->glob->mem_glob; size_t acc_size; int ret; - acc_size = ttm_bo_acc_size(bdev, size, sizeof(struct ttm_buffer_object)); - ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false); - if (unlikely(ret != 0)) - return ret; - bo = kzalloc(sizeof(*bo), GFP_KERNEL); - - if (unlikely(bo == NULL)) { - ttm_mem_global_free(mem_glob, acc_size); + if (unlikely(bo == NULL)) return -ENOMEM; - } + acc_size = ttm_bo_acc_size(bdev, size, sizeof(struct ttm_buffer_object)); ret = ttm_bo_init(bdev, bo, size, type, placement, page_alignment, buffer_start, interruptible, persistent_swap_storage, acc_size, NULL, NULL); From 6b18f79399f25a0f8e2b915b2dcb8bf5c7aa470d Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 12 Jun 2012 16:16:17 +0530 Subject: [PATCH 378/475] ALSA: compress_core: don't wake up on pause during pause the core should maintain the status-quo on the device and pointers and not wake up. If app needs it should call DROP explcitly. Signed-off-by: Namarta Kohli Signed-off-by: Vinod Koul Signed-off-by: Takashi Iwai --- sound/core/compress_offload.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index a68aed7fce0..375f7a0d66e 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -502,10 +502,8 @@ static int snd_compr_pause(struct snd_compr_stream *stream) if (stream->runtime->state != SNDRV_PCM_STATE_RUNNING) return -EPERM; retval = stream->ops->trigger(stream, SNDRV_PCM_TRIGGER_PAUSE_PUSH); - if (!retval) { + if (!retval) stream->runtime->state = SNDRV_PCM_STATE_PAUSED; - wake_up(&stream->runtime->sleep); - } return retval; } From 8b21460ac6c0c88a0fec1cc70906c8e25c5aaa54 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 12 Jun 2012 16:16:18 +0530 Subject: [PATCH 379/475] ALSA: compress_core: cleanup pointers on stop as the start can be called after stop again, we need to reset state Signed-off-by: Namarta Kohli Signed-off-by: Vinod Koul Signed-off-by: Takashi Iwai --- sound/core/compress_offload.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 375f7a0d66e..ec2118d0e27 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -542,6 +542,10 @@ static int snd_compr_stop(struct snd_compr_stream *stream) if (!retval) { stream->runtime->state = SNDRV_PCM_STATE_SETUP; wake_up(&stream->runtime->sleep); + stream->runtime->hw_pointer = 0; + stream->runtime->app_pointer = 0; + stream->runtime->total_bytes_available = 0; + stream->runtime->total_bytes_transferred = 0; } return retval; } From edfe3bfc1b779ddda9bcff523eb022dda37b93c8 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Tue, 12 Jun 2012 13:15:12 +0200 Subject: [PATCH 380/475] ALSA: HDA: Pin fixup for Zotac Z68 motherboard Pin 0x1b was connected to the front panel connector, which according to the HDA standard should contain a mic and a headphone. In this case, the headphone was listed as "line out" by BIOS. Cc: stable@kernel.org BugLink: https://bugs.launchpad.net/bugs/993162 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 224410e8e9e..b8966817ccf 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6439,6 +6439,7 @@ enum { ALC662_FIXUP_ASUS_MODE7, ALC662_FIXUP_ASUS_MODE8, ALC662_FIXUP_NO_JACK_DETECT, + ALC662_FIXUP_ZOTAC_Z68, }; static const struct alc_fixup alc662_fixups[] = { @@ -6588,6 +6589,13 @@ static const struct alc_fixup alc662_fixups[] = { .type = ALC_FIXUP_FUNC, .v.func = alc_fixup_no_jack_detect, }, + [ALC662_FIXUP_ZOTAC_Z68] = { + .type = ALC_FIXUP_PINS, + .v.pins = (const struct alc_pincfg[]) { + { 0x1b, 0x02214020 }, /* Front HP */ + { } + } + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -6601,6 +6609,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD), + SND_PCI_QUIRK(0x19da, 0xa130, "Zotac Z68", ALC662_FIXUP_ZOTAC_Z68), SND_PCI_QUIRK(0x1b35, 0x2206, "CZC P10T", ALC662_FIXUP_CZC_P10T), #if 0 From 8a93664df90db983cfede122f9b4ddb3a8284e52 Mon Sep 17 00:00:00 2001 From: Weiping Pan Date: Sun, 10 Jun 2012 23:00:20 +0000 Subject: [PATCH 381/475] bonding:record primary when modify it via sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we modify primary via sysfs and it is not a valid slave, we should record it for future use, and this behavior is the same with bond_check_params(). Signed-off-by: Weiping Pan Acked-by: Nicolas de Pesloüan Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_sysfs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index aef42f04532..485bedb8278 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -1082,8 +1082,12 @@ static ssize_t bonding_store_primary(struct device *d, } } - pr_info("%s: Unable to set %.*s as primary slave.\n", - bond->dev->name, (int)strlen(buf) - 1, buf); + strncpy(bond->params.primary, ifname, IFNAMSIZ); + bond->params.primary[IFNAMSIZ - 1] = 0; + + pr_info("%s: Recording %s as primary, " + "but it has not been enslaved to %s yet.\n", + bond->dev->name, ifname, bond->dev->name); out: write_unlock_bh(&bond->curr_slave_lock); read_unlock(&bond->lock); From 5ee31c6898ea5537fcea160999d60dc63bc0c305 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Jun 2012 06:03:51 +0000 Subject: [PATCH 382/475] bonding: Fix corrupted queue_mapping In the transmit path of the bonding driver, skb->cb is used to stash the skb->queue_mapping so that the bonding device can set its own queue mapping. This value becomes corrupted since the skb->cb is also used in __dev_xmit_skb. When transmitting through bonding driver, bond_select_queue is called from dev_queue_xmit. In bond_select_queue the original skb->queue_mapping is copied into skb->cb (via bond_queue_mapping) and skb->queue_mapping is overwritten with the bond driver queue. Subsequently in dev_queue_xmit, __dev_xmit_skb is called which writes the packet length into skb->cb, thereby overwriting the stashed queue mappping. In bond_dev_queue_xmit (called from hard_start_xmit), the queue mapping for the skb is set to the stashed value which is now the skb length and hence is an invalid queue for the slave device. If we want to save skb->queue_mapping into skb->cb[], best place is to add a field in struct qdisc_skb_cb, to make sure it wont conflict with other layers (eg : Qdiscc, Infiniband...) This patchs also makes sure (struct qdisc_skb_cb)->data is aligned on 8 bytes : netem qdisc for example assumes it can store an u64 in it, without misalignment penalty. Note : we only have 20 bytes left in (struct qdisc_skb_cb)->data[]. The largest user is CHOKe and it fills it. Based on a previous patch from Tom Herbert. Signed-off-by: Eric Dumazet Reported-by: Tom Herbert Cc: John Fastabend Cc: Roland Dreier Acked-by: Neil Horman Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 9 +++++---- include/net/sch_generic.h | 7 +++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2ee8cf9e8a3..b9c2ae62166 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -76,6 +76,7 @@ #include #include #include +#include #include "bonding.h" #include "bond_3ad.h" #include "bond_alb.h" @@ -381,8 +382,6 @@ struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr) return next; } -#define bond_queue_mapping(skb) (*(u16 *)((skb)->cb)) - /** * bond_dev_queue_xmit - Prepare skb for xmit. * @@ -395,7 +394,9 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, { skb->dev = slave_dev; - skb->queue_mapping = bond_queue_mapping(skb); + BUILD_BUG_ON(sizeof(skb->queue_mapping) != + sizeof(qdisc_skb_cb(skb)->bond_queue_mapping)); + skb->queue_mapping = qdisc_skb_cb(skb)->bond_queue_mapping; if (unlikely(netpoll_tx_running(slave_dev))) bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb); @@ -4171,7 +4172,7 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb) /* * Save the original txq to restore before passing to the driver */ - bond_queue_mapping(skb) = skb->queue_mapping; + qdisc_skb_cb(skb)->bond_queue_mapping = skb->queue_mapping; if (unlikely(txq >= dev->real_num_tx_queues)) { do { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 55ce96b53b0..9d7d54a00e6 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -220,13 +220,16 @@ struct tcf_proto { struct qdisc_skb_cb { unsigned int pkt_len; - unsigned char data[24]; + u16 bond_queue_mapping; + u16 _pad; + unsigned char data[20]; }; static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) { struct qdisc_skb_cb *qcb; - BUILD_BUG_ON(sizeof(skb->cb) < sizeof(unsigned int) + sz); + + BUILD_BUG_ON(sizeof(skb->cb) < offsetof(struct qdisc_skb_cb, data) + sz); BUILD_BUG_ON(sizeof(qcb->data) < sz); } From e58c5de8f59d124907c993ea3126c69493b7eec7 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 12 Jun 2012 10:17:25 -0400 Subject: [PATCH 383/475] drivers/ide/ide-cs.c: adjust suspicious bit operation IO_DATA_PATH_WIDTH_8 is 0, so a bit-and with it is always false. The value IO_DATA_PATH_WIDTH covers the bits of the IO_DATA_PATH constants, so first pick those bits and then make the test using !=. This problem was found using Coccinelle (http://coccinelle.lip6.fr/). Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/ide/ide-cs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c index 28e344ea514..f1e922e2479 100644 --- a/drivers/ide/ide-cs.c +++ b/drivers/ide/ide-cs.c @@ -167,7 +167,8 @@ static int pcmcia_check_one_config(struct pcmcia_device *pdev, void *priv_data) { int *is_kme = priv_data; - if (!(pdev->resource[0]->flags & IO_DATA_PATH_WIDTH_8)) { + if ((pdev->resource[0]->flags & IO_DATA_PATH_WIDTH) + != IO_DATA_PATH_WIDTH_8) { pdev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH; pdev->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO; } From de102ef41f24a4c251c4a3838796bb27557d4d93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sat, 19 May 2012 19:19:48 +0200 Subject: [PATCH 384/475] USB: cdc-wdm: Add Vodafone/Huawei K5005 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tested-by: Thomas Schäfer Signed-off-by: Bjørn Mork Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index ea8b304f0e8..8fd398dffce 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -55,6 +55,15 @@ static const struct usb_device_id wdm_ids[] = { .bInterfaceSubClass = 1, .bInterfaceProtocol = 9, /* NOTE: CDC ECM control interface! */ }, + { + /* Vodafone/Huawei K5005 (12d1:14c8) and similar modems */ + .match_flags = USB_DEVICE_ID_MATCH_VENDOR | + USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = HUAWEI_VENDOR_ID, + .bInterfaceClass = USB_CLASS_VENDOR_SPEC, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 57, /* NOTE: CDC ECM control interface! */ + }, { } }; From 4cbbb039a9719fb3bba73d255c6a95bc6dc6428b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sat, 19 May 2012 19:20:50 +0200 Subject: [PATCH 385/475] USB: option: Add Vodafone/Huawei K5005 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tested-by: Thomas Schäfer Signed-off-by: Bjørn Mork Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 1aae9028cd0..f3e4a470fea 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -150,6 +150,7 @@ static void option_instat_callback(struct urb *urb); #define HUAWEI_PRODUCT_E14AC 0x14AC #define HUAWEI_PRODUCT_K3806 0x14AE #define HUAWEI_PRODUCT_K4605 0x14C6 +#define HUAWEI_PRODUCT_K5005 0x14C8 #define HUAWEI_PRODUCT_K3770 0x14C9 #define HUAWEI_PRODUCT_K3771 0x14CA #define HUAWEI_PRODUCT_K4510 0x14CB @@ -666,6 +667,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3806, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t) &huawei_cdc12_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K5005, 0xff, 0x01, 0x31) }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K5005, 0xff, 0x01, 0x32) }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K5005, 0xff, 0x01, 0x33) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x31) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x32) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3771, 0xff, 0x02, 0x31) }, From c41444ccfa33a1c20efa319e554cb531576e64a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 24 May 2012 11:19:04 +0200 Subject: [PATCH 386/475] USB: qcserial: Add Sierra Wireless device IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some additional IDs found in the BSD/GPL licensed out-of-tree GobiSerial driver from Sierra Wireless. Signed-off-by: Bjørn Mork Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 0d5fe59ebb9..996015c5f1a 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -105,7 +105,13 @@ static const struct usb_device_id id_table[] = { {USB_DEVICE(0x1410, 0xa021)}, /* Novatel Gobi 3000 Composite */ {USB_DEVICE(0x413c, 0x8193)}, /* Dell Gobi 3000 QDL */ {USB_DEVICE(0x413c, 0x8194)}, /* Dell Gobi 3000 Composite */ + {USB_DEVICE(0x1199, 0x9010)}, /* Sierra Wireless Gobi 3000 QDL */ + {USB_DEVICE(0x1199, 0x9012)}, /* Sierra Wireless Gobi 3000 QDL */ {USB_DEVICE(0x1199, 0x9013)}, /* Sierra Wireless Gobi 3000 Modem device (MC8355) */ + {USB_DEVICE(0x1199, 0x9014)}, /* Sierra Wireless Gobi 3000 QDL */ + {USB_DEVICE(0x1199, 0x9015)}, /* Sierra Wireless Gobi 3000 Modem device */ + {USB_DEVICE(0x1199, 0x9018)}, /* Sierra Wireless Gobi 3000 QDL */ + {USB_DEVICE(0x1199, 0x9019)}, /* Sierra Wireless Gobi 3000 Modem device */ {USB_DEVICE(0x12D1, 0x14F0)}, /* Sony Gobi 3000 QDL */ {USB_DEVICE(0x12D1, 0x14F1)}, /* Sony Gobi 3000 Composite */ { } /* Terminating entry */ From e00a54d772210d450e5c1a801534c3c8a448549f Mon Sep 17 00:00:00 2001 From: Evan McNabb Date: Fri, 25 May 2012 22:46:14 -0400 Subject: [PATCH 387/475] USB: ftdi-sio: Add support for RT Systems USB-RTS01 serial adapter Add support for RT Systems USB-RTS01 USB to Serial adapter: http://www.rtsystemsinc.com/Photos/USBRTS01.html Tested by controlling Icom IC-718 amateur radio transceiver via hamlib. Signed-off-by: Evan McNabb Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/ftdi_sio_ids.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 8c084ea34e2..bc912e5a3be 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -737,6 +737,7 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(TELLDUS_VID, TELLDUS_TELLSTICK_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_SERIAL_VX7_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_CT29B_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_RTS01_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MAXSTREAM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PHI_FISCO_PID) }, { USB_DEVICE(TML_VID, TML_USB_SERIAL_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index f3c7c78ede3..5661c7e2d41 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -784,6 +784,7 @@ #define RTSYSTEMS_VID 0x2100 /* Vendor ID */ #define RTSYSTEMS_SERIAL_VX7_PID 0x9e52 /* Serial converter for VX-7 Radios using FT232RL */ #define RTSYSTEMS_CT29B_PID 0x9e54 /* CT29B Radio Cable */ +#define RTSYSTEMS_RTS01_PID 0x9e57 /* USB-RTS01 Radio Cable */ /* From 5bbfa6f427c1d7244a5ee154ab8fa37265a5e049 Mon Sep 17 00:00:00 2001 From: Mikko Tuumanen Date: Fri, 1 Jun 2012 11:28:55 +0300 Subject: [PATCH 388/475] USB: serial: cp210x: add Optris MS Pro usb id Signed-off-by: Mikko Tuumanen Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 1b1926200ba..73d25cd8cba 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -82,6 +82,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8066) }, /* Argussoft In-System Programmer */ { USB_DEVICE(0x10C4, 0x806F) }, /* IMS USB to RS422 Converter Cable */ { USB_DEVICE(0x10C4, 0x807A) }, /* Crumb128 board */ + { USB_DEVICE(0x10C4, 0x80C4) }, /* Cygnal Integrated Products, Inc., Optris infrared thermometer */ { USB_DEVICE(0x10C4, 0x80CA) }, /* Degree Controls Inc */ { USB_DEVICE(0x10C4, 0x80DD) }, /* Tracient RFID */ { USB_DEVICE(0x10C4, 0x80F6) }, /* Suunto sports instrument */ From 19a3dd1575e954e8c004413bee3e12d3962f2525 Mon Sep 17 00:00:00 2001 From: Tom Cassidy Date: Wed, 6 Jun 2012 17:08:48 +1000 Subject: [PATCH 389/475] USB: serial: sierra: Add support for Sierra Wireless AirCard 320U modem Add support for Sierra Wireless AirCard 320U modem Signed-off-by: Tomas Cassidy Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/sierra.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index ba54a0a8235..d423d36acc0 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -294,6 +294,10 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1199, 0x68A3), /* Sierra Wireless Direct IP modems */ .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist }, + /* AT&T Direct IP LTE modems */ + { USB_DEVICE_AND_INTERFACE_INFO(0x0F3D, 0x68AA, 0xFF, 0xFF, 0xFF), + .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist + }, { USB_DEVICE(0x0f3d, 0x68A3), /* Airprime/Sierra Wireless Direct IP modems */ .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist }, From 42ca7da1c2363dbef4ba1b6917c4c02274b6a5e2 Mon Sep 17 00:00:00 2001 From: Andrew Bird Date: Mon, 28 May 2012 12:43:06 +0100 Subject: [PATCH 390/475] USB: option: Updated Huawei K4605 has better id Later firmwares for this device now have proper subclass and protocol info so we can identify it nicely without needing to use the blacklist. I'm not removing the old 0xff matching as there may be devices in the field that still need that. Signed-off-by: Andrew Bird Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index f3e4a470fea..d9cb5526368 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -667,6 +667,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3806, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t) &huawei_cdc12_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0x01, 0x31) }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0x01, 0x32) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K5005, 0xff, 0x01, 0x31) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K5005, 0xff, 0x01, 0x32) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K5005, 0xff, 0x01, 0x33) }, From 1aa3c63cf0a79153ee13c8f82e4eb6c40b66a161 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 22 May 2012 20:45:13 +0100 Subject: [PATCH 391/475] USB: mct_u232: Fix incorrect TIOCMSET return The low level helper returns 1 on success. The ioctl should however return 0. As this is the only user of the helper return, make the helper return 0 or an error code. Resolves-bug: https://bugzilla.kernel.org/show_bug.cgi?id=43009 Signed-off-by: Alan Cox Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mct_u232.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index d0ec1aa5271..a71fa0aa040 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -309,13 +309,16 @@ static int mct_u232_set_modem_ctrl(struct usb_serial *serial, MCT_U232_SET_REQUEST_TYPE, 0, 0, buf, MCT_U232_SET_MODEM_CTRL_SIZE, WDR_TIMEOUT); - if (rc < 0) - dev_err(&serial->dev->dev, - "Set MODEM CTRL 0x%x failed (error = %d)\n", mcr, rc); + kfree(buf); + dbg("set_modem_ctrl: state=0x%x ==> mcr=0x%x", control_state, mcr); - kfree(buf); - return rc; + if (rc < 0) { + dev_err(&serial->dev->dev, + "Set MODEM CTRL 0x%x failed (error = %d)\n", mcr, rc); + return rc; + } + return 0; } /* mct_u232_set_modem_ctrl */ static int mct_u232_get_modem_stat(struct usb_serial *serial, From 4273f9878b0a8271df055e3c8f2e7f08c6a4a2f4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 29 May 2012 17:57:52 +0200 Subject: [PATCH 392/475] USB: option: fix port-data abuse Commit 8b4c6a3ab596961b78465 ("USB: option: Use generic USB wwan code") moved option port-data allocation to usb_wwan_startup but still cast the port data to the old struct... Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 34 +++------------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index d9cb5526368..9c2f7b28e25 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1265,35 +1265,6 @@ static struct usb_serial_driver * const serial_drivers[] = { static bool debug; -/* per port private data */ - -#define N_IN_URB 4 -#define N_OUT_URB 4 -#define IN_BUFLEN 4096 -#define OUT_BUFLEN 4096 - -struct option_port_private { - /* Input endpoints and buffer for this port */ - struct urb *in_urbs[N_IN_URB]; - u8 *in_buffer[N_IN_URB]; - /* Output endpoints and buffer for this port */ - struct urb *out_urbs[N_OUT_URB]; - u8 *out_buffer[N_OUT_URB]; - unsigned long out_busy; /* Bit vector of URBs in use */ - int opened; - struct usb_anchor delayed; - - /* Settings for the port */ - int rts_state; /* Handshaking pins (outputs) */ - int dtr_state; - int cts_state; /* Handshaking pins (inputs) */ - int dsr_state; - int dcd_state; - int ri_state; - - unsigned long tx_start_time[N_OUT_URB]; -}; - module_usb_serial_driver(serial_drivers, option_ids); static bool is_blacklisted(const u8 ifnum, enum option_blacklist_reason reason, @@ -1367,7 +1338,8 @@ static void option_instat_callback(struct urb *urb) int err; int status = urb->status; struct usb_serial_port *port = urb->context; - struct option_port_private *portdata = usb_get_serial_port_data(port); + struct usb_wwan_port_private *portdata = + usb_get_serial_port_data(port); dbg("%s: urb %p port %p has data %p", __func__, urb, port, portdata); @@ -1427,7 +1399,7 @@ static int option_send_setup(struct usb_serial_port *port) struct usb_serial *serial = port->serial; struct usb_wwan_intf_private *intfdata = (struct usb_wwan_intf_private *) serial->private; - struct option_port_private *portdata; + struct usb_wwan_port_private *portdata; int ifNum = serial->interface->cur_altsetting->desc.bInterfaceNumber; int val = 0; From b9c3aab315b51f81649a0d737c4c73783fbd8de0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 29 May 2012 18:22:48 +0200 Subject: [PATCH 393/475] USB: option: fix memory leak Fix memory leak introduced by commit 383cedc3bb435de7a2 ("USB: serial: full autosuspend support for the option driver") which allocates usb-serial data but never frees it. Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 9c2f7b28e25..d12ee2fca76 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -47,6 +47,7 @@ /* Function prototypes */ static int option_probe(struct usb_serial *serial, const struct usb_device_id *id); +static void option_release(struct usb_serial *serial); static int option_send_setup(struct usb_serial_port *port); static void option_instat_callback(struct urb *urb); @@ -1251,7 +1252,7 @@ static struct usb_serial_driver option_1port_device = { .ioctl = usb_wwan_ioctl, .attach = usb_wwan_startup, .disconnect = usb_wwan_disconnect, - .release = usb_wwan_release, + .release = option_release, .read_int_callback = option_instat_callback, #ifdef CONFIG_PM .suspend = usb_wwan_suspend, @@ -1333,6 +1334,15 @@ static int option_probe(struct usb_serial *serial, return 0; } +static void option_release(struct usb_serial *serial) +{ + struct usb_wwan_intf_private *priv = usb_get_serial_data(serial); + + usb_wwan_release(serial); + + kfree(priv); +} + static void option_instat_callback(struct urb *urb) { int err; From b9c87663eead64c767e72a373ae6f8a94bead459 Mon Sep 17 00:00:00 2001 From: Tony Zelenoff Date: Tue, 5 Jun 2012 17:58:04 +0400 Subject: [PATCH 394/475] USB: mos7840: Fix compilation of usb serial driver The __devinitconst section can't be referenced from usb_serial_device structure. Thus removed it as it done in other mos* device drivers. Error itself: WARNING: drivers/usb/serial/mos7840.o(.data+0x8): Section mismatch in reference from the variable moschip7840_4port_device to the variable .devinit.rodata:id_table The variable moschip7840_4port_device references the variable __devinitconst id_table [v2] no attach now Signed-off-by: Tony Zelenoff Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7840.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 29160f8b510..57eca244842 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -190,7 +190,7 @@ static int device_type; -static const struct usb_device_id id_table[] __devinitconst = { +static const struct usb_device_id id_table[] = { {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7840)}, {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7820)}, {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7810)}, From 0ef0be15fd2564767f114c249fc4af704d8e16f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AF=B4=E4=B8=8D=E5=BE=97?= Date: Mon, 28 May 2012 21:31:29 +0800 Subject: [PATCH 395/475] USB: option: add more YUGA device ids Signed-off-by: gavin zhu Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 40 +++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index d12ee2fca76..e668a2460bd 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -427,7 +427,7 @@ static void option_instat_callback(struct urb *urb); #define SAMSUNG_VENDOR_ID 0x04e8 #define SAMSUNG_PRODUCT_GT_B3730 0x6889 -/* YUGA products www.yuga-info.com*/ +/* YUGA products www.yuga-info.com gavin.kx@qq.com */ #define YUGA_VENDOR_ID 0x257A #define YUGA_PRODUCT_CEM600 0x1601 #define YUGA_PRODUCT_CEM610 0x1602 @@ -444,6 +444,8 @@ static void option_instat_callback(struct urb *urb); #define YUGA_PRODUCT_CEU516 0x160C #define YUGA_PRODUCT_CEU528 0x160D #define YUGA_PRODUCT_CEU526 0x160F +#define YUGA_PRODUCT_CEU881 0x161F +#define YUGA_PRODUCT_CEU882 0x162F #define YUGA_PRODUCT_CWM600 0x2601 #define YUGA_PRODUCT_CWM610 0x2602 @@ -459,23 +461,26 @@ static void option_instat_callback(struct urb *urb); #define YUGA_PRODUCT_CWU518 0x260B #define YUGA_PRODUCT_CWU516 0x260C #define YUGA_PRODUCT_CWU528 0x260D +#define YUGA_PRODUCT_CWU581 0x260E #define YUGA_PRODUCT_CWU526 0x260F +#define YUGA_PRODUCT_CWU582 0x261F +#define YUGA_PRODUCT_CWU583 0x262F -#define YUGA_PRODUCT_CLM600 0x2601 -#define YUGA_PRODUCT_CLM610 0x2602 -#define YUGA_PRODUCT_CLM500 0x2603 -#define YUGA_PRODUCT_CLM510 0x2604 -#define YUGA_PRODUCT_CLM800 0x2605 -#define YUGA_PRODUCT_CLM900 0x2606 +#define YUGA_PRODUCT_CLM600 0x3601 +#define YUGA_PRODUCT_CLM610 0x3602 +#define YUGA_PRODUCT_CLM500 0x3603 +#define YUGA_PRODUCT_CLM510 0x3604 +#define YUGA_PRODUCT_CLM800 0x3605 +#define YUGA_PRODUCT_CLM900 0x3606 -#define YUGA_PRODUCT_CLU718 0x2607 -#define YUGA_PRODUCT_CLU716 0x2608 -#define YUGA_PRODUCT_CLU728 0x2609 -#define YUGA_PRODUCT_CLU726 0x260A -#define YUGA_PRODUCT_CLU518 0x260B -#define YUGA_PRODUCT_CLU516 0x260C -#define YUGA_PRODUCT_CLU528 0x260D -#define YUGA_PRODUCT_CLU526 0x260F +#define YUGA_PRODUCT_CLU718 0x3607 +#define YUGA_PRODUCT_CLU716 0x3608 +#define YUGA_PRODUCT_CLU728 0x3609 +#define YUGA_PRODUCT_CLU726 0x360A +#define YUGA_PRODUCT_CLU518 0x360B +#define YUGA_PRODUCT_CLU516 0x360C +#define YUGA_PRODUCT_CLU528 0x360D +#define YUGA_PRODUCT_CLU526 0x360F /* Viettel products */ #define VIETTEL_VENDOR_ID 0x2262 @@ -1216,6 +1221,11 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU516) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU528) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU526) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU881) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU882) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU581) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU582) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU583) }, { USB_DEVICE_AND_INTERFACE_INFO(VIETTEL_VENDOR_ID, VIETTEL_PRODUCT_VT1000, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZD_VENDOR_ID, ZD_PRODUCT_7000, 0xff, 0xff, 0xff) }, { USB_DEVICE(LG_VENDOR_ID, LG_PRODUCT_L02C) }, /* docomo L-02C modem */ From 6dae14216c85eea13db7b12c469475c5d30e5499 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 13 Jun 2012 00:28:23 +0200 Subject: [PATCH 396/475] serial: sh-sci: Fix probe error paths When probing fails, the driver must not try to cleanup resources that have not been initialized. Fix this. Signed-off-by: Laurent Pinchart Signed-off-by: Paul Mundt --- drivers/tty/serial/sh-sci.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 4604153b795..27df2ad4826 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2179,6 +2179,16 @@ static int __devinit sci_init_single(struct platform_device *dev, return 0; } +static void sci_cleanup_single(struct sci_port *port) +{ + sci_free_gpios(port); + + clk_put(port->iclk); + clk_put(port->fclk); + + pm_runtime_disable(port->port.dev); +} + #ifdef CONFIG_SERIAL_SH_SCI_CONSOLE static void serial_console_putchar(struct uart_port *port, int ch) { @@ -2360,14 +2370,10 @@ static int sci_remove(struct platform_device *dev) cpufreq_unregister_notifier(&port->freq_transition, CPUFREQ_TRANSITION_NOTIFIER); - sci_free_gpios(port); - uart_remove_one_port(&sci_uart_driver, &port->port); - clk_put(port->iclk); - clk_put(port->fclk); + sci_cleanup_single(port); - pm_runtime_disable(&dev->dev); return 0; } @@ -2392,7 +2398,13 @@ static int __devinit sci_probe_single(struct platform_device *dev, if (ret) return ret; - return uart_add_one_port(&sci_uart_driver, &sciport->port); + ret = uart_add_one_port(&sci_uart_driver, &sciport->port); + if (ret) { + sci_cleanup_single(sciport); + return ret; + } + + return 0; } static int __devinit sci_probe(struct platform_device *dev) @@ -2413,24 +2425,22 @@ static int __devinit sci_probe(struct platform_device *dev) ret = sci_probe_single(dev, dev->id, p, sp); if (ret) - goto err_unreg; + return ret; sp->freq_transition.notifier_call = sci_notifier; ret = cpufreq_register_notifier(&sp->freq_transition, CPUFREQ_TRANSITION_NOTIFIER); - if (unlikely(ret < 0)) - goto err_unreg; + if (unlikely(ret < 0)) { + sci_cleanup_single(sp); + return ret; + } #ifdef CONFIG_SH_STANDARD_BIOS sh_bios_gdb_detach(); #endif return 0; - -err_unreg: - sci_remove(dev); - return ret; } static int sci_suspend(struct device *dev) From b6c5ef6f6d3e46d6200b141c30ac000a11b851df Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 13 Jun 2012 00:28:24 +0200 Subject: [PATCH 397/475] serial: sh-sci: Make probe fail for ports that exceed the maximum count The driver supports a maximum number of ports configurable at compile time. Make sure the probe() method fails when registering a port that exceeds the maximum instead of returning success without registering the port. This fixes a crash at system suspend time, when the driver tried to suspend a non-registered port using the UART core. Signed-off-by: Laurent Pinchart Signed-off-by: Paul Mundt --- drivers/tty/serial/sh-sci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 27df2ad4826..1bd9163bc11 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2391,7 +2391,7 @@ static int __devinit sci_probe_single(struct platform_device *dev, index+1, SCI_NPORTS); dev_notice(&dev->dev, "Consider bumping " "CONFIG_SERIAL_SH_SCI_NR_UARTS!\n"); - return 0; + return -EINVAL; } ret = sci_init_single(dev, sciport, index, p); From 74ca4313bdd0423a7917bbe74be3f27da8a39fe1 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Fri, 8 Jun 2012 22:49:17 +0200 Subject: [PATCH 398/475] sh: Kill off last dead UBC header Commit 7025bec9125b0a02edcaf22c2dce753bf2c95480 ("sh: Kill off dead UBC headers.") skipped arch/sh/include/cpu-sh2a/cpu/ubc.h. Since nothing is using that header either, kill it off too. Signed-off-by: Paul Bolle Signed-off-by: Paul Mundt --- arch/sh/include/cpu-sh2a/cpu/ubc.h | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 arch/sh/include/cpu-sh2a/cpu/ubc.h diff --git a/arch/sh/include/cpu-sh2a/cpu/ubc.h b/arch/sh/include/cpu-sh2a/cpu/ubc.h deleted file mode 100644 index 1192e1c761a..00000000000 --- a/arch/sh/include/cpu-sh2a/cpu/ubc.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * SH-2A UBC definitions - * - * Copyright (C) 2008 Kieran Bingham - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#ifndef __ASM_CPU_SH2A_UBC_H -#define __ASM_CPU_SH2A_UBC_H - -#define UBC_BARA 0xfffc0400 -#define UBC_BAMRA 0xfffc0404 -#define UBC_BBRA 0xfffc04a0 /* 16 bit access */ -#define UBC_BDRA 0xfffc0408 -#define UBC_BDMRA 0xfffc040c - -#define UBC_BARB 0xfffc0410 -#define UBC_BAMRB 0xfffc0414 -#define UBC_BBRB 0xfffc04b0 /* 16 bit access */ -#define UBC_BDRB 0xfffc0418 -#define UBC_BDMRB 0xfffc041c - -#define UBC_BRCR 0xfffc04c0 - -#endif /* __ASM_CPU_SH2A_UBC_H */ From 0e100e11bd73be4e28e457cf5ad49a6892d5d1fe Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 25 May 2012 13:02:48 +0900 Subject: [PATCH 399/475] sh: switch to generic strncpy_from_user(). This kills off the special sh32/64 versions and adopts the generic version. It should be possible to optimize this for SH-4A unaligned loads, but this is a corner case that can be supported incrementally. Signed-off-by: Paul Mundt --- arch/sh/Kconfig | 1 + arch/sh/include/asm/uaccess.h | 35 ++++----------------------- arch/sh/include/asm/uaccess_32.h | 39 ------------------------------ arch/sh/include/asm/uaccess_64.h | 2 -- arch/sh/kernel/cpu/sh5/entry.S | 41 -------------------------------- arch/sh/kernel/sh_ksyms_64.c | 1 - 6 files changed, 5 insertions(+), 114 deletions(-) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 99bcd0ee838..cbffc26c043 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -32,6 +32,7 @@ config SUPERH select GENERIC_SMP_IDLE_THREAD select GENERIC_CLOCKEVENTS select GENERIC_CMOS_UPDATE if SH_SH03 || SH_DREAMCAST + select GENERIC_STRNCPY_FROM_USER help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h index 050f221fa89..3df7eee9c15 100644 --- a/arch/sh/include/asm/uaccess.h +++ b/arch/sh/include/asm/uaccess.h @@ -25,6 +25,8 @@ (__chk_user_ptr(addr), \ __access_ok((unsigned long __force)(addr), (size))) +#define user_addr_max() (current_thread_info()->addr_limit.seg) + /* * Uh, these should become the main single-value transfer routines ... * They automatically use the right size if we just have the right @@ -100,6 +102,8 @@ struct __large_struct { unsigned long buf[100]; }; # include "uaccess_64.h" #endif +extern long strncpy_from_user(char *dest, const char __user *src, long count); + /* Generic arbitrary sized copy. */ /* Return the number of bytes NOT copied */ __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n); @@ -137,37 +141,6 @@ __kernel_size_t __clear_user(void *addr, __kernel_size_t size); __cl_size; \ }) -/** - * strncpy_from_user: - Copy a NUL terminated string from userspace. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -#define strncpy_from_user(dest,src,count) \ -({ \ - unsigned long __sfu_src = (unsigned long)(src); \ - int __sfu_count = (int)(count); \ - long __sfu_res = -EFAULT; \ - \ - if (__access_ok(__sfu_src, __sfu_count)) \ - __sfu_res = __strncpy_from_user((unsigned long)(dest), \ - __sfu_src, __sfu_count); \ - \ - __sfu_res; \ -}) - static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { diff --git a/arch/sh/include/asm/uaccess_32.h b/arch/sh/include/asm/uaccess_32.h index ae0d24f6653..ef44d05bb63 100644 --- a/arch/sh/include/asm/uaccess_32.h +++ b/arch/sh/include/asm/uaccess_32.h @@ -170,45 +170,6 @@ __asm__ __volatile__( \ extern void __put_user_unknown(void); -static inline int -__strncpy_from_user(unsigned long __dest, unsigned long __user __src, int __count) -{ - __kernel_size_t res; - unsigned long __dummy, _d, _s, _c; - - __asm__ __volatile__( - "9:\n" - "mov.b @%2+, %1\n\t" - "cmp/eq #0, %1\n\t" - "bt/s 2f\n" - "1:\n" - "mov.b %1, @%3\n\t" - "dt %4\n\t" - "bf/s 9b\n\t" - " add #1, %3\n\t" - "2:\n\t" - "sub %4, %0\n" - "3:\n" - ".section .fixup,\"ax\"\n" - "4:\n\t" - "mov.l 5f, %1\n\t" - "jmp @%1\n\t" - " mov %9, %0\n\t" - ".balign 4\n" - "5: .long 3b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .balign 4\n" - " .long 9b,4b\n" - ".previous" - : "=r" (res), "=&z" (__dummy), "=r" (_s), "=r" (_d), "=r"(_c) - : "0" (__count), "2" (__src), "3" (__dest), "4" (__count), - "i" (-EFAULT) - : "memory", "t"); - - return res; -} - /* * Return the size of a string (including the ending 0 even when we have * exceeded the maximum string length). diff --git a/arch/sh/include/asm/uaccess_64.h b/arch/sh/include/asm/uaccess_64.h index 56fd20b8cdc..0042c9054b2 100644 --- a/arch/sh/include/asm/uaccess_64.h +++ b/arch/sh/include/asm/uaccess_64.h @@ -85,7 +85,5 @@ extern long __put_user_asm_q(void *, long); extern void __put_user_unknown(void); extern long __strnlen_user(const char *__s, long __n); -extern int __strncpy_from_user(unsigned long __dest, - unsigned long __user __src, int __count); #endif /* __ASM_SH_UACCESS_64_H */ diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S index ff1f0e6e9be..b7b3f63299b 100644 --- a/arch/sh/kernel/cpu/sh5/entry.S +++ b/arch/sh/kernel/cpu/sh5/entry.S @@ -1568,46 +1568,6 @@ ___clear_user_exit: #endif /* CONFIG_MMU */ -/* - * int __strncpy_from_user(unsigned long __dest, unsigned long __src, - * int __count) - * - * Inputs: - * (r2) target address - * (r3) source address - * (r4) maximum size in bytes - * - * Ouputs: - * (*r2) copied data - * (r2) -EFAULT (in case of faulting) - * copied data (otherwise) - */ - .global __strncpy_from_user -__strncpy_from_user: - pta ___strncpy_from_user1, tr0 - pta ___strncpy_from_user_done, tr1 - or r4, ZERO, r5 /* r5 = original count */ - beq/u r4, r63, tr1 /* early exit if r4==0 */ - movi -(EFAULT), r6 /* r6 = reply, no real fixup */ - or ZERO, ZERO, r7 /* r7 = data, clear top byte of data */ - -___strncpy_from_user1: - ld.b r3, 0, r7 /* Fault address: only in reading */ - st.b r2, 0, r7 - addi r2, 1, r2 - addi r3, 1, r3 - beq/u ZERO, r7, tr1 - addi r4, -1, r4 /* return real number of copied bytes */ - bne/l ZERO, r4, tr0 - -___strncpy_from_user_done: - sub r5, r4, r6 /* If done, return copied */ - -___strncpy_from_user_exit: - or r6, ZERO, r2 - ptabs LINK, tr0 - blink tr0, ZERO - /* * extern long __strnlen_user(const char *__s, long __n) * @@ -1982,7 +1942,6 @@ asm_uaccess_start: .long ___copy_user2, ___copy_user_exit .long ___clear_user1, ___clear_user_exit #endif - .long ___strncpy_from_user1, ___strncpy_from_user_exit .long ___strnlen_user1, ___strnlen_user_exit .long ___get_user_asm_b1, ___get_user_asm_b_exit .long ___get_user_asm_w1, ___get_user_asm_w_exit diff --git a/arch/sh/kernel/sh_ksyms_64.c b/arch/sh/kernel/sh_ksyms_64.c index 45afa5c51f6..3a3ba60e4f9 100644 --- a/arch/sh/kernel/sh_ksyms_64.c +++ b/arch/sh/kernel/sh_ksyms_64.c @@ -33,7 +33,6 @@ EXPORT_SYMBOL(__get_user_asm_w); EXPORT_SYMBOL(__get_user_asm_l); EXPORT_SYMBOL(__get_user_asm_q); EXPORT_SYMBOL(__strnlen_user); -EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(__copy_user); From cba8df4be3bdf10c86a26c458c5fc2ca978eeb2c Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 4 Jun 2012 15:46:05 +0900 Subject: [PATCH 400/475] sh: use the new generic strnlen_user() function This discards both the _32 and _64 versions in favour of the consolidated generic one. Signed-off-by: Paul Mundt --- arch/sh/Kconfig | 1 + arch/sh/include/asm/uaccess.h | 40 ++------------------- arch/sh/include/asm/uaccess_32.h | 36 ------------------- arch/sh/include/asm/uaccess_64.h | 2 -- arch/sh/include/asm/word-at-a-time.h | 53 ++++++++++++++++++++++++++++ arch/sh/kernel/cpu/sh5/entry.S | 41 --------------------- arch/sh/kernel/sh_ksyms_64.c | 1 - 7 files changed, 57 insertions(+), 117 deletions(-) create mode 100644 arch/sh/include/asm/word-at-a-time.h diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index cbffc26c043..31d9db7913e 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -33,6 +33,7 @@ config SUPERH select GENERIC_CLOCKEVENTS select GENERIC_CMOS_UPDATE if SH_SH03 || SH_DREAMCAST select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h index 3df7eee9c15..8698a80ed00 100644 --- a/arch/sh/include/asm/uaccess.h +++ b/arch/sh/include/asm/uaccess.h @@ -104,6 +104,9 @@ struct __large_struct { unsigned long buf[100]; }; extern long strncpy_from_user(char *dest, const char __user *src, long count); +extern __must_check long strlen_user(const char __user *str); +extern __must_check long strnlen_user(const char __user *str, long n); + /* Generic arbitrary sized copy. */ /* Return the number of bytes NOT copied */ __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n); @@ -165,43 +168,6 @@ copy_to_user(void __user *to, const void *from, unsigned long n) return __copy_size; } -/** - * strnlen_user: - Get the size of a string in user space. - * @s: The string to measure. - * @n: The maximum valid length - * - * Context: User context only. This function may sleep. - * - * Get the size of a NUL-terminated string in user space. - * - * Returns the size of the string INCLUDING the terminating NUL. - * On exception, returns 0. - * If the string is too long, returns a value greater than @n. - */ -static inline long strnlen_user(const char __user *s, long n) -{ - if (!__addr_ok(s)) - return 0; - else - return __strnlen_user(s, n); -} - -/** - * strlen_user: - Get the size of a string in user space. - * @str: The string to measure. - * - * Context: User context only. This function may sleep. - * - * Get the size of a NUL-terminated string in user space. - * - * Returns the size of the string INCLUDING the terminating NUL. - * On exception, returns 0. - * - * If there is a limit on the length of a valid string, you may wish to - * consider using strnlen_user() instead. - */ -#define strlen_user(str) strnlen_user(str, ~0UL >> 1) - /* * The exception table consists of pairs of addresses: the first is the * address of an instruction that is allowed to fault, and the second is diff --git a/arch/sh/include/asm/uaccess_32.h b/arch/sh/include/asm/uaccess_32.h index ef44d05bb63..c0de7ee35ab 100644 --- a/arch/sh/include/asm/uaccess_32.h +++ b/arch/sh/include/asm/uaccess_32.h @@ -170,40 +170,4 @@ __asm__ __volatile__( \ extern void __put_user_unknown(void); -/* - * Return the size of a string (including the ending 0 even when we have - * exceeded the maximum string length). - */ -static inline long __strnlen_user(const char __user *__s, long __n) -{ - unsigned long res; - unsigned long __dummy; - - __asm__ __volatile__( - "1:\t" - "mov.b @(%0,%3), %1\n\t" - "cmp/eq %4, %0\n\t" - "bt/s 2f\n\t" - " add #1, %0\n\t" - "tst %1, %1\n\t" - "bf 1b\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3:\n\t" - "mov.l 4f, %1\n\t" - "jmp @%1\n\t" - " mov #0, %0\n" - ".balign 4\n" - "4: .long 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .balign 4\n" - " .long 1b,3b\n" - ".previous" - : "=z" (res), "=&r" (__dummy) - : "0" (0), "r" (__s), "r" (__n) - : "t"); - return res; -} - #endif /* __ASM_SH_UACCESS_32_H */ diff --git a/arch/sh/include/asm/uaccess_64.h b/arch/sh/include/asm/uaccess_64.h index 0042c9054b2..2e07e0f40c6 100644 --- a/arch/sh/include/asm/uaccess_64.h +++ b/arch/sh/include/asm/uaccess_64.h @@ -84,6 +84,4 @@ extern long __put_user_asm_l(void *, long); extern long __put_user_asm_q(void *, long); extern void __put_user_unknown(void); -extern long __strnlen_user(const char *__s, long __n); - #endif /* __ASM_SH_UACCESS_64_H */ diff --git a/arch/sh/include/asm/word-at-a-time.h b/arch/sh/include/asm/word-at-a-time.h new file mode 100644 index 00000000000..6e38953ff7f --- /dev/null +++ b/arch/sh/include/asm/word-at-a-time.h @@ -0,0 +1,53 @@ +#ifndef __ASM_SH_WORD_AT_A_TIME_H +#define __ASM_SH_WORD_AT_A_TIME_H + +#ifdef CONFIG_CPU_BIG_ENDIAN +# include +#else +/* + * Little-endian version cribbed from x86. + */ +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } + +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ +static inline long count_masked_bytes(long mask) +{ + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; +} + +/* Return nonzero if it has a zero */ +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) +{ + unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; + *bits = mask; + return mask; +} + +static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c) +{ + return bits; +} + +static inline unsigned long create_zero_mask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +/* The mask we created is directly usable as a bytemask */ +#define zero_bytemask(mask) (mask) + +static inline unsigned long find_zero(unsigned long mask) +{ + return count_masked_bytes(mask); +} +#endif + +#endif diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S index b7b3f63299b..b7cf6a547f1 100644 --- a/arch/sh/kernel/cpu/sh5/entry.S +++ b/arch/sh/kernel/cpu/sh5/entry.S @@ -1568,46 +1568,6 @@ ___clear_user_exit: #endif /* CONFIG_MMU */ -/* - * extern long __strnlen_user(const char *__s, long __n) - * - * Inputs: - * (r2) source address - * (r3) source size in bytes - * - * Ouputs: - * (r2) -EFAULT (in case of faulting) - * string length (otherwise) - */ - .global __strnlen_user -__strnlen_user: - pta ___strnlen_user_set_reply, tr0 - pta ___strnlen_user1, tr1 - or ZERO, ZERO, r5 /* r5 = counter */ - movi -(EFAULT), r6 /* r6 = reply, no real fixup */ - or ZERO, ZERO, r7 /* r7 = data, clear top byte of data */ - beq r3, ZERO, tr0 - -___strnlen_user1: - ldx.b r2, r5, r7 /* Fault address: only in reading */ - addi r3, -1, r3 /* No real fixup */ - addi r5, 1, r5 - beq r3, ZERO, tr0 - bne r7, ZERO, tr1 -! The line below used to be active. This meant led to a junk byte lying between each pair -! of entries in the argv & envp structures in memory. Whilst the program saw the right data -! via the argv and envp arguments to main, it meant the 'flat' representation visible through -! /proc/$pid/cmdline was corrupt, causing trouble with ps, for example. -! addi r5, 1, r5 /* Include '\0' */ - -___strnlen_user_set_reply: - or r5, ZERO, r6 /* If done, return counter */ - -___strnlen_user_exit: - or r6, ZERO, r2 - ptabs LINK, tr0 - blink tr0, ZERO - /* * extern long __get_user_asm_?(void *val, long addr) * @@ -1942,7 +1902,6 @@ asm_uaccess_start: .long ___copy_user2, ___copy_user_exit .long ___clear_user1, ___clear_user_exit #endif - .long ___strnlen_user1, ___strnlen_user_exit .long ___get_user_asm_b1, ___get_user_asm_b_exit .long ___get_user_asm_w1, ___get_user_asm_w_exit .long ___get_user_asm_l1, ___get_user_asm_l_exit diff --git a/arch/sh/kernel/sh_ksyms_64.c b/arch/sh/kernel/sh_ksyms_64.c index 3a3ba60e4f9..26a0774f527 100644 --- a/arch/sh/kernel/sh_ksyms_64.c +++ b/arch/sh/kernel/sh_ksyms_64.c @@ -32,7 +32,6 @@ EXPORT_SYMBOL(__get_user_asm_b); EXPORT_SYMBOL(__get_user_asm_w); EXPORT_SYMBOL(__get_user_asm_l); EXPORT_SYMBOL(__get_user_asm_q); -EXPORT_SYMBOL(__strnlen_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(__copy_user); From 4149268e7816d719b0fde8e89aaa6db8c168fc43 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Fri, 18 May 2012 13:57:19 -0700 Subject: [PATCH 401/475] target: Add TFO->put_session() caller for HW fabric session shutdown This patch adds an optional target_core_fabric_ops->put_session() caller within the existing target_put_session() code path. This is required by tcm_qla2xxx code in order to invoke it's own fabric specific session shutdown handler using se_session->sess_kref. Signed-off-by: Joern Engel Cc: Roland Dreier Cc: Arun Easi Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 8 +++++++- include/target/target_core_fabric.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index b05fdc0c05d..634d0f31a28 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -315,7 +315,7 @@ void transport_register_session( } EXPORT_SYMBOL(transport_register_session); -static void target_release_session(struct kref *kref) +void target_release_session(struct kref *kref) { struct se_session *se_sess = container_of(kref, struct se_session, sess_kref); @@ -332,6 +332,12 @@ EXPORT_SYMBOL(target_get_session); void target_put_session(struct se_session *se_sess) { + struct se_portal_group *tpg = se_sess->se_tpg; + + if (tpg->se_tpg_tfo->put_session != NULL) { + tpg->se_tpg_tfo->put_session(se_sess); + return; + } kref_put(&se_sess->sess_kref, target_release_session); } EXPORT_SYMBOL(target_put_session); diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 116959933f4..c78a23333c4 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -47,6 +47,7 @@ struct target_core_fabric_ops { */ int (*check_stop_free)(struct se_cmd *); void (*release_cmd)(struct se_cmd *); + void (*put_session)(struct se_session *); /* * Called with spin_lock_bh(struct se_portal_group->session_lock held. */ From 8e780be960c3e7000a94652bc56efeb3c932ee24 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 13 Jun 2012 11:36:36 +0900 Subject: [PATCH 402/475] sh: Fix up link time defsym warnings. sh-linux-gnu-ld:--defsym 'jiffies=jiffies_64': ignoring invalid character `'' in expression For some reason ld has recently started complaining about the quotes, so just get rid of them, we don't need them for anything anyways. Signed-off-by: Paul Mundt --- arch/sh/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sh/Makefile b/arch/sh/Makefile index 46edf070da1..dbf887edabd 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile @@ -114,11 +114,11 @@ endif ifdef CONFIG_CPU_LITTLE_ENDIAN ld-bfd := elf32-$(UTS_MACHINE)-linux -LDFLAGS_vmlinux += --defsym 'jiffies=jiffies_64' --oformat $(ld-bfd) +LDFLAGS_vmlinux += --defsym jiffies=jiffies_64 --oformat $(ld-bfd) LDFLAGS += -EL else ld-bfd := elf32-$(UTS_MACHINE)big-linux -LDFLAGS_vmlinux += --defsym 'jiffies=jiffies_64+4' --oformat $(ld-bfd) +LDFLAGS_vmlinux += --defsym jiffies=jiffies_64+4 --oformat $(ld-bfd) LDFLAGS += -EB endif From aaf68b753313f1e67fd2e8996e32ab2813f441fa Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Fri, 18 May 2012 13:58:23 -0700 Subject: [PATCH 403/475] tcm_qla2xxx: Convert to TFO->put_session() usage This patch converts tcm_qla2xxx code to use an internal kref_put() for se_session->sess_kref in order to ensure that qla_hw_data->hardware_lock can be held while calling qlt_unreg_sess() for the final put. Signed-off-by: Joern Engel Cc: Roland Dreier Cc: Arun Easi Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 436598f5740..3cee5b67d65 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -844,9 +844,28 @@ static void tcm_qla2xxx_clear_nacl_from_fcport_map(struct qla_tgt_sess *sess) se_nacl, nacl->nport_wwnn, nacl->nport_id); } +static void tcm_qla2xxx_release_session(struct kref *kref) +{ + struct se_session *se_sess = container_of(kref, + struct se_session, sess_kref); + + qlt_unreg_sess(se_sess->fabric_sess_ptr); +} + +static void tcm_qla2xxx_put_session(struct se_session *se_sess) +{ + struct qla_tgt_sess *sess = se_sess->fabric_sess_ptr; + struct qla_hw_data *ha = sess->vha->hw; + unsigned long flags; + + spin_lock_irqsave(&ha->hardware_lock, flags); + kref_put(&se_sess->sess_kref, tcm_qla2xxx_release_session); + spin_unlock_irqrestore(&ha->hardware_lock, flags); +} + static void tcm_qla2xxx_put_sess(struct qla_tgt_sess *sess) { - target_put_session(sess->se_sess); + tcm_qla2xxx_put_session(sess->se_sess); } static void tcm_qla2xxx_shutdown_sess(struct qla_tgt_sess *sess) @@ -1731,6 +1750,7 @@ static struct target_core_fabric_ops tcm_qla2xxx_ops = { .new_cmd_map = NULL, .check_stop_free = tcm_qla2xxx_check_stop_free, .release_cmd = tcm_qla2xxx_release_cmd, + .put_session = tcm_qla2xxx_put_session, .shutdown_session = tcm_qla2xxx_shutdown_session, .close_session = tcm_qla2xxx_close_session, .sess_get_index = tcm_qla2xxx_sess_get_index, @@ -1779,6 +1799,7 @@ static struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .tpg_release_fabric_acl = tcm_qla2xxx_release_fabric_acl, .tpg_get_inst_index = tcm_qla2xxx_tpg_get_inst_index, .release_cmd = tcm_qla2xxx_release_cmd, + .put_session = tcm_qla2xxx_put_session, .shutdown_session = tcm_qla2xxx_shutdown_session, .close_session = tcm_qla2xxx_close_session, .sess_get_index = tcm_qla2xxx_sess_get_index, From d14a5fdc26986f7bac8376a339f336be18ba2a90 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sat, 12 May 2012 22:39:07 +0200 Subject: [PATCH 404/475] sh: Setup CROSS_COMPILE at the top CROSS_COMPILE must be setup before using e.g. cc-option (and a few other as-*, cc-*, ld-* macros), else they will check against the wrong compiler when cross-compiling, and may invoke the cross compiler with wrong or suboptimal compiler options. Signed-off-by: Geert Uytterhoeven Cc: Paul Mundt Cc: linux-sh@vger.kernel.org Signed-off-by: Paul Mundt --- arch/sh/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/sh/Makefile b/arch/sh/Makefile index dbf887edabd..aed701c7b11 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile @@ -9,6 +9,12 @@ # License. See the file "COPYING" in the main directory of this archive # for more details. # +ifneq ($(SUBARCH),$(ARCH)) + ifeq ($(CROSS_COMPILE),) + CROSS_COMPILE := $(call cc-cross-prefix, $(UTS_MACHINE)-linux- $(UTS_MACHINE)-linux-gnu- $(UTS_MACHINE)-unknown-linux-gnu-) + endif +endif + isa-y := any isa-$(CONFIG_SH_DSP) := sh isa-$(CONFIG_CPU_SH2) := sh2 @@ -106,12 +112,6 @@ LDFLAGS_vmlinux += --defsym phys_stext=_stext-$(CONFIG_PAGE_OFFSET) \ KBUILD_DEFCONFIG := cayman_defconfig endif -ifneq ($(SUBARCH),$(ARCH)) - ifeq ($(CROSS_COMPILE),) - CROSS_COMPILE := $(call cc-cross-prefix, $(UTS_MACHINE)-linux- $(UTS_MACHINE)-linux-gnu- $(UTS_MACHINE)-unknown-linux-gnu-) - endif -endif - ifdef CONFIG_CPU_LITTLE_ENDIAN ld-bfd := elf32-$(UTS_MACHINE)-linux LDFLAGS_vmlinux += --defsym jiffies=jiffies_64 --oformat $(ld-bfd) From 1318002aebadf18217ad3de677b6c96b8140dcff Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 13 Jun 2012 11:59:47 +0900 Subject: [PATCH 405/475] sh: Kill off additional asm-generic wrappers. A few wrappers were overlooked in the initial conversion, take care of them now. Signed-off-by: Paul Mundt --- arch/sh/include/asm/Kbuild | 5 +++++ arch/sh/include/asm/delay.h | 1 - arch/sh/include/asm/kvm_para.h | 1 - arch/sh/include/asm/local64.h | 1 - arch/sh/include/asm/scatterlist.h | 6 ------ arch/sh/include/asm/sizes.h | 1 - 6 files changed, 5 insertions(+), 10 deletions(-) delete mode 100644 arch/sh/include/asm/delay.h delete mode 100644 arch/sh/include/asm/kvm_para.h delete mode 100644 arch/sh/include/asm/local64.h delete mode 100644 arch/sh/include/asm/scatterlist.h delete mode 100644 arch/sh/include/asm/sizes.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 09d1e60ef47..7b673ddcd55 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -3,6 +3,7 @@ include include/asm-generic/Kbuild.asm generic-y += bitsperlong.h generic-y += cputime.h generic-y += current.h +generic-y += delay.h generic-y += div64.h generic-y += emergency-restart.h generic-y += errno.h @@ -10,7 +11,9 @@ generic-y += fcntl.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += kvm_para.h generic-y += local.h +generic-y += local64.h generic-y += param.h generic-y += parport.h generic-y += percpu.h @@ -18,10 +21,12 @@ generic-y += poll.h generic-y += mman.h generic-y += msgbuf.h generic-y += resource.h +generic-y += scatterlist.h generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h generic-y += siginfo.h +generic-y += sizes.h generic-y += socket.h generic-y += statfs.h generic-y += termbits.h diff --git a/arch/sh/include/asm/delay.h b/arch/sh/include/asm/delay.h deleted file mode 100644 index 9670e127b7b..00000000000 --- a/arch/sh/include/asm/delay.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/kvm_para.h b/arch/sh/include/asm/kvm_para.h deleted file mode 100644 index 14fab8f0b95..00000000000 --- a/arch/sh/include/asm/kvm_para.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/local64.h b/arch/sh/include/asm/local64.h deleted file mode 100644 index 36c93b5cc23..00000000000 --- a/arch/sh/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sh/include/asm/scatterlist.h b/arch/sh/include/asm/scatterlist.h deleted file mode 100644 index 98dfc3510f1..00000000000 --- a/arch/sh/include/asm/scatterlist.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_SH_SCATTERLIST_H -#define __ASM_SH_SCATTERLIST_H - -#include - -#endif /* __ASM_SH_SCATTERLIST_H */ diff --git a/arch/sh/include/asm/sizes.h b/arch/sh/include/asm/sizes.h deleted file mode 100644 index dd248c2e108..00000000000 --- a/arch/sh/include/asm/sizes.h +++ /dev/null @@ -1 +0,0 @@ -#include From f2d5d9b90b095ab0e8097b2b0793f4a56ed98147 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 18 May 2012 15:37:53 -0700 Subject: [PATCH 406/475] tcm_qla2xxx: Clear session s_id + loop_id earlier during shutdown This patch adds a new tcm_qla2xxx_clear_sess_lookup() call to clear session specific s_id + loop_id entries used for se_node_acl pointer lookup ahead of releasing se_session within the process context workqueue callback in tcm_qla2xxx_free_session(). It makes the call in existing tcm_qla2xxx_clear_nacl_from_fcport_map() code invoked from qlt_unreg_sess() in interrupt context w/ hardware_lock held, ahead of the process context callback into qlt_free_session_done() -> tcm_qla2xxx_free_session(). We are doing this to address a race between incoming ATIO or TMR packets using stale se_node_acl pointer once session shutdown has been invoked via qlt_unreg_sess() in qla_target.c LLD code, and when the entire tcm_qla2xxx endpoint has not been forced into shutdown w/ echo 0 > ../$QLA2XXX_PORT/enable Cc: Joern Engel Cc: Roland Dreier Cc: Arun Easi Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 48 +++++++++++++++++++----------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 3cee5b67d65..a641c970569 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -821,6 +821,8 @@ static int tcm_qla2xxx_setup_nacl_from_rport( return 0; } +static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *, + struct tcm_qla2xxx_nacl *, struct qla_tgt_sess *); /* * Expected to be called with struct qla_hw_data->hardware_lock held */ @@ -842,6 +844,16 @@ static void tcm_qla2xxx_clear_nacl_from_fcport_map(struct qla_tgt_sess *sess) pr_debug("Removed from fcport_map: %p for WWNN: 0x%016LX, port_id: 0x%06x\n", se_nacl, nacl->nport_wwnn, nacl->nport_id); + /* + * Now clear the se_nacl and session pointers from our HW lport lookup + * table mapping for this initiator's fabric S_ID and LOOP_ID entries. + * + * This is done ahead of callbacks into tcm_qla2xxx_free_session() -> + * target_wait_for_sess_cmds() before the session waits for outstanding + * I/O to complete, to avoid a race between session shutdown execution + * and incoming ATIOs or TMRs picking up a stale se_node_act reference. + */ + tcm_qla2xxx_clear_sess_lookup(lport, nacl, sess); } static void tcm_qla2xxx_release_session(struct kref *kref) @@ -1409,6 +1421,25 @@ static void tcm_qla2xxx_set_sess_by_loop_id( nacl->qla_tgt_sess, new_se_nacl, new_se_nacl->initiatorname); } +/* + * Should always be called with qla_hw_data->hardware_lock held. + */ +static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *lport, + struct tcm_qla2xxx_nacl *nacl, struct qla_tgt_sess *sess) +{ + struct se_session *se_sess = sess->se_sess; + unsigned char be_sid[3]; + + be_sid[0] = sess->s_id.b.domain; + be_sid[1] = sess->s_id.b.area; + be_sid[2] = sess->s_id.b.al_pa; + + tcm_qla2xxx_set_sess_by_s_id(lport, NULL, nacl, se_sess, + sess, be_sid); + tcm_qla2xxx_set_sess_by_loop_id(lport, NULL, nacl, se_sess, + sess, sess->loop_id); +} + static void tcm_qla2xxx_free_session(struct qla_tgt_sess *sess) { struct qla_tgt *tgt = sess->tgt; @@ -1417,8 +1448,6 @@ static void tcm_qla2xxx_free_session(struct qla_tgt_sess *sess) struct se_node_acl *se_nacl; struct tcm_qla2xxx_lport *lport; struct tcm_qla2xxx_nacl *nacl; - unsigned char be_sid[3]; - unsigned long flags; BUG_ON(in_interrupt()); @@ -1438,21 +1467,6 @@ static void tcm_qla2xxx_free_session(struct qla_tgt_sess *sess) return; } target_wait_for_sess_cmds(se_sess, 0); - /* - * And now clear the se_nacl and session pointers from our HW lport - * mappings for fabric S_ID and LOOP_ID. - */ - memset(&be_sid, 0, 3); - be_sid[0] = sess->s_id.b.domain; - be_sid[1] = sess->s_id.b.area; - be_sid[2] = sess->s_id.b.al_pa; - - spin_lock_irqsave(&ha->hardware_lock, flags); - tcm_qla2xxx_set_sess_by_s_id(lport, NULL, nacl, se_sess, - sess, be_sid); - tcm_qla2xxx_set_sess_by_loop_id(lport, NULL, nacl, se_sess, - sess, sess->loop_id); - spin_unlock_irqrestore(&ha->hardware_lock, flags); transport_deregister_session_configfs(sess->se_sess); transport_deregister_session(sess->se_sess); From 59e4f541baf728dbb426949bfa9f6862387ffd0e Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 4 Jun 2012 23:24:51 -0700 Subject: [PATCH 407/475] target: Return error to initiator if SET TARGET PORT GROUPS emulation fails The error paths in target_emulate_set_target_port_groups() are all essentially "rc = -EINVAL; goto out;" but the code at "out:" ignores rc and always returns success. This means that even if eg explicit ALUA is turned off, the initiator will always see a good SCSI status for SET TARGET PORT GROUPS. Fix this by returning rc as is intended. It appears this bug was added by the following patch: commit 05d1c7c0d0db4cc25548d9aadebb416888a82327 Author: Andy Grover Date: Wed Jul 20 19:13:28 2011 +0000 target: Make all control CDBs scatter-gather Signed-off-by: Roland Dreier Cc: Andy Grover Cc: Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index e624b836469..91799973081 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -374,8 +374,9 @@ int target_emulate_set_target_port_groups(struct se_cmd *cmd) out: transport_kunmap_data_sg(cmd); - target_complete_cmd(cmd, GOOD); - return 0; + if (!rc) + target_complete_cmd(cmd, GOOD); + return rc; } static inline int core_alua_state_nonoptimized( From 3578ddba1ae93263d373e7bc85fd38d1f0368b78 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 4 Jun 2012 23:37:34 -0700 Subject: [PATCH 408/475] tcm_qla2xxx: Don't insert nacls without sessions into the btree When we create an explicit node ACL in tcm_qla2xxx_make_nodeacl(), there is a call to tcm_qla2xxx_setup_nacl_from_rport(), which puts the node ACL into the lport_fcport_map even though there is no session yet for the initiator. Since the only time we remove entries from this map is when we free a session, this means that if we later delete this node ACL without the initiator ever creating a session, we'll leave the nacl pointer in the btree pointing at freed memory. This is especially bad if that initiator later does send us a command that would cause us to create a dynamic ACL and session: we'll find the stale freed nacl pointer in the btree and end up with use-after-free. We could add more code to clear the btree entry when deleting the explicit nacl, but the original insertion is pointless: without a session attached, we'll just have to update the entry when a session appears anyway. So we can just delete tcm_qla2xxx_setup_nacl_from_rport() and the code that calls it. Signed-off-by: Roland Dreier Cc: Chad Dupuis Cc: Giridhar Malavali Cc: Arun Easi Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 73 ------------------------------ 1 file changed, 73 deletions(-) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index a641c970569..acc3b1151c4 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -762,65 +762,6 @@ static u16 tcm_qla2xxx_set_fabric_sense_len(struct se_cmd *se_cmd, struct target_fabric_configfs *tcm_qla2xxx_fabric_configfs; struct target_fabric_configfs *tcm_qla2xxx_npiv_fabric_configfs; -static int tcm_qla2xxx_setup_nacl_from_rport( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct tcm_qla2xxx_lport *lport, - struct tcm_qla2xxx_nacl *nacl, - u64 rport_wwnn) -{ - struct scsi_qla_host *vha = lport->qla_vha; - struct Scsi_Host *sh = vha->host; - struct fc_host_attrs *fc_host = shost_to_fc_host(sh); - struct fc_rport *rport; - unsigned long flags; - void *node; - int rc; - - /* - * Scan the existing rports, and create a session for the - * explict NodeACL is an matching rport->node_name already - * exists. - */ - spin_lock_irqsave(sh->host_lock, flags); - list_for_each_entry(rport, &fc_host->rports, peers) { - if (rport_wwnn != rport->node_name) - continue; - - pr_debug("Located existing rport_wwpn and rport->node_name: 0x%016LX, port_id: 0x%04x\n", - rport->node_name, rport->port_id); - nacl->nport_id = rport->port_id; - - spin_unlock_irqrestore(sh->host_lock, flags); - - spin_lock_irqsave(&vha->hw->hardware_lock, flags); - node = btree_lookup32(&lport->lport_fcport_map, rport->port_id); - if (node) { - rc = btree_update32(&lport->lport_fcport_map, - rport->port_id, se_nacl); - } else { - rc = btree_insert32(&lport->lport_fcport_map, - rport->port_id, se_nacl, - GFP_ATOMIC); - } - spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); - - if (rc) { - pr_err("Unable to insert se_nacl into fcport_map"); - WARN_ON(rc > 0); - return rc; - } - - pr_debug("Inserted into fcport_map: %p for WWNN: 0x%016LX, port_id: 0x%08x\n", - se_nacl, rport_wwnn, nacl->nport_id); - - return 1; - } - spin_unlock_irqrestore(sh->host_lock, flags); - - return 0; -} - static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *, struct tcm_qla2xxx_nacl *, struct qla_tgt_sess *); /* @@ -890,14 +831,10 @@ static struct se_node_acl *tcm_qla2xxx_make_nodeacl( struct config_group *group, const char *name) { - struct se_wwn *se_wwn = se_tpg->se_tpg_wwn; - struct tcm_qla2xxx_lport *lport = container_of(se_wwn, - struct tcm_qla2xxx_lport, lport_wwn); struct se_node_acl *se_nacl, *se_nacl_new; struct tcm_qla2xxx_nacl *nacl; u64 wwnn; u32 qla2xxx_nexus_depth; - int rc; if (tcm_qla2xxx_parse_wwn(name, &wwnn, 1) < 0) return ERR_PTR(-EINVAL); @@ -924,16 +861,6 @@ static struct se_node_acl *tcm_qla2xxx_make_nodeacl( nacl = container_of(se_nacl, struct tcm_qla2xxx_nacl, se_node_acl); nacl->nport_wwnn = wwnn; tcm_qla2xxx_format_wwn(&nacl->nport_name[0], TCM_QLA2XXX_NAMELEN, wwnn); - /* - * Setup a se_nacl handle based on an a matching struct fc_rport setup - * via drivers/scsi/qla2xxx/qla_init.c:qla2x00_reg_remote_port() - */ - rc = tcm_qla2xxx_setup_nacl_from_rport(se_tpg, se_nacl, lport, - nacl, wwnn); - if (rc < 0) { - tcm_qla2xxx_release_fabric_acl(se_tpg, se_nacl_new); - return ERR_PTR(rc); - } return se_nacl; } From 092e1dc3f227ebef9ad45c26ef05c283ca4495a5 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 11 Jun 2012 18:23:15 -0700 Subject: [PATCH 409/475] qla2xxx: Don't crash if we can't find cmd for failed CTIO In qlt_do_ctio_completion(), there's no point in calling qlt_term_ctio_exchange() with a NULL cmd -- all that it does is crash in a NULL pointer dereference, since it does qlt_send_term_exchange(vha, cmd, &cmd->atio, 1); and dereferencing &cmd->atio is a bad idea if cmd itself is NULL. If we really need to do this, we could take the values from the failed CTIO we're processing, but it's not clear if it's worth the replumbing to do that. Signed-off-by: Roland Dreier Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 04f80ebf09e..c263f9016de 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -2477,11 +2477,9 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, } cmd = qlt_ctio_to_cmd(vha, handle, ctio); - if (cmd == NULL) { - if (status != CTIO_SUCCESS) - qlt_term_ctio_exchange(vha, ctio, NULL, status); + if (cmd == NULL) return; - } + se_cmd = &cmd->se_cmd; tfo = se_cmd->se_tfo; From fae9eaf81323eda64f28b9f4f4aeffb807e5b5f4 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 11 Jun 2012 18:23:16 -0700 Subject: [PATCH 410/475] qla2xxx: Don't leak commands we give up on in qlt_do_work() If we go to the "out_term:" exit path in qlt_do_work(), we call qlt_send_term_exchange() with a NULL cmd, which means that it can't possibly free the cmd for us. Add an explicit call to free the command memory, so we don't leak the allocation. This will also fix warnings about "BUG qla_tgt_cmd_cachep: Objects remaining on kmem_cache_close" from slub when unloading the qla2xxx target module. Signed-off-by: Roland Dreier Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index c263f9016de..e91292099a0 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -2725,10 +2725,12 @@ static void qlt_do_work(struct work_struct *work) out_term: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf020, "Terminating work cmd %p", cmd); /* - * cmd has not sent to target yet, so pass NULL as the second argument + * cmd has not sent to target yet, so pass NULL as the second + * argument to qlt_send_term_exchange() and free the memory here. */ spin_lock_irqsave(&ha->hardware_lock, flags); qlt_send_term_exchange(vha, NULL, &cmd->atio, 1); + kmem_cache_free(qla_tgt_cmd_cachep, cmd); spin_unlock_irqrestore(&ha->hardware_lock, flags); if (sess) ha->tgt.tgt_ops->put_sess(sess); From 9389c3c943da7e5f903eebf79d596601537afe01 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 11 Jun 2012 18:31:30 -0700 Subject: [PATCH 411/475] tcm_qla2xxx: tcm_qla2xxx_handle_tmr() can be static Signed-off-by: Roland Dreier Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index acc3b1151c4..2f12c092133 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -652,8 +652,8 @@ static int tcm_qla2xxx_handle_data(struct qla_tgt_cmd *cmd) /* * Called from qla_target.c:qlt_issue_task_mgmt() */ -int tcm_qla2xxx_handle_tmr(struct qla_tgt_mgmt_cmd *mcmd, uint32_t lun, - uint8_t tmr_func, uint32_t tag) +static int tcm_qla2xxx_handle_tmr(struct qla_tgt_mgmt_cmd *mcmd, uint32_t lun, + uint8_t tmr_func, uint32_t tag) { struct qla_tgt_sess *sess = mcmd->sess; struct se_cmd *se_cmd = &mcmd->se_cmd; From d4f75b567bb63b51e5ecd42af1e82d5aed5100dd Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 11 Jun 2012 18:31:31 -0700 Subject: [PATCH 412/475] tcm_qla2xxx: Handle malformed wwn strings properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we make a variable an unsigned int and then expect it to be < 0 on a bad character, we're going to have a bad time. Fix the tcm_qla2xxx code to actually notice if hex_to_bin() returns a negative variable. This was detected by the compiler warning: scsi/qla2xxx/tcm_qla2xxx.c: In function ‘tcm_qla2xxx_npiv_extract_wwn’: scsi/qla2xxx/tcm_qla2xxx.c:148:3: warning: comparison of unsigned expression >= 0 is always true [-Wtype-limits] Signed-off-by: Roland Dreier Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 2f12c092133..6e64314dbbb 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -137,13 +137,15 @@ static char *tcm_qla2xxx_get_fabric_name(void) */ static int tcm_qla2xxx_npiv_extract_wwn(const char *ns, u64 *nm) { - unsigned int i, j, value; + unsigned int i, j; u8 wwn[8]; memset(wwn, 0, sizeof(wwn)); /* Validate and store the new name */ for (i = 0, j = 0; i < 16; i++) { + int value; + value = hex_to_bin(*ns++); if (value >= 0) j = (j << 4) | value; From 5134de2815ac723ecfba4a1c7b2a90fa4d8dcfd9 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Sun, 3 Jun 2012 22:27:01 +0530 Subject: [PATCH 413/475] qla2xxx: Remove version.h header file inclusion version.h header file is no longer required for qla_target code. Signed-off-by: Sachin Kamat Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 1 - drivers/scsi/qla2xxx/qla_target.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index e91292099a0..6986552b47e 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 9ec19bc2f0f..9f9ef1644fd 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -919,7 +919,6 @@ struct qla_tgt_srr_ctio { #define QLA_TGT_XMIT_STATUS 2 #define QLA_TGT_XMIT_ALL (QLA_TGT_XMIT_STATUS|QLA_TGT_XMIT_DATA) -#include extern struct qla_tgt_data qla_target; /* From fdb1117325ad719dc39e81209bc622d511db70e0 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 13 Jun 2012 14:04:58 +0200 Subject: [PATCH 414/475] ARM: dma-mapping: fix debug messages in dmabounce code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes the usage of uninitialized variables in dmabounce code intoduced by commit a227fb92 ('ARM: dma-mapping: remove offset parameter to prepare for generic dma_ops'): arch/arm/common/dmabounce.c: In function ‘dmabounce_sync_for_device’: arch/arm/common/dmabounce.c:409: warning: ‘off’ may be used uninitialized in this function arch/arm/common/dmabounce.c:407: note: ‘off’ was declared here arch/arm/common/dmabounce.c: In function ‘dmabounce_sync_for_cpu’: arch/arm/common/dmabounce.c:369: warning: ‘off’ may be used uninitialized in this function arch/arm/common/dmabounce.c:367: note: ‘off’ was declared here Signed-off-by: Marek Szyprowski --- arch/arm/common/dmabounce.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index 9d7eb530f95..aa07f5938f0 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -366,8 +366,8 @@ static int __dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, struct safe_buffer *buf; unsigned long off; - dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n", - __func__, addr, off, sz, dir); + dev_dbg(dev, "%s(dma=%#x,sz=%zx,dir=%x)\n", + __func__, addr, sz, dir); buf = find_safe_buffer_dev(dev, addr, __func__); if (!buf) @@ -377,8 +377,8 @@ static int __dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, BUG_ON(buf->direction != dir); - dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", - __func__, buf->ptr, virt_to_dma(dev, buf->ptr), + dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x off=%#lx) mapped to %p (dma=%#x)\n", + __func__, buf->ptr, virt_to_dma(dev, buf->ptr), off, buf->safe, buf->safe_dma_addr); DO_STATS(dev->archdata.dmabounce->bounce_count++); @@ -406,8 +406,8 @@ static int __dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, struct safe_buffer *buf; unsigned long off; - dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n", - __func__, addr, off, sz, dir); + dev_dbg(dev, "%s(dma=%#x,sz=%zx,dir=%x)\n", + __func__, addr, sz, dir); buf = find_safe_buffer_dev(dev, addr, __func__); if (!buf) @@ -417,8 +417,8 @@ static int __dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, BUG_ON(buf->direction != dir); - dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", - __func__, buf->ptr, virt_to_dma(dev, buf->ptr), + dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x off=%#lx) mapped to %p (dma=%#x)\n", + __func__, buf->ptr, virt_to_dma(dev, buf->ptr), off, buf->safe, buf->safe_dma_addr); DO_STATS(dev->archdata.dmabounce->bounce_count++); From 161270fc1f9ddfc17154e0d49291472a9cdef7db Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 6 Jun 2012 17:31:26 +0200 Subject: [PATCH 415/475] x86/smp: Fix topology checks on AMD MCM CPUs The warning below triggers on AMD MCM packages because physical package IDs on the cores of a _physical_ socket are the same. I.e., this field says which CPUs belong to the same physical package. However, the same two CPUs belong to two different internal, i.e. "logical" nodes in the same physical socket which is reflected in the CPU-to-node map on x86 with NUMA. Which makes this check wrong on the above topologies so circumvent it. [ 0.444413] Booting Node 0, Processors #1 #2 #3 #4 #5 Ok. [ 0.461388] ------------[ cut here ]------------ [ 0.465997] WARNING: at arch/x86/kernel/smpboot.c:310 topology_sane.clone.1+0x6e/0x81() [ 0.473960] Hardware name: Dinar [ 0.477170] sched: CPU #6's mc-sibling CPU #0 is not on the same node! [node: 1 != 0]. Ignoring dependency. [ 0.486860] Booting Node 1, Processors #6 [ 0.491104] Modules linked in: [ 0.494141] Pid: 0, comm: swapper/6 Not tainted 3.4.0+ #1 [ 0.499510] Call Trace: [ 0.501946] [] ? topology_sane.clone.1+0x6e/0x81 [ 0.508185] [] warn_slowpath_common+0x85/0x9d [ 0.514163] [] warn_slowpath_fmt+0x46/0x48 [ 0.519881] [] topology_sane.clone.1+0x6e/0x81 [ 0.525943] [] set_cpu_sibling_map+0x251/0x371 [ 0.532004] [] start_secondary+0x19a/0x218 [ 0.537729] ---[ end trace 4eaa2a86a8e2da22 ]--- [ 0.628197] #7 #8 #9 #10 #11 Ok. [ 0.807108] Booting Node 3, Processors #12 #13 #14 #15 #16 #17 Ok. [ 0.897587] Booting Node 2, Processors #18 #19 #20 #21 #22 #23 Ok. [ 0.917443] Brought up 24 CPUs We ran a topology sanity check test we have here on it and it all looks ok... hopefully :). Signed-off-by: Borislav Petkov Cc: Andreas Herrmann Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120529135442.GE29157@aftab.osrc.amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fd019d78b1f..c3a6bacc8a3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -349,9 +349,12 @@ static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { - if (c->phys_proc_id == o->phys_proc_id) - return topology_sane(c, o, "mc"); + if (c->phys_proc_id == o->phys_proc_id) { + if (cpu_has(c, X86_FEATURE_AMD_DCM)) + return true; + return topology_sane(c, o, "mc"); + } return false; } From 25f42985825dd93f0593efe454e54c2aa13f7830 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 11 Jun 2012 15:44:26 +0200 Subject: [PATCH 416/475] perf/x86: Fix broken LBR fixup code I noticed that the LBR fixups were not working anymore on programs where they used to. I tracked this down to a recent change to copy_from_user_nmi(): db0dc75d640 ("perf/x86: Check user address explicitly in copy_from_user_nmi()") This commit added a call to __range_not_ok() to the copy_from_user_nmi() routine. The problem is that the logic of the test must be reversed. __range_not_ok() returns 0 if the range is VALID. We want to return early from copy_from_user_nmi() if the range is NOT valid. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Acked-by: Arun Sharma Link: http://lkml.kernel.org/r/20120611134426.GA7542@quad Signed-off-by: Ingo Molnar --- arch/x86/lib/usercopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 677b1ed184c..4f74d94c8d9 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -22,7 +22,7 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) void *map; int ret; - if (__range_not_ok(from, n, TASK_SIZE) == 0) + if (__range_not_ok(from, n, TASK_SIZE)) return len; do { From 2e8b2b29d1f904353c3e54b342ccb8c66390dab8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 13 Jun 2012 16:47:32 +0200 Subject: [PATCH 417/475] ALSA: hda - Don't forget to call init verbs added by fixup list During the split to the auto-parser helper functions, the actual call of init verbs was lost. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=43366 Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 2 +- sound/pci/hda/patch_realtek.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 3acb5824ad3..172370b3793 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -4061,7 +4061,7 @@ static void cx_auto_init_digital(struct hda_codec *codec) static int cx_auto_init(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; - /*snd_hda_sequence_write(codec, cx_auto_init_verbs);*/ + snd_hda_gen_apply_verbs(codec); cx_auto_init_output(codec); cx_auto_init_input(codec); cx_auto_init_digital(codec); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b8966817ccf..f8f4906e498 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1896,6 +1896,7 @@ static int alc_init(struct hda_codec *codec) alc_fix_pll(codec); alc_auto_init_amp(codec, spec->init_amp); + snd_hda_gen_apply_verbs(codec); alc_init_special_input_src(codec); alc_auto_init_std(codec); From ecd57ae28eb34e2b34fac1c5b74bb876efa665c0 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 11 Jun 2012 11:26:41 +0900 Subject: [PATCH 418/475] video: s3c-fb: clear SHADOWCON register when clearing hardware window registers All bits of SHADOWCON register should be cleared when clearing hardware window registers; however, some bits of SHADOWCON register are not cleared previously. Signed-off-by: Jingoo Han Signed-off-by: Florian Tobias Schandinat --- drivers/video/s3c-fb.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/video/s3c-fb.c b/drivers/video/s3c-fb.c index 5f9d8e69029..b5c29399a5e 100644 --- a/drivers/video/s3c-fb.c +++ b/drivers/video/s3c-fb.c @@ -1348,8 +1348,14 @@ static void s3c_fb_clear_win(struct s3c_fb *sfb, int win) writel(0, regs + VIDOSD_A(win, sfb->variant)); writel(0, regs + VIDOSD_B(win, sfb->variant)); writel(0, regs + VIDOSD_C(win, sfb->variant)); - reg = readl(regs + SHADOWCON); - writel(reg & ~SHADOWCON_WINx_PROTECT(win), regs + SHADOWCON); + + if (sfb->variant.has_shadowcon) { + reg = readl(sfb->regs + SHADOWCON); + reg &= ~(SHADOWCON_WINx_PROTECT(win) | + SHADOWCON_CHx_ENABLE(win) | + SHADOWCON_CHx_LOCAL_ENABLE(win)); + writel(reg, sfb->regs + SHADOWCON); + } } static int __devinit s3c_fb_probe(struct platform_device *pdev) From b67989515defba7412acff01162e5bb1f0f5923a Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 11 Jun 2012 11:27:27 +0900 Subject: [PATCH 419/475] video: s3c-fb: fix possible division by zero in s3c_fb_calc_pixclk Divider value can be zero and it makes division by zero from debug message in s3c_fb_calc_pixclk; therefore, it should be fixed. Signed-off-by: Jingoo Han Signed-off-by: Florian Tobias Schandinat --- drivers/video/s3c-fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/s3c-fb.c b/drivers/video/s3c-fb.c index b5c29399a5e..ea7b661e722 100644 --- a/drivers/video/s3c-fb.c +++ b/drivers/video/s3c-fb.c @@ -361,7 +361,7 @@ static int s3c_fb_calc_pixclk(struct s3c_fb *sfb, unsigned int pixclk) result = (unsigned int)tmp / 1000; dev_dbg(sfb->dev, "pixclk=%u, clk=%lu, div=%d (%lu)\n", - pixclk, clk, result, clk / result); + pixclk, clk, result, result ? clk / result : clk); return result; } From c2fb8a3fa25513de8fedb38509b1f15a5bbee47b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 13 Jun 2012 11:20:19 -0400 Subject: [PATCH 420/475] USB: add NO_D3_DURING_SLEEP flag and revert 151b61284776be2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch (as1558) fixes a problem affecting several ASUS computers: The machine crashes or corrupts memory when going into suspend if the ehci-hcd driver is bound to any controllers. Users have been forced to unbind or unload ehci-hcd before putting their systems to sleep. After extensive testing, it was determined that the machines don't like going into suspend when any EHCI controllers are in the PCI D3 power state. Presumably this is a firmware bug, but there's nothing we can do about it except to avoid putting the controllers in D3 during system sleep. The patch adds a new flag to indicate whether the problem is present, and avoids changing the controller's power state if the flag is set. Runtime suspend is unaffected; this matters only for system suspend. However as a side effect, the controller will not respond to remote wakeup requests while the system is asleep. Hence USB wakeup is not functional -- but of course, this is already true in the current state of affairs. A similar patch has already been applied as commit 151b61284776be2d6f02d48c23c3625678960b97 (USB: EHCI: fix crash during suspend on ASUS computers). The patch supersedes that one and reverts it. There are two differences: The old patch added the flag at the USB level; this patch adds it at the PCI level. The old patch applied to all chipsets with the same vendor, subsystem vendor, and product IDs; this patch makes an exception for a known-good system (based on DMI information). Signed-off-by: Alan Stern Tested-by: Dâniel Fraga Tested-by: Andrey Rahmatullin Tested-by: Steven Rostedt Cc: stable Reviewed-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 5 +++++ drivers/pci/quirks.c | 26 ++++++++++++++++++++++++++ drivers/usb/core/hcd-pci.c | 9 --------- drivers/usb/host/ehci-pci.c | 8 -------- include/linux/pci.h | 2 ++ include/linux/usb/hcd.h | 2 -- 6 files changed, 33 insertions(+), 19 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 447e83472c0..77cb54a65cd 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1744,6 +1744,11 @@ int pci_prepare_to_sleep(struct pci_dev *dev) if (target_state == PCI_POWER_ERROR) return -EIO; + /* Some devices mustn't be in D3 during system sleep */ + if (target_state == PCI_D3hot && + (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP)) + return 0; + pci_enable_wake(dev, target_state, device_may_wakeup(&dev->dev)); error = pci_set_power_state(dev, target_state); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 2a752167754..194b243a281 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2929,6 +2929,32 @@ static void __devinit disable_igfx_irq(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq); +/* + * The Intel 6 Series/C200 Series chipset's EHCI controllers on many + * ASUS motherboards will cause memory corruption or a system crash + * if they are in D3 while the system is put into S3 sleep. + */ +static void __devinit asus_ehci_no_d3(struct pci_dev *dev) +{ + const char *sys_info; + static const char good_Asus_board[] = "P8Z68-V"; + + if (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP) + return; + if (dev->subsystem_vendor != PCI_VENDOR_ID_ASUSTEK) + return; + sys_info = dmi_get_system_info(DMI_BOARD_NAME); + if (sys_info && memcmp(sys_info, good_Asus_board, + sizeof(good_Asus_board) - 1) == 0) + return; + + dev_info(&dev->dev, "broken D3 during system sleep on ASUS\n"); + dev->dev_flags |= PCI_DEV_FLAGS_NO_D3_DURING_SLEEP; + device_set_wakeup_capable(&dev->dev, false); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c26, asus_ehci_no_d3); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c2d, asus_ehci_no_d3); + static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end) { diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 57ed9e400c0..622b4a48e73 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -493,15 +493,6 @@ static int hcd_pci_suspend_noirq(struct device *dev) pci_save_state(pci_dev); - /* - * Some systems crash if an EHCI controller is in D3 during - * a sleep transition. We have to leave such controllers in D0. - */ - if (hcd->broken_pci_sleep) { - dev_dbg(dev, "Staying in PCI D0\n"); - return retval; - } - /* If the root hub is dead rather than suspended, disallow remote * wakeup. usb_hc_died() should ensure that both hosts are marked as * dying, so we only need to check the primary roothub. diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index bc94d7bf072..123481793a4 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -144,14 +144,6 @@ static int ehci_pci_setup(struct usb_hcd *hcd) hcd->has_tt = 1; tdi_reset(ehci); } - if (pdev->subsystem_vendor == PCI_VENDOR_ID_ASUSTEK) { - /* EHCI #1 or #2 on 6 Series/C200 Series chipset */ - if (pdev->device == 0x1c26 || pdev->device == 0x1c2d) { - ehci_info(ehci, "broken D3 during system sleep on ASUS\n"); - hcd->broken_pci_sleep = 1; - device_set_wakeup_capable(&pdev->dev, false); - } - } break; case PCI_VENDOR_ID_TDI: if (pdev->device == PCI_DEVICE_ID_TDI_EHCI) { diff --git a/include/linux/pci.h b/include/linux/pci.h index d8c379dba6a..fefb4e19bf6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -176,6 +176,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, /* Provide indication device is assigned by a Virtual Machine Manager */ PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4, + /* Device causes system crash if in D3 during S3 sleep */ + PCI_DEV_FLAGS_NO_D3_DURING_SLEEP = (__force pci_dev_flags_t) 8, }; enum pci_irq_reroute_variant { diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 7f855d50cdf..49b3ac29726 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -126,8 +126,6 @@ struct usb_hcd { unsigned wireless:1; /* Wireless USB HCD */ unsigned authorized_default:1; unsigned has_tt:1; /* Integrated TT in root hub */ - unsigned broken_pci_sleep:1; /* Don't put the - controller in PCI-D3 for system sleep */ unsigned int irq; /* irq allocated */ void __iomem *regs; /* device memory/io */ From 954c3f8a5f1b7716be9eee978b3bc85bae92d7c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 30 May 2012 10:00:14 +0200 Subject: [PATCH 421/475] USB: serial: Enforce USB driver and USB serial driver match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to make sure that the USB serial driver we find matches the USB driver whose probe we are currently executing. Otherwise we will end up with USB serial devices bound to the correct serial driver but wrong USB driver. An example of such cross-probing, where the usbserial_generic USB driver has found the sierra serial driver: May 29 18:26:15 nemi kernel: [ 4442.559246] usbserial_generic 4-4:1.0: Sierra USB modem converter detected May 29 18:26:20 nemi kernel: [ 4447.556747] usbserial_generic 4-4:1.2: Sierra USB modem converter detected May 29 18:26:25 nemi kernel: [ 4452.557288] usbserial_generic 4-4:1.3: Sierra USB modem converter detected sysfs view of the same problem: bjorn@nemi:~$ ls -l /sys/bus/usb/drivers/sierra/ total 0 --w------- 1 root root 4096 May 29 18:23 bind lrwxrwxrwx 1 root root 0 May 29 18:23 module -> ../../../../module/usbserial --w------- 1 root root 4096 May 29 18:23 uevent --w------- 1 root root 4096 May 29 18:23 unbind bjorn@nemi:~$ ls -l /sys/bus/usb-serial/drivers/sierra/ total 0 --w------- 1 root root 4096 May 29 18:23 bind lrwxrwxrwx 1 root root 0 May 29 18:23 module -> ../../../../module/sierra -rw-r--r-- 1 root root 4096 May 29 18:23 new_id lrwxrwxrwx 1 root root 0 May 29 18:32 ttyUSB0 -> ../../../../devices/pci0000:00/0000:00:1d.7/usb4/4-4/4-4:1.0/ttyUSB0 lrwxrwxrwx 1 root root 0 May 29 18:32 ttyUSB1 -> ../../../../devices/pci0000:00/0000:00:1d.7/usb4/4-4/4-4:1.2/ttyUSB1 lrwxrwxrwx 1 root root 0 May 29 18:32 ttyUSB2 -> ../../../../devices/pci0000:00/0000:00:1d.7/usb4/4-4/4-4:1.3/ttyUSB2 --w------- 1 root root 4096 May 29 18:23 uevent --w------- 1 root root 4096 May 29 18:23 unbind bjorn@nemi:~$ ls -l /sys/bus/usb/drivers/usbserial_generic/ total 0 lrwxrwxrwx 1 root root 0 May 29 18:33 4-4:1.0 -> ../../../../devices/pci0000:00/0000:00:1d.7/usb4/4-4/4-4:1.0 lrwxrwxrwx 1 root root 0 May 29 18:33 4-4:1.2 -> ../../../../devices/pci0000:00/0000:00:1d.7/usb4/4-4/4-4:1.2 lrwxrwxrwx 1 root root 0 May 29 18:33 4-4:1.3 -> ../../../../devices/pci0000:00/0000:00:1d.7/usb4/4-4/4-4:1.3 --w------- 1 root root 4096 May 29 18:33 bind lrwxrwxrwx 1 root root 0 May 29 18:33 module -> ../../../../module/usbserial --w------- 1 root root 4096 May 29 18:22 uevent --w------- 1 root root 4096 May 29 18:33 unbind bjorn@nemi:~$ ls -l /sys/bus/usb-serial/drivers/generic/ total 0 --w------- 1 root root 4096 May 29 18:33 bind lrwxrwxrwx 1 root root 0 May 29 18:33 module -> ../../../../module/usbserial -rw-r--r-- 1 root root 4096 May 29 18:33 new_id --w------- 1 root root 4096 May 29 18:22 uevent --w------- 1 root root 4096 May 29 18:33 unbind So we end up with a mismatch between the USB driver and the USB serial driver. The reason for the above is simple: The USB driver probe will succeed if *any* registered serial driver matches, and will use that serial driver for all serial driver functions. This makes ref counting go wrong. We count the USB driver as used, but not the USB serial driver. This may result in Oops'es as demonstrated by Johan Hovold : [11811.646396] drivers/usb/serial/usb-serial.c: get_free_serial 1 [11811.646443] drivers/usb/serial/usb-serial.c: get_free_serial - minor base = 0 [11811.646460] drivers/usb/serial/usb-serial.c: usb_serial_probe - registering ttyUSB0 [11811.646766] usb 6-1: pl2303 converter now attached to ttyUSB0 [11812.264197] USB Serial deregistering driver FTDI USB Serial Device [11812.264865] usbcore: deregistering interface driver ftdi_sio [11812.282180] USB Serial deregistering driver pl2303 [11812.283141] pl2303 ttyUSB0: pl2303 converter now disconnected from ttyUSB0 [11812.283272] usbcore: deregistering interface driver pl2303 [11812.301056] USB Serial deregistering driver generic [11812.301186] usbcore: deregistering interface driver usbserial_generic [11812.301259] drivers/usb/serial/usb-serial.c: usb_serial_disconnect [11812.301823] BUG: unable to handle kernel paging request at f8e7438c [11812.301845] IP: [] usb_serial_disconnect+0xb5/0x100 [usbserial] [11812.301871] *pde = 357ef067 *pte = 00000000 [11812.301957] Oops: 0000 [#1] PREEMPT SMP [11812.301983] Modules linked in: usbserial(-) [last unloaded: pl2303] [11812.302008] [11812.302019] Pid: 1323, comm: modprobe Tainted: G W 3.4.0-rc7+ #101 Dell Inc. Vostro 1520/0T816J [11812.302115] EIP: 0060:[] EFLAGS: 00010246 CPU: 1 [11812.302130] EIP is at usb_serial_disconnect+0xb5/0x100 [usbserial] [11812.302141] EAX: f508a180 EBX: f508a180 ECX: 00000000 EDX: f8e74300 [11812.302151] ESI: f5050800 EDI: 00000001 EBP: f5141e78 ESP: f5141e58 [11812.302160] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [11812.302170] CR0: 8005003b CR2: f8e7438c CR3: 34848000 CR4: 000007d0 [11812.302180] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 [11812.302189] DR6: ffff0ff0 DR7: 00000400 [11812.302199] Process modprobe (pid: 1323, ti=f5140000 task=f61e2bc0 task.ti=f5140000) [11812.302209] Stack: [11812.302216] f8e3be0f f8e3b29c f8e3ae00 00000000 f513641c f5136400 f513641c f507a540 [11812.302325] f5141e98 c133d2c1 00000000 00000000 f509c400 f513641c f507a590 f5136450 [11812.302372] f5141ea8 c12f0344 f513641c f507a590 f5141ebc c12f0c67 00000000 f507a590 [11812.302419] Call Trace: [11812.302439] [] usb_unbind_interface+0x51/0x190 [11812.302456] [] __device_release_driver+0x64/0xb0 [11812.302469] [] driver_detach+0x97/0xa0 [11812.302483] [] bus_remove_driver+0x6c/0xe0 [11812.302500] [] ? __mutex_unlock_slowpath+0xcd/0x140 [11812.302514] [] driver_unregister+0x49/0x80 [11812.302528] [] ? printk+0x1d/0x1f [11812.302540] [] usb_deregister+0x5d/0xb0 [11812.302557] [] ? usb_serial_deregister+0x45/0x50 [usbserial] [11812.302575] [] usb_serial_deregister_drivers+0x2d/0x40 [usbserial] [11812.302593] [] usb_serial_generic_deregister+0x12/0x20 [usbserial] [11812.302611] [] usb_serial_exit+0x8/0x32 [usbserial] [11812.302716] [] sys_delete_module+0x158/0x260 [11812.302730] [] ? mntput+0x1e/0x30 [11812.302746] [] ? sysenter_exit+0xf/0x18 [11812.302746] [] ? trace_hardirqs_on_caller+0xec/0x170 [11812.302746] [] sysenter_do_call+0x12/0x36 [11812.302746] Code: 24 02 00 00 e8 dd f3 20 c8 f6 86 74 02 00 00 02 74 b4 8d 86 4c 02 00 00 47 e8 78 55 4b c8 0f b6 43 0e 39 f8 7f a9 8b 53 04 89 d8 92 8c 00 00 00 89 d8 e8 0e ff ff ff 8b 45 f0 c7 44 24 04 2f [11812.302746] EIP: [] usb_serial_disconnect+0xb5/0x100 [usbserial] SS:ESP 0068:f5141e58 [11812.302746] CR2: 00000000f8e7438c Fix by only evaluating serial drivers pointing back to the USB driver we are currently probing. This still allows two or more drivers to match the same device, running their serial driver probes to sort out which one to use. Cc: stable@vger.kernel.org # 3.0, 3.2, 3.3, 3.4 Signed-off-by: Bjørn Mork Reviewed-by: Felipe Balbi Tested-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb-serial.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 6a1b609a0d9..6e8c527e07c 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -659,12 +659,14 @@ exit: static struct usb_serial_driver *search_serial_device( struct usb_interface *iface) { - const struct usb_device_id *id; + const struct usb_device_id *id = NULL; struct usb_serial_driver *drv; + struct usb_driver *driver = to_usb_driver(iface->dev.driver); /* Check if the usb id matches a known device */ list_for_each_entry(drv, &usb_serial_driver_list, driver_list) { - id = get_iface_id(drv, iface); + if (drv->usb_driver == driver) + id = get_iface_id(drv, iface); if (id) return drv; } From 0b84704a2d6fdf55a3dcdd2a1f2ac8b48913c84b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 31 May 2012 17:03:52 -0400 Subject: [PATCH 422/475] USB: serial-generic: use a single set of device IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The usb-serial-generic driver uses different device IDs for its USB matching and its serial matching. This can lead to problems: The driver can end up getting bound to a USB interface without being allowed to bind to the corresponding serial port. This patch (as1557) fixes the problem by using the same device ID table (the one that can be altered by the "vendor=" and "product=" module parameters) for both purposes. The unused table is removed. Now the driver will bind only to the intended devices. Signed-off-by: Alan Stern CC: Bjørn Mork Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/generic.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 105a6d898ca..9b026bf7afe 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -39,13 +39,6 @@ MODULE_PARM_DESC(product, "User specified USB idProduct"); static struct usb_device_id generic_device_ids[2]; /* Initially all zeroes. */ -/* we want to look at all devices, as the vendor/product id can change - * depending on the command line argument */ -static const struct usb_device_id generic_serial_ids[] = { - {.driver_info = 42}, - {} -}; - /* All of the device info needed for the Generic Serial Converter */ struct usb_serial_driver usb_serial_generic_device = { .driver = { @@ -79,7 +72,8 @@ int usb_serial_generic_register(int _debug) USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT; /* register our generic driver with ourselves */ - retval = usb_serial_register_drivers(serial_drivers, "usbserial_generic", generic_serial_ids); + retval = usb_serial_register_drivers(serial_drivers, + "usbserial_generic", generic_device_ids); #endif return retval; } From 954fba0274058d27c7c07b5ea07c41b3b7477894 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Jun 2012 19:30:21 +0000 Subject: [PATCH 423/475] netpoll: fix netpoll_send_udp() bugs Bogdan Hamciuc diagnosed and fixed following bug in netpoll_send_udp() : "skb->len += len;" instead of "skb_put(skb, len);" Meaning that _if_ a network driver needs to call skb_realloc_headroom(), only packet headers would be copied, leaving garbage in the payload. However the skb_realloc_headroom() must be avoided as much as possible since it requires memory and netpoll tries hard to work even if memory is exhausted (using a pool of preallocated skbs) It appears netpoll_send_udp() reserved 16 bytes for the ethernet header, which happens to work for typicall drivers but not all. Right thing is to use LL_RESERVED_SPACE(dev) (And also add dev->needed_tailroom of tailroom) This patch combines both fixes. Many thanks to Bogdan for raising this issue. Reported-by: Bogdan Hamciuc Signed-off-by: Eric Dumazet Tested-by: Bogdan Hamciuc Cc: Herbert Xu Cc: Neil Horman Reviewed-by: Neil Horman Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 3d84fb9d887..f9f40b932e4 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -362,22 +362,23 @@ EXPORT_SYMBOL(netpoll_send_skb_on_dev); void netpoll_send_udp(struct netpoll *np, const char *msg, int len) { - int total_len, eth_len, ip_len, udp_len; + int total_len, ip_len, udp_len; struct sk_buff *skb; struct udphdr *udph; struct iphdr *iph; struct ethhdr *eth; udp_len = len + sizeof(*udph); - ip_len = eth_len = udp_len + sizeof(*iph); - total_len = eth_len + ETH_HLEN + NET_IP_ALIGN; + ip_len = udp_len + sizeof(*iph); + total_len = ip_len + LL_RESERVED_SPACE(np->dev); - skb = find_skb(np, total_len, total_len - len); + skb = find_skb(np, total_len + np->dev->needed_tailroom, + total_len - len); if (!skb) return; skb_copy_to_linear_data(skb, msg, len); - skb->len += len; + skb_put(skb, len); skb_push(skb, sizeof(*udph)); skb_reset_transport_header(skb); From d6cb3e41386f20fb0777d0b59a2def82c65d37f7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Jun 2012 23:50:04 +0000 Subject: [PATCH 424/475] bnx2x: fix checksum validation bnx2x driver incorrectly sets ip_summed to CHECKSUM_UNNECESSARY on encapsulated segments. TCP stack happily accepts frames with bad checksums, if they are inside a GRE or IPIP encapsulation. Our understanding is that if no IP or L4 csum validation was done by the hardware, we should leave ip_summed as is (CHECKSUM_NONE), since hardware doesn't provide CHECKSUM_COMPLETE support in its cqe. Then, if IP/L4 checksumming was done by the hardware, set CHECKSUM_UNNECESSARY if no error was flagged. Patch based on findings and analysis from Robert Evans Signed-off-by: Eric Dumazet Cc: Eilon Greenstein Cc: Yaniv Rosner Cc: Merav Sicron Cc: Tom Herbert Cc: Robert Evans Cc: Willem de Bruijn Acked-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 15 ----------- .../net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 27 ++++++++++++++----- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index e30e2a2f354..7de82418497 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -747,21 +747,6 @@ struct bnx2x_fastpath { #define ETH_RX_ERROR_FALGS ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG -#define BNX2X_IP_CSUM_ERR(cqe) \ - (!((cqe)->fast_path_cqe.status_flags & \ - ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG) && \ - ((cqe)->fast_path_cqe.type_error_flags & \ - ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG)) - -#define BNX2X_L4_CSUM_ERR(cqe) \ - (!((cqe)->fast_path_cqe.status_flags & \ - ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG) && \ - ((cqe)->fast_path_cqe.type_error_flags & \ - ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG)) - -#define BNX2X_RX_CSUM_OK(cqe) \ - (!(BNX2X_L4_CSUM_ERR(cqe) || BNX2X_IP_CSUM_ERR(cqe))) - #define BNX2X_PRS_FLAG_OVERETH_IPV4(flags) \ (((le16_to_cpu(flags) & \ PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) >> \ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index ad0743bf4bd..cbc56f274e0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -617,6 +617,25 @@ static int bnx2x_alloc_rx_data(struct bnx2x *bp, return 0; } +static void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe, + struct bnx2x_fastpath *fp) +{ + /* Do nothing if no IP/L4 csum validation was done */ + + if (cqe->fast_path_cqe.status_flags & + (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG | + ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)) + return; + + /* If both IP/L4 validation were done, check if an error was found. */ + + if (cqe->fast_path_cqe.type_error_flags & + (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG | + ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG)) + fp->eth_q_stats.hw_csum_err++; + else + skb->ip_summed = CHECKSUM_UNNECESSARY; +} int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget) { @@ -806,13 +825,9 @@ reuse_rx: skb_checksum_none_assert(skb); - if (bp->dev->features & NETIF_F_RXCSUM) { + if (bp->dev->features & NETIF_F_RXCSUM) + bnx2x_csum_validate(skb, cqe, fp); - if (likely(BNX2X_RX_CSUM_OK(cqe))) - skb->ip_summed = CHECKSUM_UNNECESSARY; - else - fp->eth_q_stats.hw_csum_err++; - } skb_record_rx_queue(skb, fp->rx_queue); From afff07e61a5243e14ee3f0a272a0380cd744a8a3 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 13 Jun 2012 11:44:58 +0200 Subject: [PATCH 425/475] usb-storage: Add 090c:1000 to unusal-devs This device gives a bogus answer to get_capacity(16): [ 8628.278614] scsi 8:0:0:0: Direct-Access USB 2.0 USB Flash Drive 1100 PQ: 0 ANSI: 4 [ 8628.279452] sd 8:0:0:0: Attached scsi generic sg4 type 0 [ 8628.280338] sd 8:0:0:0: [sdd] 35747322042253313 512-byte logical blocks: (18.3 EB/15.8 EiB) So set the quirk flag to avoid using get_capacity(16) with it: [11731.386014] usb-storage 2-1.6:1.0: Quirks match for vid 090c pid 1000: 80000 [11731.386075] scsi9 : usb-storage 2-1.6:1.0 [11731.386172] usbcore: registered new interface driver usb-storage [11731.386175] USB Mass Storage support registered. [11732.387394] scsi 9:0:0:0: Direct-Access USB 2.0 USB Flash Drive 1100 PQ: 0 ANSI: 4 [11732.388462] sd 9:0:0:0: Attached scsi generic sg3 type 0 [11732.389432] sd 9:0:0:0: [sdc] 7975296 512-byte logical blocks: (4.08 GB/3.80 GiB) Which makes the capacity look a lot more sane :) Signed-off-by: Hans de Goede Tested-by: Simon Raffeiner Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 1719886bb9b..caf22bf5f82 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1107,6 +1107,13 @@ UNUSUAL_DEV( 0x090a, 0x1200, 0x0000, 0x9999, USB_SC_RBC, USB_PR_BULK, NULL, 0 ), +/* Feiya QDI U2 DISK, reported by Hans de Goede */ +UNUSUAL_DEV( 0x090c, 0x1000, 0x0000, 0xffff, + "Feiya", + "QDI U2 DISK", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_READ_CAPACITY_16 ), + /* aeb */ UNUSUAL_DEV( 0x090c, 0x1132, 0x0000, 0xffff, "Feiya", From 55558c33d6d46cb6f426b729f4119e3b2f12f02b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 22 May 2012 20:54:34 +0300 Subject: [PATCH 426/475] USB: Checking the wrong variable in usb_disable_lpm() We check "u1_params" instead of checking "u2_params". Signed-off-by: Dan Carpenter Signed-off-by: Sarah Sharp --- drivers/usb/core/hub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 04fb834c3fa..25a7422ee65 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -3379,7 +3379,7 @@ int usb_disable_lpm(struct usb_device *udev) return 0; udev->lpm_disable_count++; - if ((udev->u1_params.timeout == 0 && udev->u1_params.timeout == 0)) + if ((udev->u1_params.timeout == 0 && udev->u2_params.timeout == 0)) return 0; /* If LPM is enabled, attempt to disable it. */ From e25e62aecac42379e113c63a674a86ae3ebbec8d Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Thu, 7 Jun 2012 11:10:32 -0700 Subject: [PATCH 427/475] xhci: Fix error path return value. This patch fixes an issue discovered by Dan Carpenter: The patch 3b3db026414b: "xhci: Add infrastructure for host-specific LPM policies." from May 9, 2012, leads to the following warning: drivers/usb/host/xhci.c:3909 xhci_get_timeout_no_hub_lpm() warn: signedness bug returning '-22' 3906 default: 3907 dev_warn(&udev->dev, "%s: Can't get timeout for non-U1 or U2 state.\n", 3908 __func__); 3909 return -EINVAL; ^^^^^^^^^^^^^^ This should be a u16 like USB3_LPM_DISABLED or something. 3910 } 3911 3912 if (sel <= max_sel_pel && pel <= max_sel_pel) 3913 return USB3_LPM_DEVICE_INITIATED; Signed-off-by: Sarah Sharp Reported-by: Dan Carpenter --- drivers/usb/host/xhci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index afdc73ee84a..1d4958f7f00 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3906,7 +3906,7 @@ static u16 xhci_get_timeout_no_hub_lpm(struct usb_device *udev, default: dev_warn(&udev->dev, "%s: Can't get timeout for non-U1 or U2 state.\n", __func__); - return -EINVAL; + return USB3_LPM_DISABLED; } if (sel <= max_sel_pel && pel <= max_sel_pel) From 46ed8f00d8982e49f8fe2c1a9cea192f640cb3ba Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 1 Jun 2012 10:06:23 +0200 Subject: [PATCH 428/475] xhci: Fix invalid loop check in xhci_free_tt_info() xhci_free_tt_info() may access the invalid memory when it removes the last entry but the list is not empty. Then tt_next reaches to the list head but it still tries to check the tt_info of that entry. This patch fixes the bug and cleans up the messy code by rewriting with a simple list_for_each_entry_safe(). This patch should be backported to kernels as old as 3.2, that contain the commit 839c817ce67178ca3c7c7ad534c571bba1e69ebe "xhci: Store information about roothubs and TTs." Signed-off-by: Takashi Iwai Signed-off-by: Sarah Sharp Reviewed-by: Oliver Neukum Cc: --- drivers/usb/host/xhci-mem.c | 39 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index ec4338eec82..898dfc8bc52 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -793,10 +793,9 @@ static void xhci_free_tt_info(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev, int slot_id) { - struct list_head *tt; struct list_head *tt_list_head; - struct list_head *tt_next; - struct xhci_tt_bw_info *tt_info; + struct xhci_tt_bw_info *tt_info, *next; + bool slot_found = false; /* If the device never made it past the Set Address stage, * it may not have the real_port set correctly. @@ -808,34 +807,16 @@ static void xhci_free_tt_info(struct xhci_hcd *xhci, } tt_list_head = &(xhci->rh_bw[virt_dev->real_port - 1].tts); - if (list_empty(tt_list_head)) - return; - - list_for_each(tt, tt_list_head) { - tt_info = list_entry(tt, struct xhci_tt_bw_info, tt_list); - if (tt_info->slot_id == slot_id) + list_for_each_entry_safe(tt_info, next, tt_list_head, tt_list) { + /* Multi-TT hubs will have more than one entry */ + if (tt_info->slot_id == slot_id) { + slot_found = true; + list_del(&tt_info->tt_list); + kfree(tt_info); + } else if (slot_found) { break; + } } - /* Cautionary measure in case the hub was disconnected before we - * stored the TT information. - */ - if (tt_info->slot_id != slot_id) - return; - - tt_next = tt->next; - tt_info = list_entry(tt, struct xhci_tt_bw_info, - tt_list); - /* Multi-TT hubs will have more than one entry */ - do { - list_del(tt); - kfree(tt_info); - tt = tt_next; - if (list_empty(tt_list_head)) - break; - tt_next = tt->next; - tt_info = list_entry(tt, struct xhci_tt_bw_info, - tt_list); - } while (tt_info->slot_id == slot_id); } int xhci_alloc_tt_info(struct xhci_hcd *xhci, From 32f1d2c536d0c26c5814cb0e6a0606c42d02fac1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 1 Jun 2012 10:06:24 +0200 Subject: [PATCH 429/475] xhci: Don't free endpoints in xhci_mem_cleanup() This patch fixes a few issues introduced in the recent fix [f8a9e72d: USB: fix resource leak in xhci power loss path] - The endpoints listed in bw table are just links and each entry is an array member of dev->eps[]. But the commit above adds a kfree() call to these instances, and thus it results in memory corruption. - It clears only the first entry of rh_bw[], but there can be multiple ports. - It'd be safer to clear the list_head of ep as well, not only removing from the list, as it's checked in xhci_discover_or_reset_device(). This patch should be backported to kernels as old as 3.2, that contain the commit 839c817ce67178ca3c7c7ad534c571bba1e69ebe "xhci: Store information about roothubs and TTs." Signed-off-by: Takashi Iwai Signed-off-by: Sarah Sharp Reviewed-by: Oliver Neukum Cc: --- drivers/usb/host/xhci-mem.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 898dfc8bc52..77689bd64ca 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1772,17 +1772,9 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) { struct pci_dev *pdev = to_pci_dev(xhci_to_hcd(xhci)->self.controller); struct dev_info *dev_info, *next; - struct list_head *tt_list_head; - struct list_head *tt; - struct list_head *endpoints; - struct list_head *ep, *q; - struct xhci_tt_bw_info *tt_info; - struct xhci_interval_bw_table *bwt; - struct xhci_virt_ep *virt_ep; - unsigned long flags; int size; - int i; + int i, j, num_ports; /* Free the Event Ring Segment Table and the actual Event Ring */ size = sizeof(struct xhci_erst_entry)*(xhci->erst.num_entries); @@ -1841,21 +1833,22 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) } spin_unlock_irqrestore(&xhci->lock, flags); - bwt = &xhci->rh_bw->bw_table; - for (i = 0; i < XHCI_MAX_INTERVAL; i++) { - endpoints = &bwt->interval_bw[i].endpoints; - list_for_each_safe(ep, q, endpoints) { - virt_ep = list_entry(ep, struct xhci_virt_ep, bw_endpoint_list); - list_del(&virt_ep->bw_endpoint_list); - kfree(virt_ep); + num_ports = HCS_MAX_PORTS(xhci->hcs_params1); + for (i = 0; i < num_ports; i++) { + struct xhci_interval_bw_table *bwt = &xhci->rh_bw[i].bw_table; + for (j = 0; j < XHCI_MAX_INTERVAL; j++) { + struct list_head *ep = &bwt->interval_bw[j].endpoints; + while (!list_empty(ep)) + list_del_init(ep->next); } } - tt_list_head = &xhci->rh_bw->tts; - list_for_each_safe(tt, q, tt_list_head) { - tt_info = list_entry(tt, struct xhci_tt_bw_info, tt_list); - list_del(tt); - kfree(tt_info); + for (i = 0; i < num_ports; i++) { + struct xhci_tt_bw_info *tt, *n; + list_for_each_entry_safe(tt, n, &xhci->rh_bw[i].tts, tt_list) { + list_del(&tt->tt_list); + kfree(tt); + } } xhci->num_usb2_ports = 0; From 622eb783fe6ff4c1baa47db16c3a5db97f9e6e50 Mon Sep 17 00:00:00 2001 From: Andiry Xu Date: Wed, 13 Jun 2012 10:51:57 +0800 Subject: [PATCH 430/475] xHCI: Increase the timeout for controller save/restore state operation When system software decides to power down the xHC with the intent of resuming operation at a later time, it will ask xHC to save the internal state and restore it when resume to correctly recover from a power event. Two bits are used to enable this operation: Save State and Restore State. xHCI spec 4.23.2 says software should "Set the Controller Save/Restore State flag in the USBCMD register and wait for the Save/Restore State Status flag in the USBSTS register to transition to '0'". However, it does not define how long software should wait for the SSS/RSS bit to transition to 0. Currently the timeout is set to 1ms. There is bug report (https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1002697) indicates that the timeout is too short for ASMedia ASM1042 host controller to save/restore the state successfully. Increase the timeout to 10ms helps to resolve the issue. This patch should be backported to stable kernels as old as 2.6.37, that contain the commit 5535b1d5f8885695c6ded783c692e3c0d0eda8ca "USB: xHCI: PCI power management implementation" Signed-off-by: Andiry Xu Signed-off-by: Sarah Sharp Cc: Ming Lei Cc: stable@vger.kernel.org --- drivers/usb/host/xhci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 1d4958f7f00..a979cd0dbe0 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -795,8 +795,8 @@ int xhci_suspend(struct xhci_hcd *xhci) command = xhci_readl(xhci, &xhci->op_regs->command); command |= CMD_CSS; xhci_writel(xhci, command, &xhci->op_regs->command); - if (handshake(xhci, &xhci->op_regs->status, STS_SAVE, 0, 10*100)) { - xhci_warn(xhci, "WARN: xHC CMD_CSS timeout\n"); + if (handshake(xhci, &xhci->op_regs->status, STS_SAVE, 0, 10 * 1000)) { + xhci_warn(xhci, "WARN: xHC save state timeout\n"); spin_unlock_irq(&xhci->lock); return -ETIMEDOUT; } @@ -848,8 +848,8 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) command |= CMD_CRS; xhci_writel(xhci, command, &xhci->op_regs->command); if (handshake(xhci, &xhci->op_regs->status, - STS_RESTORE, 0, 10*100)) { - xhci_dbg(xhci, "WARN: xHC CMD_CSS timeout\n"); + STS_RESTORE, 0, 10 * 1000)) { + xhci_warn(xhci, "WARN: xHC restore state timeout\n"); spin_unlock_irq(&xhci->lock); return -ETIMEDOUT; } From 4f7a67e2dd49fbfba002c453bc24bf00e701cc71 Mon Sep 17 00:00:00 2001 From: Ricardo Martins Date: Tue, 22 May 2012 18:02:03 +0100 Subject: [PATCH 431/475] USB: fix PS3 EHCI systems After commit aaa0ef289afe9186f81e2340114ea413eef0492a "PS3 EHCI QH read work-around", Terratec Grabby (em28xx) stopped working with AMD Geode LX 800 (USB controller AMD CS5536). Since this is a PS3 only fix, the following patch adds a conditional block around it. Signed-off-by: Ricardo Martins Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index b100f5f9f4b..800be38c78b 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -671,7 +671,9 @@ static int ehci_init(struct usb_hcd *hcd) hw = ehci->async->hw; hw->hw_next = QH_NEXT(ehci, ehci->async->qh_dma); hw->hw_info1 = cpu_to_hc32(ehci, QH_HEAD); +#if defined(CONFIG_PPC_PS3) hw->hw_info1 |= cpu_to_hc32(ehci, (1 << 7)); /* I = 1 */ +#endif hw->hw_token = cpu_to_hc32(ehci, QTD_STS_HALT); hw->hw_qtd_next = EHCI_LIST_END(ehci); ehci->async->qh_state = QH_STATE_LINKED; From 07828b10985393cd875eac1545fba47107e97bf9 Mon Sep 17 00:00:00 2001 From: Herton Ronaldo Krzesinski Date: Tue, 22 May 2012 13:47:25 -0300 Subject: [PATCH 432/475] USB: EHCI: Fix build warning in xilinx ehci driver This fixes the following warning: In file included from drivers/usb/host/ehci-hcd.c:1246:0: drivers/usb/host/ehci-xilinx-of.c:293:2: warning: initialization from incompatible pointer type [enabled by default] drivers/usb/host/ehci-xilinx-of.c:293:2: warning: (near initialization for 'ehci_hcd_xilinx_of_driver.shutdown') [enabled by default] Signed-off-by: Herton Ronaldo Krzesinski Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-xilinx-of.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/host/ehci-xilinx-of.c b/drivers/usb/host/ehci-xilinx-of.c index 9c2cc463389..e9713d589e3 100644 --- a/drivers/usb/host/ehci-xilinx-of.c +++ b/drivers/usb/host/ehci-xilinx-of.c @@ -270,14 +270,12 @@ static int ehci_hcd_xilinx_of_remove(struct platform_device *op) * * Properly shutdown the hcd, call driver's shutdown routine. */ -static int ehci_hcd_xilinx_of_shutdown(struct platform_device *op) +static void ehci_hcd_xilinx_of_shutdown(struct platform_device *op) { struct usb_hcd *hcd = dev_get_drvdata(&op->dev); if (hcd->driver->shutdown) hcd->driver->shutdown(hcd); - - return 0; } From 0658a3366db7e27fa32c12e886230bb58c414c92 Mon Sep 17 00:00:00 2001 From: Jan Safrata Date: Tue, 22 May 2012 14:04:50 +0200 Subject: [PATCH 433/475] usb: use usb_serial_put in usb_serial_probe errors The use of kfree(serial) in error cases of usb_serial_probe was invalid - usb_serial structure allocated in create_serial() gets reference of usb_device that needs to be put, so we need to use usb_serial_put() instead of simple kfree(). Signed-off-by: Jan Safrata Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb-serial.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 6e8c527e07c..27483f91a4a 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -757,7 +757,7 @@ static int usb_serial_probe(struct usb_interface *interface, if (retval) { dbg("sub driver rejected device"); - kfree(serial); + usb_serial_put(serial); module_put(type->driver.owner); return retval; } @@ -829,7 +829,7 @@ static int usb_serial_probe(struct usb_interface *interface, */ if (num_bulk_in == 0 || num_bulk_out == 0) { dev_info(&interface->dev, "PL-2303 hack: descriptors matched but endpoints did not\n"); - kfree(serial); + usb_serial_put(serial); module_put(type->driver.owner); return -ENODEV; } @@ -843,7 +843,7 @@ static int usb_serial_probe(struct usb_interface *interface, if (num_ports == 0) { dev_err(&interface->dev, "Generic device with no bulk out, not allowed.\n"); - kfree(serial); + usb_serial_put(serial); module_put(type->driver.owner); return -EIO; } From c4828d96900453f1ad506b1488928307711a8b28 Mon Sep 17 00:00:00 2001 From: Roland Stigge Date: Mon, 11 Jun 2012 21:38:29 +0200 Subject: [PATCH 434/475] USB: ohci-hub: Mark ohci_finish_controller_resume() as __maybe_unused ohci_finish_controller_resume() is intended to be used in platform specific drivers ohci-*.c, included from ohci-hcd.c. Some of them don't actually use ohci_finish_controller_resume(), so mark it as __maybe_unused. Signed-off-by: Roland Stigge Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-hub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/ohci-hub.c b/drivers/usb/host/ohci-hub.c index 836772dfabd..2f3619eefef 100644 --- a/drivers/usb/host/ohci-hub.c +++ b/drivers/usb/host/ohci-hub.c @@ -317,7 +317,7 @@ static int ohci_bus_resume (struct usb_hcd *hcd) } /* Carry out the final steps of resuming the controller device */ -static void ohci_finish_controller_resume(struct usb_hcd *hcd) +static void __maybe_unused ohci_finish_controller_resume(struct usb_hcd *hcd) { struct ohci_hcd *ohci = hcd_to_ohci(hcd); int port; From 354ab8567ae3107a8cbe7228c3181990ba598aac Mon Sep 17 00:00:00 2001 From: Anand Gadiyar Date: Tue, 5 Jun 2012 15:34:27 +0300 Subject: [PATCH 435/475] Fix OMAP EHCI suspend/resume failure (i693) Its observed with some PHY, the 60Mhz clock gets cut too soon for OMAP EHCI, leaving OMAP-EHCI in a bad state. So on starting port suspend, make sure the 60Mhz clock to EHCI is kept alive using an internal clock, so that EHCi can cleanly transition its hw state machine on a port suspend. Its not proven if this is the issue hit on USB3333, but the symptoms look very similar. Signed-off-by: Anand Gadiyar Signed-off-by: Vikram Pandita Signed-off-by: Volodymyr Mieshkov Acked-by: Alan Stern Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-omap.c | 168 ++++++++++++++++++++++++++++++++++- 1 file changed, 167 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c index a44294d1349..17cfb8a1131 100644 --- a/drivers/usb/host/ehci-omap.c +++ b/drivers/usb/host/ehci-omap.c @@ -43,6 +43,7 @@ #include #include #include +#include /* EHCI Register Set */ #define EHCI_INSNREG04 (0xA0) @@ -55,6 +56,15 @@ #define EHCI_INSNREG05_ULPI_EXTREGADD_SHIFT 8 #define EHCI_INSNREG05_ULPI_WRDATA_SHIFT 0 +/* Errata i693 */ +static struct clk *utmi_p1_fck; +static struct clk *utmi_p2_fck; +static struct clk *xclk60mhsp1_ck; +static struct clk *xclk60mhsp2_ck; +static struct clk *usbhost_p1_fck; +static struct clk *usbhost_p2_fck; +static struct clk *init_60m_fclk; + /*-------------------------------------------------------------------------*/ static const struct hc_driver ehci_omap_hc_driver; @@ -70,6 +80,41 @@ static inline u32 ehci_read(void __iomem *base, u32 reg) return __raw_readl(base + reg); } +/* Erratum i693 workaround sequence */ +static void omap_ehci_erratum_i693(struct ehci_hcd *ehci) +{ + int ret = 0; + + /* Switch to the internal 60 MHz clock */ + ret = clk_set_parent(utmi_p1_fck, init_60m_fclk); + if (ret != 0) + ehci_err(ehci, "init_60m_fclk set parent" + "failed error:%d\n", ret); + + ret = clk_set_parent(utmi_p2_fck, init_60m_fclk); + if (ret != 0) + ehci_err(ehci, "init_60m_fclk set parent" + "failed error:%d\n", ret); + + clk_enable(usbhost_p1_fck); + clk_enable(usbhost_p2_fck); + + /* Wait 1ms and switch back to the external clock */ + mdelay(1); + ret = clk_set_parent(utmi_p1_fck, xclk60mhsp1_ck); + if (ret != 0) + ehci_err(ehci, "xclk60mhsp1_ck set parent" + "failed error:%d\n", ret); + + ret = clk_set_parent(utmi_p2_fck, xclk60mhsp2_ck); + if (ret != 0) + ehci_err(ehci, "xclk60mhsp2_ck set parent" + "failed error:%d\n", ret); + + clk_disable(usbhost_p1_fck); + clk_disable(usbhost_p2_fck); +} + static void omap_ehci_soft_phy_reset(struct platform_device *pdev, u8 port) { struct usb_hcd *hcd = dev_get_drvdata(&pdev->dev); @@ -100,6 +145,50 @@ static void omap_ehci_soft_phy_reset(struct platform_device *pdev, u8 port) } } +static int omap_ehci_hub_control( + struct usb_hcd *hcd, + u16 typeReq, + u16 wValue, + u16 wIndex, + char *buf, + u16 wLength +) +{ + struct ehci_hcd *ehci = hcd_to_ehci(hcd); + u32 __iomem *status_reg = &ehci->regs->port_status[ + (wIndex & 0xff) - 1]; + u32 temp; + unsigned long flags; + int retval = 0; + + spin_lock_irqsave(&ehci->lock, flags); + + if (typeReq == SetPortFeature && wValue == USB_PORT_FEAT_SUSPEND) { + temp = ehci_readl(ehci, status_reg); + if ((temp & PORT_PE) == 0 || (temp & PORT_RESET) != 0) { + retval = -EPIPE; + goto done; + } + + temp &= ~PORT_WKCONN_E; + temp |= PORT_WKDISC_E | PORT_WKOC_E; + ehci_writel(ehci, temp | PORT_SUSPEND, status_reg); + + omap_ehci_erratum_i693(ehci); + + set_bit((wIndex & 0xff) - 1, &ehci->suspended_ports); + goto done; + } + + spin_unlock_irqrestore(&ehci->lock, flags); + + /* Handle the hub control events here */ + return ehci_hub_control(hcd, typeReq, wValue, wIndex, buf, wLength); +done: + spin_unlock_irqrestore(&ehci->lock, flags); + return retval; +} + static void disable_put_regulator( struct ehci_hcd_omap_platform_data *pdata) { @@ -264,8 +353,76 @@ static int ehci_hcd_omap_probe(struct platform_device *pdev) /* root ports should always stay powered */ ehci_port_power(omap_ehci, 1); + /* get clocks */ + utmi_p1_fck = clk_get(dev, "utmi_p1_gfclk"); + if (IS_ERR(utmi_p1_fck)) { + ret = PTR_ERR(utmi_p1_fck); + dev_err(dev, "utmi_p1_gfclk failed error:%d\n", ret); + goto err_add_hcd; + } + + xclk60mhsp1_ck = clk_get(dev, "xclk60mhsp1_ck"); + if (IS_ERR(xclk60mhsp1_ck)) { + ret = PTR_ERR(xclk60mhsp1_ck); + dev_err(dev, "xclk60mhsp1_ck failed error:%d\n", ret); + goto err_utmi_p1_fck; + } + + utmi_p2_fck = clk_get(dev, "utmi_p2_gfclk"); + if (IS_ERR(utmi_p2_fck)) { + ret = PTR_ERR(utmi_p2_fck); + dev_err(dev, "utmi_p2_gfclk failed error:%d\n", ret); + goto err_xclk60mhsp1_ck; + } + + xclk60mhsp2_ck = clk_get(dev, "xclk60mhsp2_ck"); + if (IS_ERR(xclk60mhsp2_ck)) { + ret = PTR_ERR(xclk60mhsp2_ck); + dev_err(dev, "xclk60mhsp2_ck failed error:%d\n", ret); + goto err_utmi_p2_fck; + } + + usbhost_p1_fck = clk_get(dev, "usb_host_hs_utmi_p1_clk"); + if (IS_ERR(usbhost_p1_fck)) { + ret = PTR_ERR(usbhost_p1_fck); + dev_err(dev, "usbhost_p1_fck failed error:%d\n", ret); + goto err_xclk60mhsp2_ck; + } + + usbhost_p2_fck = clk_get(dev, "usb_host_hs_utmi_p2_clk"); + if (IS_ERR(usbhost_p2_fck)) { + ret = PTR_ERR(usbhost_p2_fck); + dev_err(dev, "usbhost_p2_fck failed error:%d\n", ret); + goto err_usbhost_p1_fck; + } + + init_60m_fclk = clk_get(dev, "init_60m_fclk"); + if (IS_ERR(init_60m_fclk)) { + ret = PTR_ERR(init_60m_fclk); + dev_err(dev, "init_60m_fclk failed error:%d\n", ret); + goto err_usbhost_p2_fck; + } + return 0; +err_usbhost_p2_fck: + clk_put(usbhost_p2_fck); + +err_usbhost_p1_fck: + clk_put(usbhost_p1_fck); + +err_xclk60mhsp2_ck: + clk_put(xclk60mhsp2_ck); + +err_utmi_p2_fck: + clk_put(utmi_p2_fck); + +err_xclk60mhsp1_ck: + clk_put(xclk60mhsp1_ck); + +err_utmi_p1_fck: + clk_put(utmi_p1_fck); + err_add_hcd: disable_put_regulator(pdata); pm_runtime_put_sync(dev); @@ -294,6 +451,15 @@ static int ehci_hcd_omap_remove(struct platform_device *pdev) disable_put_regulator(dev->platform_data); iounmap(hcd->regs); usb_put_hcd(hcd); + + clk_put(utmi_p1_fck); + clk_put(utmi_p2_fck); + clk_put(xclk60mhsp1_ck); + clk_put(xclk60mhsp2_ck); + clk_put(usbhost_p1_fck); + clk_put(usbhost_p2_fck); + clk_put(init_60m_fclk); + pm_runtime_put_sync(dev); pm_runtime_disable(dev); @@ -364,7 +530,7 @@ static const struct hc_driver ehci_omap_hc_driver = { * root hub support */ .hub_status_data = ehci_hub_status_data, - .hub_control = ehci_hub_control, + .hub_control = omap_ehci_hub_control, .bus_suspend = ehci_bus_suspend, .bus_resume = ehci_bus_resume, From a70270468234749741c5893ae78e5bb524771402 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Wed, 13 Jun 2012 09:35:48 -0400 Subject: [PATCH 436/475] watchdog: Quiet down the boot messages A bunch of bugzillas have complained how noisy the nmi_watchdog is during boot-up especially with its expected failure cases (like virt and bios resource contention). This is my attempt to quiet them down and keep it less confusing for the end user. What I did is print the message for cpu0 and save it for future comparisons. If future cpus have an identical message as cpu0, then don't print the redundant info. However, if a future cpu has a different message, happily print that loudly. Before the change, you would see something like: ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1 CPU0: Intel(R) Core(TM)2 Quad CPU Q9550 @ 2.83GHz stepping 0a Performance Events: PEBS fmt0+, Core2 events, Intel PMU driver. ... version: 2 ... bit width: 40 ... generic registers: 2 ... value mask: 000000ffffffffff ... max period: 000000007fffffff ... fixed-purpose events: 3 ... event mask: 0000000700000003 NMI watchdog enabled, takes one hw-pmu counter. Booting Node 0, Processors #1 NMI watchdog enabled, takes one hw-pmu counter. #2 NMI watchdog enabled, takes one hw-pmu counter. #3 Ok. NMI watchdog enabled, takes one hw-pmu counter. Brought up 4 CPUs Total of 4 processors activated (22607.24 BogoMIPS). After the change, it is simplified to: ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1 CPU0: Intel(R) Core(TM)2 Quad CPU Q9550 @ 2.83GHz stepping 0a Performance Events: PEBS fmt0+, Core2 events, Intel PMU driver. ... version: 2 ... bit width: 40 ... generic registers: 2 ... value mask: 000000ffffffffff ... max period: 000000007fffffff ... fixed-purpose events: 3 ... event mask: 0000000700000003 NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter. Booting Node 0, Processors #1 #2 #3 Ok. Brought up 4 CPUs V2: little changes based on Joe Perches' feedback V3: printk cleanup based on Ingo's feedback; checkpatch fix V4: keep printk as one long line V5: Ingo fix ups Reported-and-tested-by: Nathan Zimmer Signed-off-by: Don Zickus Cc: nzimmer@sgi.com Cc: joe@perches.com Link: http://lkml.kernel.org/r/1339594548-17227-1-git-send-email-dzickus@redhat.com Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index e5e1d85b8c7..4b1dfba70f7 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -372,6 +372,13 @@ static int watchdog(void *unused) #ifdef CONFIG_HARDLOCKUP_DETECTOR +/* + * People like the simple clean cpu node info on boot. + * Reduce the watchdog noise by only printing messages + * that are different from what cpu0 displayed. + */ +static unsigned long cpu0_err; + static int watchdog_nmi_enable(int cpu) { struct perf_event_attr *wd_attr; @@ -390,11 +397,21 @@ static int watchdog_nmi_enable(int cpu) /* Try to register using hardware perf events */ event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); + + /* save cpu0 error for future comparision */ + if (cpu == 0 && IS_ERR(event)) + cpu0_err = PTR_ERR(event); + if (!IS_ERR(event)) { - pr_info("enabled, takes one hw-pmu counter.\n"); + /* only print for cpu0 or different than cpu0 */ + if (cpu == 0 || cpu0_err) + pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n"); goto out_save; } + /* skip displaying the same error again */ + if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) + return PTR_ERR(event); /* vary the KERN level based on the returned errno */ if (PTR_ERR(event) == -EOPNOTSUPP) From c080e26edc3a2a3cdfa4c430c663ee1c3bbd8fae Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 14 Jun 2012 14:01:30 +0200 Subject: [PATCH 437/475] x86: dma-mapping: fix broken allocation when dma_mask has been provided Commit 0a2b9a6ea93 ("X86: integrate CMA with DMA-mapping subsystem") broke memory allocation with dma_mask. This patch fixes possible kernel ops caused by lack of resetting page variable when jumping to 'again' label. Reported-by: Konrad Rzeszutek Wilk Signed-off-by: Marek Szyprowski Acked-by: Michal Nazarewicz --- arch/x86/kernel/pci-dma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 62c9457ccd2..c0f420f76cd 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -100,7 +100,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, struct dma_attrs *attrs) { unsigned long dma_mask; - struct page *page = NULL; + struct page *page; unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; dma_addr_t addr; @@ -108,6 +108,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, flag |= __GFP_ZERO; again: + page = NULL; if (!(flag & GFP_ATOMIC)) page = dma_alloc_from_contiguous(dev, count, get_order(size)); if (!page) From f617e2fd52484fb74236a597d0f9068ec7d9d2dd Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 14 Jun 2012 16:10:13 +0200 Subject: [PATCH 438/475] Btrfs: remove obsolete btrfs_next_leaf call from __resolve_indirect_ref When resolving indirect refs, we used to call btrfs_next_leaf in case we didn't find an exact match. While we should find exact matches most of the time, in case we don't, we must continue searching. Treating those matches differently depending on the level we're searching doesn't make sense. Even worse, we might end up searching for a key larger than the largest, in which case there is no next_leaf and subsequent jobs would fail. This commit drops the bogous lines. Signed-off-by: Jan Schmidt --- fs/btrfs/backref.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 3f75895c919..579131de1b3 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -294,16 +294,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, goto out; } - if (level == 0) { - if (ret == 1 && path->slots[0] >= btrfs_header_nritems(eb)) { - ret = btrfs_next_leaf(root, path); - if (ret) - goto out; - eb = path->nodes[0]; - } - + if (level == 0) btrfs_item_key_to_cpu(eb, &key, path->slots[0]); - } ret = add_all_parents(root, path, parents, level, &key, ref->wanted_disk_byte, extent_item_pos); From 8ba97a15e7d4f70b9af71fa1db86a28dd17ad1b2 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Mon, 4 Jun 2012 16:54:57 +0200 Subject: [PATCH 439/475] Btrfs: use btrfs_read_lock_root_node in get_old_root get_old_root could race with root node updates because we weren't locking the node early enough. Use btrfs_read_lock_root_node to grab the root locked in the very beginning and release the lock as soon as possible (just like btrfs_search_slot does). Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 836e4e03edc..2cde7b0a010 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1143,6 +1143,13 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, return eb_rewin; } +/* + * get_old_root() rewinds the state of @root's root node to the given @time_seq + * value. If there are no changes, the current root->root_node is returned. If + * anything changed in between, there's a fresh buffer allocated on which the + * rewind operations are done. In any case, the returned buffer is read locked. + * Returns NULL on error (with no locks held). + */ static inline struct extent_buffer * get_old_root(struct btrfs_root *root, u64 time_seq) { @@ -1151,6 +1158,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) struct tree_mod_root *old_root; u64 old_generation; + eb = btrfs_read_lock_root_node(root); tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); if (!tm) return root->node; @@ -1173,15 +1181,21 @@ get_old_root(struct btrfs_root *root, u64 time_seq) /* there's a root replace operation for the current root */ eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, root->nodesize); + } + btrfs_tree_read_unlock(root->node); + free_extent_buffer(root->node); + if (!eb) + return NULL; + btrfs_tree_read_lock(eb); + if (old_root->logical != root->node->start) { btrfs_set_header_bytenr(eb, eb->start); btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(eb, root->root_key.objectid); } - if (!eb) - return NULL; btrfs_set_header_level(eb, old_root->level); btrfs_set_header_generation(eb, old_generation); __tree_mod_log_rewind(eb, time_seq, tm); + extent_buffer_get(eb); return eb; } @@ -2612,9 +2626,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, again: b = get_old_root(root, time_seq); - extent_buffer_get(b); level = btrfs_header_level(b); - btrfs_tree_read_lock(b); p->locks[level] = BTRFS_READ_LOCK; while (b) { From a95236d99fa56766f11056903439f55fe5038bcf Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Tue, 5 Jun 2012 16:41:24 +0200 Subject: [PATCH 440/475] Btrfs: fix return value for __tree_mod_log_oldest_root In __tree_mod_log_oldest_root() we must return the found operation even if it's not a ROOT_REPLACE operation. Otherwise, the caller assumes that there are no operations to be rewinded and returns immediately. The code in the caller is modified to improve readability. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2cde7b0a010..50d7c99ddce 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1023,6 +1023,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, looped = 1; } + /* if there's no old root to return, return what we found instead */ + if (!found) + found = tm; + return found; } @@ -1155,18 +1159,24 @@ get_old_root(struct btrfs_root *root, u64 time_seq) { struct tree_mod_elem *tm; struct extent_buffer *eb; - struct tree_mod_root *old_root; + struct tree_mod_root *old_root = NULL; u64 old_generation; + u64 logical; eb = btrfs_read_lock_root_node(root); tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); if (!tm) return root->node; - old_root = &tm->old_root; - old_generation = tm->generation; + if (tm->op == MOD_LOG_ROOT_REPLACE) { + old_root = &tm->old_root; + old_generation = tm->generation; + logical = old_root->logical; + } else { + logical = root->node->start; + } - tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); + tm = tree_mod_log_search(root->fs_info, logical, time_seq); /* * there was an item in the log when __tree_mod_log_oldest_root * returned. this one must not go away, because the time_seq passed to @@ -1174,26 +1184,23 @@ get_old_root(struct btrfs_root *root, u64 time_seq) */ BUG_ON(!tm); - if (old_root->logical == root->node->start) { - /* there are logged operations for the current root */ - eb = btrfs_clone_extent_buffer(root->node); - } else { - /* there's a root replace operation for the current root */ + if (old_root) eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, root->nodesize); - } + else + eb = btrfs_clone_extent_buffer(root->node); btrfs_tree_read_unlock(root->node); free_extent_buffer(root->node); if (!eb) return NULL; btrfs_tree_read_lock(eb); - if (old_root->logical != root->node->start) { + if (old_root) { btrfs_set_header_bytenr(eb, eb->start); btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(eb, root->root_key.objectid); + btrfs_set_header_level(eb, old_root->level); + btrfs_set_header_generation(eb, old_generation); } - btrfs_set_header_level(eb, old_root->level); - btrfs_set_header_generation(eb, old_generation); __tree_mod_log_rewind(eb, time_seq, tm); extent_buffer_get(eb); From 3d7806eca43e73a9721d2e09369200ed93036bd0 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Mon, 11 Jun 2012 08:29:29 +0200 Subject: [PATCH 441/475] Btrfs: add btrfs_next_old_leaf To make sense of the tree mod log, the backref walker not only needs btrfs_search_old_slot, but it also called btrfs_next_leaf, which in turn was calling btrfs_search_slot. This obviously didn't give the correct result. This commit adds btrfs_next_old_leaf, a drop-in replacement for btrfs_next_leaf with a time_seq parameter. If it is zero, it behaves exactly like btrfs_next_leaf. If it is non-zero, it will use btrfs_search_old_slot with this time_seq parameter. Signed-off-by: Jan Schmidt --- fs/btrfs/backref.c | 7 ++++--- fs/btrfs/ctree.c | 11 ++++++++++- fs/btrfs/ctree.h | 2 ++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 579131de1b3..8f7d1237b7a 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -179,7 +179,8 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, struct ulist *parents, int level, - struct btrfs_key *key, u64 wanted_disk_byte, + struct btrfs_key *key, u64 time_seq, + u64 wanted_disk_byte, const u64 *extent_item_pos) { int ret; @@ -212,7 +213,7 @@ add_parent: */ while (1) { eie = NULL; - ret = btrfs_next_leaf(root, path); + ret = btrfs_next_old_leaf(root, path, time_seq); if (ret < 0) return ret; if (ret) @@ -297,7 +298,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, if (level == 0) btrfs_item_key_to_cpu(eb, &key, path->slots[0]); - ret = add_all_parents(root, path, parents, level, &key, + ret = add_all_parents(root, path, parents, level, &key, time_seq, ref->wanted_disk_byte, extent_item_pos); out: btrfs_free_path(path); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 50d7c99ddce..cb76b2a1b90 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5016,6 +5016,12 @@ next: * returns < 0 on io errors. */ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) +{ + return btrfs_next_old_leaf(root, path, 0); +} + +int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, + u64 time_seq) { int slot; int level; @@ -5041,7 +5047,10 @@ again: path->keep_locks = 1; path->leave_spinning = 1; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (time_seq) + ret = btrfs_search_old_slot(root, &key, path, time_seq); + else + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); path->keep_locks = 0; if (ret < 0) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0151ca1ac65..db15e9e7b91 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2753,6 +2753,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, } int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); +int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, + u64 time_seq); static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) { ++p->slots[0]; From 3310c36eef330766fd23d50796287ad764929e24 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Mon, 11 Jun 2012 10:52:38 +0200 Subject: [PATCH 442/475] Btrfs: fix race in tree mod log addition When adding to the tree modification log, we grab two locks at different stages. We must not drop the outer lock until we're done with section protected by the inner lock. This moves the unlock call for the outer lock to the appropriate position. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index cb76b2a1b90..04b06bcf2ca 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -467,6 +467,15 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, return 0; } +/* + * This allocates memory and gets a tree modification sequence number when + * needed. + * + * Returns 0 when no sequence number is needed, < 0 on error. + * Returns 1 when a sequence number was added. In this case, + * fs_info->tree_mod_seq_lock was acquired and must be released by the caller + * after inserting into the rb tree. + */ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, struct tree_mod_elem **tm_ret) { @@ -491,11 +500,11 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, */ kfree(tm); seq = 0; + spin_unlock(&fs_info->tree_mod_seq_lock); } else { __get_tree_mod_seq(fs_info, &tm->elem); seq = tm->elem.seq; } - spin_unlock(&fs_info->tree_mod_seq_lock); return seq; } @@ -521,7 +530,9 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, tm->slot = slot; tm->generation = btrfs_node_ptr_generation(eb, slot); - return __tree_mod_log_insert(fs_info, tm); + ret = __tree_mod_log_insert(fs_info, tm); + spin_unlock(&fs_info->tree_mod_seq_lock); + return ret; } static noinline int @@ -559,7 +570,9 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, tm->move.nr_items = nr_items; tm->op = MOD_LOG_MOVE_KEYS; - return __tree_mod_log_insert(fs_info, tm); + ret = __tree_mod_log_insert(fs_info, tm); + spin_unlock(&fs_info->tree_mod_seq_lock); + return ret; } static noinline int @@ -580,7 +593,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, tm->generation = btrfs_header_generation(old_root); tm->op = MOD_LOG_ROOT_REPLACE; - return __tree_mod_log_insert(fs_info, tm); + ret = __tree_mod_log_insert(fs_info, tm); + spin_unlock(&fs_info->tree_mod_seq_lock); + return ret; } static struct tree_mod_elem * From 12918b10d59e975fd5241eef03ef9e6d5ea3dcfe Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 1 Jun 2012 13:55:47 +0400 Subject: [PATCH 443/475] NFS: hard-code init_net for NFS callback transports In case of destroying mount namespace on child reaper exit, nsproxy is zeroed to the point already. So, dereferencing of it is invalid. This patch hard-code "init_net" for all network namespace references for NFS callback services. This will be fixed with proper NFS callback containerization. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- fs/nfs/callback.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 970659daa32..23ff18fe080 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -17,7 +17,6 @@ #include #include #include -#include #include @@ -107,7 +106,7 @@ nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) { int ret; - ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET, + ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret <= 0) goto out_err; @@ -115,7 +114,7 @@ nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) dprintk("NFS: Callback listener port = %u (af %u)\n", nfs_callback_tcpport, PF_INET); - ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET6, + ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret > 0) { nfs_callback_tcpport6 = ret; @@ -184,7 +183,7 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) * fore channel connection. * Returns the input port (0) and sets the svc_serv bc_xprt on success */ - ret = svc_create_xprt(serv, "tcp-bc", xprt->xprt_net, PF_INET, 0, + ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0, SVC_SOCK_ANONYMOUS); if (ret < 0) { rqstp = ERR_PTR(ret); @@ -254,7 +253,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) char svc_name[12]; int ret = 0; int minorversion_setup; - struct net *net = current->nsproxy->net_ns; + struct net *net = &init_net; mutex_lock(&nfs_callback_mutex); if (cb_info->users++ || cb_info->task != NULL) { @@ -330,7 +329,7 @@ void nfs_callback_down(int minorversion) cb_info->users--; if (cb_info->users == 0 && cb_info->task != NULL) { kthread_stop(cb_info->task); - svc_shutdown_net(cb_info->serv, current->nsproxy->net_ns); + svc_shutdown_net(cb_info->serv, &init_net); svc_exit_thread(cb_info->rqst); cb_info->serv = NULL; cb_info->rqst = NULL; From bc2df47a408f2d64cf81bcfd0f6e3e14c84cb0ab Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 12 Jun 2012 08:28:48 -0400 Subject: [PATCH 444/475] nfsd4: BUG_ON(!is_spin_locked()) no good on UP kernels Most frequent symptom was a BUG triggering in expire_client, with the server locking up shortly thereafter. Introduced by 508dc6e110c6dbdc0bbe84298ccfe22de7538486 "nfsd41: free_session/free_client must be called under the client_lock". Cc: stable@kernel.org Cc: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8fdc9ec5c5d..94effd5bc4a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -900,7 +900,7 @@ static void free_session(struct kref *kref) struct nfsd4_session *ses; int mem; - BUG_ON(!spin_is_locked(&client_lock)); + lockdep_assert_held(&client_lock); ses = container_of(kref, struct nfsd4_session, se_ref); nfsd4_del_conns(ses); spin_lock(&nfsd_drc_lock); @@ -1080,7 +1080,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) static inline void free_client(struct nfs4_client *clp) { - BUG_ON(!spin_is_locked(&client_lock)); + lockdep_assert_held(&client_lock); while (!list_empty(&clp->cl_sessions)) { struct nfsd4_session *ses; ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, From b9e0d95c041ca2d7ad297ee37c2e9cfab67a188f Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 23 May 2012 18:57:20 +0100 Subject: [PATCH 445/475] xen: mark local pages as FOREIGN in the m2p_override When the frontend and the backend reside on the same domain, even if we add pages to the m2p_override, these pages will never be returned by mfn_to_pfn because the check "get_phys_to_machine(pfn) != mfn" will always fail, so the pfn of the frontend will be returned instead (resulting in a deadlock because the frontend pages are already locked). INFO: task qemu-system-i38:1085 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. qemu-system-i38 D ffff8800cfc137c0 0 1085 1 0x00000000 ffff8800c47ed898 0000000000000282 ffff8800be4596b0 00000000000137c0 ffff8800c47edfd8 ffff8800c47ec010 00000000000137c0 00000000000137c0 ffff8800c47edfd8 00000000000137c0 ffffffff82213020 ffff8800be4596b0 Call Trace: [] ? __lock_page+0x70/0x70 [] schedule+0x29/0x70 [] io_schedule+0x60/0x80 [] sleep_on_page+0xe/0x20 [] __wait_on_bit_lock+0x5a/0xc0 [] __lock_page+0x67/0x70 [] ? autoremove_wake_function+0x40/0x40 [] ? bio_add_page+0x36/0x40 [] set_page_dirty_lock+0x52/0x60 [] bio_set_pages_dirty+0x51/0x70 [] do_blockdev_direct_IO+0xb24/0xeb0 [] ? ext3_get_blocks_handle+0xe00/0xe00 [] __blockdev_direct_IO+0x55/0x60 [] ? ext3_get_blocks_handle+0xe00/0xe00 [] ext3_direct_IO+0xf8/0x390 [] ? ext3_get_blocks_handle+0xe00/0xe00 [] ? xen_mc_flush+0xb0/0x1b0 [] generic_file_aio_read+0x737/0x780 [] ? gnttab_map_refs+0x15b/0x1e0 [] ? find_get_pages+0x150/0x150 [] aio_rw_vect_retry+0x7c/0x1d0 [] ? lookup_ioctx+0x90/0x90 [] aio_run_iocb+0x66/0x1a0 [] do_io_submit+0x708/0xb90 [] sys_io_submit+0x10/0x20 [] system_call_fastpath+0x16/0x1b The explanation is in the comment within the code: We need to do this because the pages shared by the frontend (xen-blkfront) can be already locked (lock_page, called by do_read_cache_page); when the userspace backend tries to use them with direct_IO, mfn_to_pfn returns the pfn of the frontend, so do_blockdev_direct_IO is going to try to lock the same pages again resulting in a deadlock. A simplified call graph looks like this: pygrub QEMU ----------------------------------------------- do_read_cache_page io_submit | | lock_page ext3_direct_IO | bio_add_page | lock_page Internally the xen-blkback uses m2p_add_override to swizzle (temporarily) a 'struct page' to have a different MFN (so that it can point to another guest). It also can easily find out whether another pfn corresponding to the mfn exists in the m2p, and can set the FOREIGN bit in the p2m, making sure that mfn_to_pfn returns the pfn of the backend. This allows the backend to perform direct_IO on these pages, but as a side effect prevents the frontend from using get_user_pages_fast on them while they are being shared with the backend. Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/p2m.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index ffd08c414e9..64effdc6da9 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -706,6 +706,7 @@ int m2p_add_override(unsigned long mfn, struct page *page, unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; + int ret = 0; pfn = page_to_pfn(page); if (!PageHighMem(page)) { @@ -741,6 +742,24 @@ int m2p_add_override(unsigned long mfn, struct page *page, list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); spin_unlock_irqrestore(&m2p_override_lock, flags); + /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in + * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other + * pfn so that the following mfn_to_pfn(mfn) calls will return the + * pfn from the m2p_override (the backend pfn) instead. + * We need to do this because the pages shared by the frontend + * (xen-blkfront) can be already locked (lock_page, called by + * do_read_cache_page); when the userspace backend tries to use them + * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so + * do_blockdev_direct_IO is going to try to lock the same pages + * again resulting in a deadlock. + * As a side effect get_user_pages_fast might not be safe on the + * frontend pages while they are being shared with the backend, + * because mfn_to_pfn (that ends up being called by GUPF) will + * return the backend pfn rather than the frontend pfn. */ + ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); + if (ret == 0 && get_phys_to_machine(pfn) == mfn) + set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); + return 0; } EXPORT_SYMBOL_GPL(m2p_add_override); @@ -752,6 +771,7 @@ int m2p_remove_override(struct page *page, bool clear_pte) unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; + int ret = 0; pfn = page_to_pfn(page); mfn = get_phys_to_machine(pfn); @@ -821,6 +841,22 @@ int m2p_remove_override(struct page *page, bool clear_pte) } else set_phys_to_machine(pfn, page->index); + /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present + * somewhere in this domain, even before being added to the + * m2p_override (see comment above in m2p_add_override). + * If there are no other entries in the m2p_override corresponding + * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for + * the original pfn (the one shared by the frontend): the backend + * cannot do any IO on this page anymore because it has been + * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of + * the original pfn causes mfn_to_pfn(mfn) to return the frontend + * pfn again. */ + mfn &= ~FOREIGN_FRAME_BIT; + ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); + if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && + m2p_find_override(mfn) == NULL) + set_phys_to_machine(pfn, mfn); + return 0; } EXPORT_SYMBOL_GPL(m2p_remove_override); From 6c4707f3f8c44ec18282e1c014c80e1c257042f9 Mon Sep 17 00:00:00 2001 From: Otto Meta Date: Wed, 6 Jun 2012 18:46:21 +0200 Subject: [PATCH 446/475] usb: cdc-acm: fix devices not unthrottled on open Currently CDC-ACM devices stay throttled when their TTY is closed while throttled, stalling further communication attempts after the next open. Unthrottling during open/activate got lost starting with kernel 3.0.0 and this patch reintroduces it. Signed-off-by: Otto Meta Cc: stable Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index f2a120eea9d..36a2a0b7b82 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -567,6 +567,14 @@ static int acm_port_activate(struct tty_port *port, struct tty_struct *tty) usb_autopm_put_interface(acm->control); + /* + * Unthrottle device in case the TTY was closed while throttled. + */ + spin_lock_irq(&acm->read_lock); + acm->throttled = 0; + acm->throttle_req = 0; + spin_unlock_irq(&acm->read_lock); + if (acm_submit_read_urbs(acm, GFP_KERNEL)) goto error_submit_read_urbs; From 5897b038296ea84e501ba5f957e11939f8cf9be0 Mon Sep 17 00:00:00 2001 From: "Shimoda, Yoshihiro" Date: Tue, 12 Jun 2012 09:34:33 +0900 Subject: [PATCH 447/475] usb: ehci-sh: fix illegal phy_init() running when platform_data is NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the platform_data is not set, pdata will be uninitialized value. Since the driver has the following code, if the condition is true when the pdata is uninitialized value, the driver may jump to the illegal phy_init(). if (pdata && pdata->phy_init) pdata->phy_init(); This patch also fixes the following warning: CC drivers/usb/host/ehci-hcd.o drivers/usb/host/ehci-sh.c: In function ‘ehci_hcd_sh_probe’: drivers/usb/host/ehci-sh.c:104: warning: ‘pdata’ may be used uninitialized in this function Signed-off-by: Yoshihiro Shimoda Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-sh.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/host/ehci-sh.c b/drivers/usb/host/ehci-sh.c index ca819cdd0c5..e7cb3925abf 100644 --- a/drivers/usb/host/ehci-sh.c +++ b/drivers/usb/host/ehci-sh.c @@ -126,8 +126,7 @@ static int ehci_hcd_sh_probe(struct platform_device *pdev) goto fail_create_hcd; } - if (pdev->dev.platform_data != NULL) - pdata = pdev->dev.platform_data; + pdata = pdev->dev.platform_data; /* initialize hcd */ hcd = usb_create_hcd(&ehci_sh_hc_driver, &pdev->dev, From b3a3dd074f7053ef824ad077e5331b52220ceba1 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Tue, 12 Jun 2012 20:23:52 +0200 Subject: [PATCH 448/475] USB: fix gathering of interface associations TEAC's UD-H01 (and probably other devices) have a gap in the interface number allocation of their descriptors: Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 220 bNumInterfaces 3 [...] Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 [...] Interface Association: bLength 8 bDescriptorType 11 bFirstInterface 2 bInterfaceCount 2 bFunctionClass 1 Audio bFunctionSubClass 0 bFunctionProtocol 32 iFunction 4 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 2 bAlternateSetting 0 [...] Once a configuration is selected, usb_set_configuration() walks the known interfaces of a given configuration and calls find_iad() on each of them to set the interface association pointer the interface is included in. The problem here is that the loop variable is taken for the interface number in the comparison logic that gathers the association. Which is fine as long as the descriptors are sane. In the case above, however, the logic gets out of sync and the interface association fields of all interfaces beyond the interface number gap are wrong. Fix this by passing the interface's bInterfaceNumber to find_iad() instead. Signed-off-by: Daniel Mack Reported-by: bEN Reported-by: Ivan Perrone Tested-by: ivan perrone Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index b548cf1dbc6..bdd1c6749d8 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1838,7 +1838,6 @@ free_interfaces: intfc = cp->intf_cache[i]; intf->altsetting = intfc->altsetting; intf->num_altsetting = intfc->num_altsetting; - intf->intf_assoc = find_iad(dev, cp, i); kref_get(&intfc->ref); alt = usb_altnum_to_altsetting(intf, 0); @@ -1851,6 +1850,8 @@ free_interfaces: if (!alt) alt = &intf->altsetting[0]; + intf->intf_assoc = + find_iad(dev, cp, alt->desc.bInterfaceNumber); intf->cur_altsetting = alt; usb_enable_interface(dev, intf, true); intf->dev.parent = &dev->dev; From beb42dd793193a3d4e72970bfa73cd8810f63cea Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 30 May 2012 15:35:17 -0400 Subject: [PATCH 449/475] Btrfs: pass locked_page into extent_clear_unlock_delalloc if theres an error While doing my enospc work I got a transaction abortion that resulted in a panic when we tried to unlock_page() an already unlocked page. This is because we aren't calling extent_clear_unlock_delalloc with the locked page so it was unlocking all the pages in the range. This is wrong since __extent_writepage expects to have the page locked still unless we return *page_started as 1. This should keep us from panicing. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 92df0a5d1d9..ccd6355af73 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -830,7 +830,7 @@ static noinline int cow_file_range(struct inode *inode, if (IS_ERR(trans)) { extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - start, end, NULL, + start, end, locked_page, EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | @@ -963,7 +963,7 @@ out: out_unlock: extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - start, end, NULL, + start, end, locked_page, EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | From b939d1ab76b4aa816343cdbd8b44ce9929a3b9ef Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 31 May 2012 11:06:33 -0400 Subject: [PATCH 450/475] Btrfs: fix locking in btrfs_destroy_delayed_refs The transaction abort stuff was throwing warnings from the list debugging code because we do a list_del_init outside of the delayed_refs spin lock. The delayed refs locking makes baby Jesus cry so it's not hard to get wrong, but we need to take the ref head mutex to make sure it's not being processed currently, and so if it is we need to drop the spin lock and then take and drop the mutex and do the search again. If we can take the mutex then we can safely remove the head from the list and carry on. Now when the transaction aborts I don't get the list debugging warnings. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b99d5127ba1..c79ddc75608 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3400,7 +3400,6 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, delayed_refs = &trans->delayed_refs; -again: spin_lock(&delayed_refs->lock); if (delayed_refs->num_entries == 0) { spin_unlock(&delayed_refs->lock); @@ -3408,31 +3407,36 @@ again: return ret; } - node = rb_first(&delayed_refs->root); - while (node) { + while ((node = rb_first(&delayed_refs->root)) != NULL) { ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - node = rb_next(node); - - ref->in_tree = 0; - rb_erase(&ref->rb_node, &delayed_refs->root); - delayed_refs->num_entries--; atomic_set(&ref->refs, 1); if (btrfs_delayed_ref_is_head(ref)) { struct btrfs_delayed_ref_head *head; head = btrfs_delayed_node_to_head(ref); - spin_unlock(&delayed_refs->lock); - mutex_lock(&head->mutex); + if (!mutex_trylock(&head->mutex)) { + atomic_inc(&ref->refs); + spin_unlock(&delayed_refs->lock); + + /* Need to wait for the delayed ref to run */ + mutex_lock(&head->mutex); + mutex_unlock(&head->mutex); + btrfs_put_delayed_ref(ref); + + continue; + } + kfree(head->extent_op); delayed_refs->num_heads--; if (list_empty(&head->cluster)) delayed_refs->num_heads_ready--; list_del_init(&head->cluster); - mutex_unlock(&head->mutex); - btrfs_put_delayed_ref(ref); - goto again; } + ref->in_tree = 0; + rb_erase(&ref->rb_node, &delayed_refs->root); + delayed_refs->num_entries--; + spin_unlock(&delayed_refs->lock); btrfs_put_delayed_ref(ref); From d7096fc3ef8360f30f228344bc564d4f97d8a158 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 31 May 2012 15:49:57 -0400 Subject: [PATCH 451/475] Btrfs: wake up transaction waiters when aborting a transaction I was getting lots of hung tasks and a NULL pointer dereference because we are not cleaning up the transaction properly when it aborts. First we need to reset the running_transaction to NULL so we don't get a bad dereference for any start_transaction callers after this. Also we cannot rely on waitqueue_active() since it's just a list_empty(), so just call wake_up() directly since that will do the barrier for us and such. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 9 +++------ fs/btrfs/transaction.c | 4 ++++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c79ddc75608..19b4db70dcb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3589,16 +3589,13 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, /* FIXME: cleanup wait for commit */ cur_trans->in_commit = 1; cur_trans->blocked = 1; - if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) - wake_up(&root->fs_info->transaction_blocked_wait); + wake_up(&root->fs_info->transaction_blocked_wait); cur_trans->blocked = 0; - if (waitqueue_active(&root->fs_info->transaction_wait)) - wake_up(&root->fs_info->transaction_wait); + wake_up(&root->fs_info->transaction_wait); cur_trans->commit_done = 1; - if (waitqueue_active(&cur_trans->commit_wait)) - wake_up(&cur_trans->commit_wait); + wake_up(&cur_trans->commit_wait); btrfs_destroy_pending_snapshots(cur_trans); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1791c6e3d83..59e0203bfb9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1221,6 +1221,10 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, spin_lock(&root->fs_info->trans_lock); list_del_init(&cur_trans->list); + if (cur_trans == root->fs_info->running_transaction) { + root->fs_info->running_transaction = NULL; + root->fs_info->trans_no_join = 0; + } spin_unlock(&root->fs_info->trans_lock); btrfs_cleanup_one_transaction(trans->transaction, root); From 7b8b92af58db347de64a237861fcf13374b34a9c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 31 May 2012 15:52:43 -0400 Subject: [PATCH 452/475] Btrfs: abort the transaction if the commit fails If a transaction commit fails we don't abort it so we don't set an error on the file system. This patch fixes that by actually calling the abort stuff and then adding a check for a fs error in the transaction start stuff to make sure it is caught properly. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 59e0203bfb9..b72b068183e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -100,6 +100,10 @@ loop: kmem_cache_free(btrfs_transaction_cachep, cur_trans); cur_trans = fs_info->running_transaction; goto loop; + } else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + spin_unlock(&root->fs_info->trans_lock); + kmem_cache_free(btrfs_transaction_cachep, cur_trans); + return -EROFS; } atomic_set(&cur_trans->num_writers, 1); @@ -1213,12 +1217,14 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, static void cleanup_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_root *root, int err) { struct btrfs_transaction *cur_trans = trans->transaction; WARN_ON(trans->use_count > 1); + btrfs_abort_transaction(trans, root, err); + spin_lock(&root->fs_info->trans_lock); list_del_init(&cur_trans->list); if (cur_trans == root->fs_info->running_transaction) { @@ -1530,7 +1536,7 @@ cleanup_transaction: // WARN_ON(1); if (current->journal_info == trans) current->journal_info = NULL; - cleanup_transaction(trans, root); + cleanup_transaction(trans, root, ret); return ret; } From ee670f0af35871edb492db5ba406cef36d1b7c21 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 31 May 2012 15:54:30 -0400 Subject: [PATCH 453/475] Btrfs: fix btrfs_destroy_marked_extents So we're forcing the eb's to have their ref count set to 1 so invalidatepage works but this breaks lots of things, for example root nodes, and is just plain wrong, we don't need to just evict all of this stuff. Also drop the invalidatepage altogether and add a page_cache_release(). With this patch we no longer hang when trying to access the root nodes after an aborted transaction and we no longer leak memory. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 19b4db70dcb..5a3bf323e2b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3524,11 +3524,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, offset >> PAGE_CACHE_SHIFT); spin_unlock(&dirty_pages->buffer_lock); - if (eb) { + if (eb) ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); - atomic_set(&eb->refs, 1); - } if (PageWriteback(page)) end_page_writeback(page); @@ -3542,8 +3540,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, spin_unlock_irq(&page->mapping->tree_lock); } - page->mapping->a_ops->invalidatepage(page, 0); unlock_page(page); + page_cache_release(page); } } From 17ca04aff7e6171df684b7b65804df8830eb8c15 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 31 May 2012 15:58:55 -0400 Subject: [PATCH 454/475] Btrfs: unlock everything properly in the error case for nocow I was getting hung on umount when a transaction was aborted because a range of one of the free space inodes was still locked. This is because the nocow stuff doesn't unlock anything on error. This fixed the problem and I verified that is what was happening. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ccd6355af73..b7f398c36cb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1136,8 +1136,18 @@ static noinline int run_delalloc_nocow(struct inode *inode, u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); - if (!path) + if (!path) { + extent_clear_unlock_delalloc(inode, + &BTRFS_I(inode)->io_tree, + start, end, locked_page, + EXTENT_CLEAR_UNLOCK_PAGE | + EXTENT_CLEAR_UNLOCK | + EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_DIRTY | + EXTENT_SET_WRITEBACK | + EXTENT_END_WRITEBACK); return -ENOMEM; + } nolock = btrfs_is_free_space_inode(root, inode); @@ -1147,6 +1157,15 @@ static noinline int run_delalloc_nocow(struct inode *inode, trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { + extent_clear_unlock_delalloc(inode, + &BTRFS_I(inode)->io_tree, + start, end, locked_page, + EXTENT_CLEAR_UNLOCK_PAGE | + EXTENT_CLEAR_UNLOCK | + EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_DIRTY | + EXTENT_SET_WRITEBACK | + EXTENT_END_WRITEBACK); btrfs_free_path(path); return PTR_ERR(trans); } @@ -1327,8 +1346,11 @@ out_check: } btrfs_release_path(path); - if (cur_offset <= end && cow_start == (u64)-1) + if (cur_offset <= end && cow_start == (u64)-1) { cow_start = cur_offset; + cur_offset = end; + } + if (cow_start != (u64)-1) { ret = cow_file_range(inode, locked_page, cow_start, end, page_started, nr_written, 1); @@ -1347,6 +1369,17 @@ error: if (!ret) ret = err; + if (ret && cur_offset < end) + extent_clear_unlock_delalloc(inode, + &BTRFS_I(inode)->io_tree, + cur_offset, end, locked_page, + EXTENT_CLEAR_UNLOCK_PAGE | + EXTENT_CLEAR_UNLOCK | + EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_DIRTY | + EXTENT_SET_WRITEBACK | + EXTENT_END_WRITEBACK); + btrfs_free_path(path); return ret; } From 606686eeac4550d2212bf3d621a810407ef5e9bf Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 4 Jun 2012 14:03:51 -0400 Subject: [PATCH 455/475] Btrfs: use rcu to protect device->name Al pointed out that we can just toss out the old name on a device and add a new one arbitrarily, so anybody who uses device->name in printk could possibly use free'd memory. Instead of adding locking around all of this he suggested doing it with RCU, so I've introduced a struct rcu_string that does just that and have gone through and protected all accesses to device->name that aren't under the uuid_mutex with rcu_read_lock(). This protects us and I will use it for dealing with removing the device that we used to mount the file system in a later patch. Thanks, Reviewed-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/check-integrity.c | 16 ++++--- fs/btrfs/disk-io.c | 10 +++-- fs/btrfs/extent_io.c | 7 +-- fs/btrfs/ioctl.c | 13 ++++-- fs/btrfs/rcu-string.h | 56 +++++++++++++++++++++++ fs/btrfs/scrub.c | 30 ++++++++----- fs/btrfs/volumes.c | 92 ++++++++++++++++++++++++-------------- fs/btrfs/volumes.h | 2 +- 8 files changed, 162 insertions(+), 64 deletions(-) create mode 100644 fs/btrfs/rcu-string.h diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 9cebb1fd6a3..da6e9364a5e 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -93,6 +93,7 @@ #include "print-tree.h" #include "locking.h" #include "check-integrity.h" +#include "rcu-string.h" #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 @@ -843,13 +844,14 @@ static int btrfsic_process_superblock_dev_mirror( superblock_tmp->never_written = 0; superblock_tmp->mirror_num = 1 + superblock_mirror_num; if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) - printk(KERN_INFO "New initial S-block (bdev %p, %s)" - " @%llu (%s/%llu/%d)\n", - superblock_bdev, device->name, - (unsigned long long)dev_bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, - superblock_mirror_num); + printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)" + " @%llu (%s/%llu/%d)\n", + superblock_bdev, + rcu_str_deref(device->name), + (unsigned long long)dev_bytenr, + dev_state->name, + (unsigned long long)dev_bytenr, + superblock_mirror_num); list_add(&superblock_tmp->all_blocks_node, &state->all_blocks_list); btrfsic_block_hashtable_add(superblock_tmp, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5a3bf323e2b..1c9664b16cd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -44,6 +44,7 @@ #include "free-space-cache.h" #include "inode-map.h" #include "check-integrity.h" +#include "rcu-string.h" static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); @@ -2575,8 +2576,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) struct btrfs_device *device = (struct btrfs_device *) bh->b_private; - printk_ratelimited(KERN_WARNING "lost page write due to " - "I/O error on %s\n", device->name); + printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to " + "I/O error on %s\n", + rcu_str_deref(device->name)); /* note, we dont' set_buffer_write_io_error because we have * our own ways of dealing with the IO errors */ @@ -2749,8 +2751,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait) wait_for_completion(&device->flush_wait); if (bio_flagged(bio, BIO_EOPNOTSUPP)) { - printk("btrfs: disabling barriers on dev %s\n", - device->name); + printk_in_rcu("btrfs: disabling barriers on dev %s\n", + rcu_str_deref(device->name)); device->nobarriers = 1; } if (!bio_flagged(bio, BIO_UPTODATE)) { diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2c8f7b20461..aaa12c1eb34 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -20,6 +20,7 @@ #include "volumes.h" #include "check-integrity.h" #include "locking.h" +#include "rcu-string.h" static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; @@ -1917,9 +1918,9 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, return -EIO; } - printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s " - "sector %llu)\n", page->mapping->host->i_ino, start, - dev->name, sector); + printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu " + "(dev %s sector %llu)\n", page->mapping->host->i_ino, + start, rcu_str_deref(dev->name), sector); bio_put(bio); return 0; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 24b776c08d9..c5254dde1f0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -52,6 +52,7 @@ #include "locking.h" #include "inode-map.h" #include "backref.h" +#include "rcu-string.h" /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) @@ -1345,8 +1346,9 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, do_div(new_size, root->sectorsize); new_size *= root->sectorsize; - printk(KERN_INFO "btrfs: new size for %s is %llu\n", - device->name, (unsigned long long)new_size); + printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", + rcu_str_deref(device->name), + (unsigned long long)new_size); if (new_size > old_size) { trans = btrfs_start_transaction(root, 0); @@ -2264,7 +2266,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) di_args->total_bytes = dev->total_bytes; memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); if (dev->name) { - strncpy(di_args->path, dev->name, sizeof(di_args->path)); + struct rcu_string *name; + + rcu_read_lock(); + name = rcu_dereference(dev->name); + strncpy(di_args->path, name->str, sizeof(di_args->path)); + rcu_read_unlock(); di_args->path[sizeof(di_args->path) - 1] = 0; } else { di_args->path[0] = '\0'; diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h new file mode 100644 index 00000000000..9e111e4576d --- /dev/null +++ b/fs/btrfs/rcu-string.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2012 Red Hat. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +struct rcu_string { + struct rcu_head rcu; + char str[0]; +}; + +static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask) +{ + size_t len = strlen(src) + 1; + struct rcu_string *ret = kzalloc(sizeof(struct rcu_string) + + (len * sizeof(char)), mask); + if (!ret) + return ret; + strncpy(ret->str, src, len); + return ret; +} + +static inline void rcu_string_free(struct rcu_string *str) +{ + if (str) + kfree_rcu(str, rcu); +} + +#define printk_in_rcu(fmt, ...) do { \ + rcu_read_lock(); \ + printk(fmt, __VA_ARGS__); \ + rcu_read_unlock(); \ +} while (0) + +#define printk_ratelimited_in_rcu(fmt, ...) do { \ + rcu_read_lock(); \ + printk_ratelimited(fmt, __VA_ARGS__); \ + rcu_read_unlock(); \ +} while (0) + +#define rcu_str_deref(rcu_str) ({ \ + struct rcu_string *__str = rcu_dereference(rcu_str); \ + __str->str; \ +}) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index a38cfa4f251..b223620cd5a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -26,6 +26,7 @@ #include "backref.h" #include "extent_io.h" #include "check-integrity.h" +#include "rcu-string.h" /* * This is only the first step towards a full-features scrub. It reads all @@ -320,10 +321,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx) * hold all of the paths here */ for (i = 0; i < ipath->fspath->elem_cnt; ++i) - printk(KERN_WARNING "btrfs: %s at logical %llu on dev " + printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev " "%s, sector %llu, root %llu, inode %llu, offset %llu, " "length %llu, links %u (path: %s)\n", swarn->errstr, - swarn->logical, swarn->dev->name, + swarn->logical, rcu_str_deref(swarn->dev->name), (unsigned long long)swarn->sector, root, inum, offset, min(isize - offset, (u64)PAGE_SIZE), nlink, (char *)(unsigned long)ipath->fspath->val[i]); @@ -332,10 +333,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx) return 0; err: - printk(KERN_WARNING "btrfs: %s at logical %llu on dev " + printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev " "%s, sector %llu, root %llu, inode %llu, offset %llu: path " "resolving failed with ret=%d\n", swarn->errstr, - swarn->logical, swarn->dev->name, + swarn->logical, rcu_str_deref(swarn->dev->name), (unsigned long long)swarn->sector, root, inum, offset, ret); free_ipath(ipath); @@ -390,10 +391,11 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) do { ret = tree_backref_for_extent(&ptr, eb, ei, item_size, &ref_root, &ref_level); - printk(KERN_WARNING + printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev %s, " "sector %llu: metadata %s (level %d) in tree " - "%llu\n", errstr, swarn.logical, dev->name, + "%llu\n", errstr, swarn.logical, + rcu_str_deref(dev->name), (unsigned long long)swarn.sector, ref_level ? "node" : "leaf", ret < 0 ? -1 : ref_level, @@ -580,9 +582,11 @@ out: spin_lock(&sdev->stat_lock); ++sdev->stat.uncorrectable_errors; spin_unlock(&sdev->stat_lock); - printk_ratelimited(KERN_ERR + + printk_ratelimited_in_rcu(KERN_ERR "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", - (unsigned long long)fixup->logical, sdev->dev->name); + (unsigned long long)fixup->logical, + rcu_str_deref(sdev->dev->name)); } btrfs_free_path(path); @@ -936,18 +940,20 @@ corrected_error: spin_lock(&sdev->stat_lock); sdev->stat.corrected_errors++; spin_unlock(&sdev->stat_lock); - printk_ratelimited(KERN_ERR + printk_ratelimited_in_rcu(KERN_ERR "btrfs: fixed up error at logical %llu on dev %s\n", - (unsigned long long)logical, sdev->dev->name); + (unsigned long long)logical, + rcu_str_deref(sdev->dev->name)); } } else { did_not_correct_error: spin_lock(&sdev->stat_lock); sdev->stat.uncorrectable_errors++; spin_unlock(&sdev->stat_lock); - printk_ratelimited(KERN_ERR + printk_ratelimited_in_rcu(KERN_ERR "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", - (unsigned long long)logical, sdev->dev->name); + (unsigned long long)logical, + rcu_str_deref(sdev->dev->name)); } out: diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7782020996f..8a3d2594b80 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -35,6 +35,7 @@ #include "volumes.h" #include "async-thread.h" #include "check-integrity.h" +#include "rcu-string.h" static int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -64,7 +65,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices) device = list_entry(fs_devices->devices.next, struct btrfs_device, dev_list); list_del(&device->dev_list); - kfree(device->name); + rcu_string_free(device->name); kfree(device); } kfree(fs_devices); @@ -334,8 +335,8 @@ static noinline int device_list_add(const char *path, { struct btrfs_device *device; struct btrfs_fs_devices *fs_devices; + struct rcu_string *name; u64 found_transid = btrfs_super_generation(disk_super); - char *name; fs_devices = find_fsid(disk_super->fsid); if (!fs_devices) { @@ -369,11 +370,13 @@ static noinline int device_list_add(const char *path, memcpy(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE); spin_lock_init(&device->io_lock); - device->name = kstrdup(path, GFP_NOFS); - if (!device->name) { + + name = rcu_string_strdup(path, GFP_NOFS); + if (!name) { kfree(device); return -ENOMEM; } + rcu_assign_pointer(device->name, name); INIT_LIST_HEAD(&device->dev_alloc_list); /* init readahead state */ @@ -390,12 +393,12 @@ static noinline int device_list_add(const char *path, device->fs_devices = fs_devices; fs_devices->num_devices++; - } else if (!device->name || strcmp(device->name, path)) { - name = kstrdup(path, GFP_NOFS); + } else if (!device->name || strcmp(device->name->str, path)) { + name = rcu_string_strdup(path, GFP_NOFS); if (!name) return -ENOMEM; - kfree(device->name); - device->name = name; + rcu_string_free(device->name); + rcu_assign_pointer(device->name, name); if (device->missing) { fs_devices->missing_devices--; device->missing = 0; @@ -430,15 +433,22 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) /* We have held the volume lock, it is safe to get the devices. */ list_for_each_entry(orig_dev, &orig->devices, dev_list) { + struct rcu_string *name; + device = kzalloc(sizeof(*device), GFP_NOFS); if (!device) goto error; - device->name = kstrdup(orig_dev->name, GFP_NOFS); - if (!device->name) { + /* + * This is ok to do without rcu read locked because we hold the + * uuid mutex so nothing we touch in here is going to disappear. + */ + name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); + if (!name) { kfree(device); goto error; } + rcu_assign_pointer(device->name, name); device->devid = orig_dev->devid; device->work.func = pending_bios_fn; @@ -491,7 +501,7 @@ again: } list_del_init(&device->dev_list); fs_devices->num_devices--; - kfree(device->name); + rcu_string_free(device->name); kfree(device); } @@ -516,7 +526,7 @@ static void __free_device(struct work_struct *work) if (device->bdev) blkdev_put(device->bdev, device->mode); - kfree(device->name); + rcu_string_free(device->name); kfree(device); } @@ -540,6 +550,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry(device, &fs_devices->devices, dev_list) { struct btrfs_device *new_device; + struct rcu_string *name; if (device->bdev) fs_devices->open_devices--; @@ -555,8 +566,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) new_device = kmalloc(sizeof(*new_device), GFP_NOFS); BUG_ON(!new_device); /* -ENOMEM */ memcpy(new_device, device, sizeof(*new_device)); - new_device->name = kstrdup(device->name, GFP_NOFS); - BUG_ON(device->name && !new_device->name); /* -ENOMEM */ + + /* Safe because we are under uuid_mutex */ + name = rcu_string_strdup(device->name->str, GFP_NOFS); + BUG_ON(device->name && !name); /* -ENOMEM */ + rcu_assign_pointer(new_device->name, name); new_device->bdev = NULL; new_device->writeable = 0; new_device->in_fs_metadata = 0; @@ -621,9 +635,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, if (!device->name) continue; - bdev = blkdev_get_by_path(device->name, flags, holder); + bdev = blkdev_get_by_path(device->name->str, flags, holder); if (IS_ERR(bdev)) { - printk(KERN_INFO "open %s failed\n", device->name); + printk(KERN_INFO "open %s failed\n", device->name->str); goto error; } filemap_write_and_wait(bdev->bd_inode->i_mapping); @@ -1632,6 +1646,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) struct block_device *bdev; struct list_head *devices; struct super_block *sb = root->fs_info->sb; + struct rcu_string *name; u64 total_bytes; int seeding_dev = 0; int ret = 0; @@ -1671,23 +1686,24 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) goto error; } - device->name = kstrdup(device_path, GFP_NOFS); - if (!device->name) { + name = rcu_string_strdup(device_path, GFP_NOFS); + if (!name) { kfree(device); ret = -ENOMEM; goto error; } + rcu_assign_pointer(device->name, name); ret = find_next_devid(root, &device->devid); if (ret) { - kfree(device->name); + rcu_string_free(device->name); kfree(device); goto error; } trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { - kfree(device->name); + rcu_string_free(device->name); kfree(device); ret = PTR_ERR(trans); goto error; @@ -1796,7 +1812,7 @@ error_trans: unlock_chunks(root); btrfs_abort_transaction(trans, root, ret); btrfs_end_transaction(trans, root); - kfree(device->name); + rcu_string_free(device->name); kfree(device); error: blkdev_put(bdev, FMODE_EXCL); @@ -4204,10 +4220,17 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; dev = bbio->stripes[dev_nr].dev; if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { +#ifdef DEBUG + struct rcu_string *name; + + rcu_read_lock(); + name = rcu_dereference(dev->name); pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu " "(%s id %llu), size=%u\n", rw, (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, - dev->name, dev->devid, bio->bi_size); + name->str, dev->devid, bio->bi_size); + rcu_read_unlock(); +#endif bio->bi_bdev = dev->bdev; if (async_submit) schedule_bio(root, dev, rw, bio); @@ -4694,8 +4717,9 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info) key.offset = device->devid; ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); if (ret) { - printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", - device->name, (unsigned long long)device->devid); + printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", + rcu_str_deref(device->name), + (unsigned long long)device->devid); __btrfs_reset_dev_stats(device); device->dev_stats_valid = 1; btrfs_release_path(path); @@ -4747,8 +4771,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, BUG_ON(!path); ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); if (ret < 0) { - printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", - ret, device->name); + printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", + ret, rcu_str_deref(device->name)); goto out; } @@ -4757,8 +4781,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, /* need to delete old one and insert a new one */ ret = btrfs_del_item(trans, dev_root, path); if (ret != 0) { - printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", - device->name, ret); + printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", + rcu_str_deref(device->name), ret); goto out; } ret = 1; @@ -4770,8 +4794,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, dev_root, path, &key, sizeof(*ptr)); if (ret < 0) { - printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", - device->name, ret); + printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", + rcu_str_deref(device->name), ret); goto out; } } @@ -4823,9 +4847,9 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) { if (!dev->dev_stats_valid) return; - printk_ratelimited(KERN_ERR + printk_ratelimited_in_rcu(KERN_ERR "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", - dev->name, + rcu_str_deref(dev->name), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), @@ -4837,8 +4861,8 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) { - printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", - dev->name, + printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", + rcu_str_deref(dev->name), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 3406a88ca83..74366f27a76 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -58,7 +58,7 @@ struct btrfs_device { /* the mode sent to blkdev_get */ fmode_t mode; - char *name; + struct rcu_string *name; /* the internal btrfs device id */ u64 devid; From 9c5085c147989d48dfe74194b48affc23f376650 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Jun 2012 14:13:12 -0400 Subject: [PATCH 456/475] Btrfs: implement ->show_devname Because btrfs can remove the device that was mounted we need to have a ->show_devname so that in this case we can print out some other device in the file system to /proc/mount. So if there are multiple devices in a btrfs file system we will just print the device with the lowest devid that we can find. This will make everything consistent and deal with device removal properly. The drawback is if you mount with a device that is higher than the lowest devicd it won't show up as the mounted device in /proc/mounts, but this is a small price to pay. This was inspired by Miao Xie's patch. Thanks, Reviewed-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/super.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 96eb9fef7bd..0eb9a4da069 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -54,6 +54,7 @@ #include "version.h" #include "export.h" #include "compression.h" +#include "rcu-string.h" #define CREATE_TRACE_POINTS #include @@ -1482,12 +1483,44 @@ static void btrfs_fs_dirty_inode(struct inode *inode, int flags) "error %d\n", btrfs_ino(inode), ret); } +static int btrfs_show_devname(struct seq_file *m, struct dentry *root) +{ + struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); + struct btrfs_fs_devices *cur_devices; + struct btrfs_device *dev, *first_dev = NULL; + struct list_head *head; + struct rcu_string *name; + + mutex_lock(&fs_info->fs_devices->device_list_mutex); + cur_devices = fs_info->fs_devices; + while (cur_devices) { + head = &cur_devices->devices; + list_for_each_entry(dev, head, dev_list) { + if (!first_dev || dev->devid < first_dev->devid) + first_dev = dev; + } + cur_devices = cur_devices->seed; + } + + if (first_dev) { + rcu_read_lock(); + name = rcu_dereference(first_dev->name); + seq_escape(m, name->str, " \t\n\\"); + rcu_read_unlock(); + } else { + WARN_ON(1); + } + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + return 0; +} + static const struct super_operations btrfs_super_ops = { .drop_inode = btrfs_drop_inode, .evict_inode = btrfs_evict_inode, .put_super = btrfs_put_super, .sync_fs = btrfs_sync_fs, .show_options = btrfs_show_options, + .show_devname = btrfs_show_devname, .write_inode = btrfs_write_inode, .dirty_inode = btrfs_fs_dirty_inode, .alloc_inode = btrfs_alloc_inode, From 8180ef8894fa402443205cff1e23417e8d3434df Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 8 Jun 2012 15:16:12 -0400 Subject: [PATCH 457/475] Btrfs: keep inode pinned when compressing writes A user reported lots of problems using compression on the new code and it turns out part of the problem was that igrab() was failing when we added a new ordered extent. This is because when writing out an inode under compression we immediately return without actually doing anything to the pages, and then in another thread at some point down the line actually do the ordered dance. The problem is between the point that we start writeback and we actually add the ordered extent we could be trying to reclaim the inode, which makes igrab() return NULL. So we need to do an igrab() when we create the async extent and then drop it when we are done with it. This makes sure we stay pinned in memory until the ordered extent can get a reference on it and we are good to go. With this patch we no longer panic in btrfs_finish_ordered_io(). Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b7f398c36cb..06075043da5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -986,8 +986,10 @@ static noinline void async_cow_start(struct btrfs_work *work) compress_file_range(async_cow->inode, async_cow->locked_page, async_cow->start, async_cow->end, async_cow, &num_added); - if (num_added == 0) + if (num_added == 0) { + iput(async_cow->inode); async_cow->inode = NULL; + } } /* @@ -1020,6 +1022,8 @@ static noinline void async_cow_free(struct btrfs_work *work) { struct async_cow *async_cow; async_cow = container_of(work, struct async_cow, work); + if (async_cow->inode) + iput(async_cow->inode); kfree(async_cow); } @@ -1038,7 +1042,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, while (start < end) { async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); BUG_ON(!async_cow); /* -ENOMEM */ - async_cow->inode = inode; + async_cow->inode = igrab(inode); async_cow->root = root; async_cow->locked_page = locked_page; async_cow->start = start; From 7ddf5a42d311d74fd9f7373cb56def0843c219f8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 8 Jun 2012 15:26:47 -0400 Subject: [PATCH 458/475] Btrfs: call filemap_fdatawrite twice for compression I removed this in an earlier commit and I was wrong. Because compression can return from filemap_fdatawrite() without having actually set any of it's pages as writeback() it can make filemap_fdatawait() do essentially nothing, and then we won't find any ordered extents because they may not have been created yet. So not only does this make fsync() completely useless, but it will also screw up if you truncate on a non-page aligned offset since we zero out the end and then wait on ordered extents and then call drop caches. We can drop the cache before the io completes and then we try to unpin the extent we just wrote we won't find it and everything goes sideways. So fix this by putting it back and put a giant comment there to keep me from trying to remove it in the future. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/inode.c | 15 +++++++++------ fs/btrfs/ordered-data.c | 22 +++++++++++++++++++++- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index e616f8872e6..12394a90d60 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -37,6 +37,7 @@ #define BTRFS_INODE_IN_DEFRAG 3 #define BTRFS_INODE_DELALLOC_META_RESERVED 4 #define BTRFS_INODE_HAS_ORPHAN_ITEM 5 +#define BTRFS_INODE_HAS_ASYNC_EXTENT 6 /* in memory btrfs inode */ struct btrfs_inode { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 06075043da5..7a090fb4eb9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1398,20 +1398,23 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, int ret; struct btrfs_root *root = BTRFS_I(inode)->root; - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) { ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 1, nr_written); - else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) + } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) { ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); - else if (!btrfs_test_opt(root, COMPRESS) && - !(BTRFS_I(inode)->force_compress) && - !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) + } else if (!btrfs_test_opt(root, COMPRESS) && + !(BTRFS_I(inode)->force_compress) && + !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) { ret = cow_file_range(inode, locked_page, start, end, page_started, nr_written, 1); - else + } else { + set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags); ret = cow_file_range_async(inode, locked_page, start, end, page_started, nr_written); + } return ret; } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 9e138cdc36c..643335a4fe3 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -627,7 +627,27 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) /* start IO across the range first to instantiate any delalloc * extents */ - filemap_write_and_wait_range(inode->i_mapping, start, orig_end); + filemap_fdatawrite_range(inode->i_mapping, start, orig_end); + + /* + * So with compression we will find and lock a dirty page and clear the + * first one as dirty, setup an async extent, and immediately return + * with the entire range locked but with nobody actually marked with + * writeback. So we can't just filemap_write_and_wait_range() and + * expect it to work since it will just kick off a thread to do the + * actual work. So we need to call filemap_fdatawrite_range _again_ + * since it will wait on the page lock, which won't be unlocked until + * after the pages have been marked as writeback and so we're good to go + * from there. We have to do this otherwise we'll miss the ordered + * extents and that results in badness. Please Josef, do not think you + * know better and pull this out at some point in the future, it is + * right and you are wrong. + */ + if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags)) + filemap_fdatawrite_range(inode->i_mapping, start, orig_end); + + filemap_fdatawait_range(inode->i_mapping, start, orig_end); end = orig_end; found = 0; From 6c282eb40ed6f64a51ff447707714df551d85b8e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 11 Jun 2012 16:03:35 +0800 Subject: [PATCH 459/475] Btrfs: fix defrag regression If a file has 3 small extents: | ext1 | ext2 | ext3 | Running "btrfs fi defrag" will only defrag the last two extents, if those extent mappings hasn't been read into memory from disk. This bug was introduced by commit 17ce6ef8d731af5edac8c39e806db4c7e1f6956f ("Btrfs: add a check to decide if we should defrag the range") The cause is, that commit looked into previous and next extents using lookup_extent_mapping() only. While at it, remove the code that checks the previous extent, since it's sufficient to check the next extent. Signed-off-by: Li Zefan --- fs/btrfs/ioctl.c | 101 ++++++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 50 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c5254dde1f0..a98f7d25282 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -786,48 +786,12 @@ none: return -ENOENT; } -/* - * Validaty check of prev em and next em: - * 1) no prev/next em - * 2) prev/next em is an hole/inline extent - */ -static int check_adjacent_extents(struct inode *inode, struct extent_map *em) +static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) { struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct extent_map *prev = NULL, *next = NULL; - int ret = 0; - - read_lock(&em_tree->lock); - prev = lookup_extent_mapping(em_tree, em->start - 1, (u64)-1); - next = lookup_extent_mapping(em_tree, em->start + em->len, (u64)-1); - read_unlock(&em_tree->lock); - - if ((!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) && - (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)) - ret = 1; - free_extent_map(prev); - free_extent_map(next); - - return ret; -} - -static int should_defrag_range(struct inode *inode, u64 start, u64 len, - int thresh, u64 *last_len, u64 *skip, - u64 *defrag_end) -{ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct extent_map *em = NULL; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - int ret = 1; - - /* - * make sure that once we start defragging an extent, we keep on - * defragging it - */ - if (start < *defrag_end) - return 1; - - *skip = 0; + struct extent_map *em; + u64 len = PAGE_CACHE_SIZE; /* * hopefully we have this extent in the tree already, try without @@ -844,27 +808,64 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, unlock_extent(io_tree, start, start + len - 1); if (IS_ERR(em)) - return 0; + return NULL; } + return em; +} + +static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) +{ + struct extent_map *next; + bool ret = true; + + /* this is the last extent */ + if (em->start + em->len >= i_size_read(inode)) + return false; + + next = defrag_lookup_extent(inode, em->start + em->len); + if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) + ret = false; + + free_extent_map(next); + return ret; +} + +static int should_defrag_range(struct inode *inode, u64 start, int thresh, + u64 *last_len, u64 *skip, u64 *defrag_end) +{ + struct extent_map *em; + int ret = 1; + bool next_mergeable = true; + + /* + * make sure that once we start defragging an extent, we keep on + * defragging it + */ + if (start < *defrag_end) + return 1; + + *skip = 0; + + em = defrag_lookup_extent(inode, start); + if (!em) + return 0; + /* this will cover holes, and inline extents */ if (em->block_start >= EXTENT_MAP_LAST_BYTE) { ret = 0; goto out; } - /* If we have nothing to merge with us, just skip. */ - if (check_adjacent_extents(inode, em)) { - ret = 0; - goto out; - } + next_mergeable = defrag_check_next_extent(inode, em); /* - * we hit a real extent, if it is big don't bother defragging it again + * we hit a real extent, if it is big or the next extent is not a + * real extent, don't bother defragging it */ - if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) + if ((*last_len == 0 || *last_len >= thresh) && + (em->len >= thresh || !next_mergeable)) ret = 0; - out: /* * last_len ends up being a counter of how many bytes we've defragged. @@ -1143,8 +1144,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, break; if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, extent_thresh, - &last_len, &skip, &defrag_end)) { + extent_thresh, &last_len, &skip, + &defrag_end)) { unsigned long next; /* * the should_defrag function tells us how much to skip From 69e380d176e73653e09ce2cf3a5dc09682a69229 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 11 Jun 2012 17:10:51 +0800 Subject: [PATCH 460/475] Btrfs: fix incompat flags setting It's a bug, but it happens to work, as BTRFS_COMPRESS_LZO == 2, which has only one bit set. Signed-off-by: Li Zefan --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1c9664b16cd..9a569aef72e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2119,7 +2119,7 @@ int open_ctree(struct super_block *sb, features = btrfs_super_incompat_flags(disk_super); features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; - if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) + if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO) features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; /* From f948501b36c6b3d9352ce212a197098a7e958971 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 15 Jun 2012 14:51:39 +1000 Subject: [PATCH 461/475] Make hard_irq_disable() actually hard-disable interrupts At present, hard_irq_disable() does nothing on powerpc because of this code in include/linux/interrupt.h: #ifndef hard_irq_disable #define hard_irq_disable() do { } while(0) #endif So we need to make our hard_irq_disable be a macro. It was previously a macro until commit 7230c56441 ("powerpc: Rework lazy-interrupt handling") changed it to a static inline function. Cc: stable@vger.kernel.org Acked-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras -- arch/powerpc/include/asm/hw_irq.h | 3 +++ 1 file changed, 3 insertions(+) --- arch/powerpc/include/asm/hw_irq.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index c9aac24b02e..32b394f3b85 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -100,6 +100,9 @@ static inline void hard_irq_disable(void) get_paca()->irq_happened |= PACA_IRQ_HARD_DIS; } +/* include/linux/interrupt.h needs hard_irq_disable to be a macro */ +#define hard_irq_disable hard_irq_disable + /* * This is called by asynchronous interrupts to conditionally * re-enable hard interrupts when soft-disabled after having From bc1782374b128103ae9689e0753e0610f35b6bfd Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 14 Jun 2012 02:23:18 -0600 Subject: [PATCH 462/475] Btrfs: fix missing inherited flag in rename When we move a file into a directory with compression flag, we need to inherite BTRFS_INODE_COMPRESS and clear BTRFS_INODE_NOCOMPRESS as well. But if we move a file into a directory without compression flag, we need to clear both of them. It is the way how our setflags deals with compression flag, so keep the same behaviour here. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7a090fb4eb9..3f2c8cbe5ba 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7122,10 +7122,13 @@ static void fixup_inode_flags(struct inode *dir, struct inode *inode) else b_inode->flags &= ~BTRFS_INODE_NODATACOW; - if (b_dir->flags & BTRFS_INODE_COMPRESS) + if (b_dir->flags & BTRFS_INODE_COMPRESS) { b_inode->flags |= BTRFS_INODE_COMPRESS; - else - b_inode->flags &= ~BTRFS_INODE_COMPRESS; + b_inode->flags &= ~BTRFS_INODE_NOCOMPRESS; + } else { + b_inode->flags &= ~(BTRFS_INODE_COMPRESS | + BTRFS_INODE_NOCOMPRESS); + } } static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, From 4e42ae1bdcda77fc958a17d7ff4ba5a9c9c207da Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 14 Jun 2012 02:23:19 -0600 Subject: [PATCH 463/475] Btrfs: do not resize a seeding device Seeding devices are not supposed to change any more. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a98f7d25282..58adbd0356d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1306,6 +1306,13 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, ret = -EINVAL; goto out_free; } + if (device->fs_devices && device->fs_devices->seeding) { + printk(KERN_INFO "btrfs: resizer unable to apply on " + "seeding device %llu\n", devid); + ret = -EINVAL; + goto out_free; + } + if (!strcmp(sizestr, "max")) new_size = device->bdev->bd_inode->i_size; else { From 6e841e32b159e298debbb2cb0e9158e894fcaf05 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 14 Jun 2012 02:23:20 -0600 Subject: [PATCH 464/475] Btrfs: avoid memory leak of extent state in error handling routine We've forgotten to clear extent states in pinned tree, which will results in space counter mismatch and memory leak: WARNING: at fs/btrfs/extent-tree.c:7537 btrfs_free_block_groups+0x1f3/0x2e0 [btrfs]() ... space_info 2 has 8380416 free, is not full space_info total=12582912, used=4096, pinned=4096, reserved=0, may_use=0, readonly=4194304 btrfs state leak: start 29364224 end 29376511 state 1 in tree ffff880075f20090 refs 1 ... Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9a569aef72e..63a36232788 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3601,6 +3601,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages, EXTENT_DIRTY); + btrfs_destroy_pinned_extent(root, + root->fs_info->pinned_extents); /* memset(cur_trans, 0, sizeof(*cur_trans)); From ed0eaa14981e87a1e185b61e4ef621c440e3930c Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 14 Jun 2012 02:23:21 -0600 Subject: [PATCH 465/475] Btrfs: make sure that we've made everything in pinned tree clean Since we have two trees for recording pinned extents, we need to go through both of them to make sure that we've done everything clean. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 63a36232788..ffdd76bf05d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3557,8 +3557,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, u64 start; u64 end; int ret; + bool loop = true; unpin = pinned_extents; +again: while (1) { ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY); @@ -3576,6 +3578,15 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, cond_resched(); } + if (loop) { + if (unpin == &root->fs_info->freed_extents[0]) + unpin = &root->fs_info->freed_extents[1]; + else + unpin = &root->fs_info->freed_extents[0]; + loop = false; + goto again; + } + return 0; } From 67cde3448d951b55088a6ea3bb1aee0160068fb9 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 14 Jun 2012 02:23:22 -0600 Subject: [PATCH 466/475] Btrfs: destroy the items of the delayed inodes in error handling routine the items of the delayed inodes were forgotten to be freed, this patch fixes it. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/delayed-inode.c | 18 ++++++++++++++++++ fs/btrfs/delayed-inode.h | 3 +++ fs/btrfs/disk-io.c | 6 ++++++ 3 files changed, 27 insertions(+) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index c18d0442ae6..2399f408691 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1879,3 +1879,21 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) } } } + +void btrfs_destroy_delayed_inodes(struct btrfs_root *root) +{ + struct btrfs_delayed_root *delayed_root; + struct btrfs_delayed_node *curr_node, *prev_node; + + delayed_root = btrfs_get_delayed_root(root); + + curr_node = btrfs_first_delayed_node(delayed_root); + while (curr_node) { + __btrfs_kill_delayed_node(curr_node); + + prev_node = curr_node; + curr_node = btrfs_next_delayed_node(curr_node); + btrfs_release_delayed_node(prev_node); + } +} + diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 7083d08b2a2..f5aa4023d3e 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -124,6 +124,9 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev); /* Used for drop dead root */ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); +/* Used for clean the transaction */ +void btrfs_destroy_delayed_inodes(struct btrfs_root *root); + /* Used for readdir() */ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, struct list_head *del_list); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ffdd76bf05d..e22c5bbf022 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3608,6 +3608,9 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, cur_trans->commit_done = 1; wake_up(&cur_trans->commit_wait); + btrfs_destroy_delayed_inodes(root); + btrfs_assert_delayed_root_empty(root); + btrfs_destroy_pending_snapshots(cur_trans); btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages, @@ -3662,6 +3665,9 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) if (waitqueue_active(&t->commit_wait)) wake_up(&t->commit_wait); + btrfs_destroy_delayed_inodes(root); + btrfs_assert_delayed_root_empty(root); + btrfs_destroy_pending_snapshots(t); btrfs_destroy_delalloc_inodes(root); From 9c106405ddf893fcd04cd46555464417d2df8451 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 14 Jun 2012 02:23:25 -0600 Subject: [PATCH 467/475] Btrfs: update MAINTAINERS info for BTRFS FILE SYSTEM Update to the latest btrfs's maintainer mail and git repo. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index b3627098650..8bfa7f9d984 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1630,11 +1630,11 @@ S: Maintained F: drivers/gpio/gpio-bt8xx.c BTRFS FILE SYSTEM -M: Chris Mason +M: Chris Mason L: linux-btrfs@vger.kernel.org W: http://btrfs.wiki.kernel.org/ Q: http://patchwork.kernel.org/project/linux-btrfs/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs.git S: Maintained F: Documentation/filesystems/btrfs.txt F: fs/btrfs/ From 4325edd0786fdd3909f9550e52a963b2fe54f78b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Jun 2012 20:02:02 -0400 Subject: [PATCH 468/475] Btrfs: init old_generation in get_old_root gcc was giving an uninit variable warning here. Strictly speaking we don't need to init it, but this will make things much less error prone. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 04b06bcf2ca..15cbc2bf4ff 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1175,7 +1175,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) struct tree_mod_elem *tm; struct extent_buffer *eb; struct tree_mod_root *old_root = NULL; - u64 old_generation; + u64 old_generation = 0; u64 logical; eb = btrfs_read_lock_root_node(root); From a8c4a33b98b097e69cd1a10672c43d17ad0ffb25 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Jun 2012 20:07:17 -0400 Subject: [PATCH 469/475] Btrfs: cast devid to unsigned long long for printk %llu Avoid warning in 32 bit machines Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 58adbd0356d..0e92e576300 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1308,7 +1308,8 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, } if (device->fs_devices && device->fs_devices->seeding) { printk(KERN_INFO "btrfs: resizer unable to apply on " - "seeding device %llu\n", devid); + "seeding device %llu\n", + (unsigned long long)devid); ret = -EINVAL; goto out_free; } From 9b15b817f3d62409290fd56fe3cbb076a931bb0a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 15 Jun 2012 17:55:50 -0700 Subject: [PATCH 470/475] swap: fix shmem swapping when more than 8 areas Minchan Kim reports that when a system has many swap areas, and tmpfs swaps out to the ninth or more, shmem_getpage_gfp()'s attempts to read back the page cannot locate it, and the read fails with -ENOMEM. Whoops. Yes, I blindly followed read_swap_header()'s pte_to_swp_entry( swp_entry_to_pte()) technique for determining maximum usable swap offset, without stopping to realize that that actually depends upon the pte swap encoding shifting swap offset to the higher bits and truncating it there. Whereas our radix_tree swap encoding leaves offset in the lower bits: it's swap "type" (that is, index of swap area) that was truncated. Fix it by reducing the SWP_TYPE_SHIFT() in swapops.h, and removing the broken radix_to_swp_entry(swp_to_radix_entry()) from read_swap_header(). This does not reduce the usable size of a swap area any further, it leaves it as claimed when making the original commit: no change from 3.0 on x86_64, nor on i386 without PAE; but 3.0's 512GB is reduced to 128GB per swapfile on i386 with PAE. It's not a change I would have risked five years ago, but with x86_64 supported for ten years, I believe it's appropriate now. Hmm, and what if some architecture implements its swap pte with offset encoded below type? That would equally break the maximum usable swap offset check. Happily, they all follow the same tradition of encoding offset above type, but I'll prepare a check on that for next. Reported-and-Reviewed-and-Tested-by: Minchan Kim Signed-off-by: Hugh Dickins Cc: stable@vger.kernel.org [3.1, 3.2, 3.3, 3.4] Signed-off-by: Linus Torvalds --- include/linux/swapops.h | 8 +++++--- mm/swapfile.c | 12 ++++-------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 792d16d9cbc..47ead515c81 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -9,13 +9,15 @@ * get good packing density in that tree, so the index should be dense in * the low-order bits. * - * We arrange the `type' and `offset' fields so that `type' is at the five + * We arrange the `type' and `offset' fields so that `type' is at the seven * high-order bits of the swp_entry_t and `offset' is right-aligned in the - * remaining bits. + * remaining bits. Although `type' itself needs only five bits, we allow for + * shmem/tmpfs to shift it all up a further two bits: see swp_to_radix_entry(). * * swp_entry_t's are *never* stored anywhere in their arch-dependent format. */ -#define SWP_TYPE_SHIFT(e) (sizeof(e.val) * 8 - MAX_SWAPFILES_SHIFT) +#define SWP_TYPE_SHIFT(e) ((sizeof(e.val) * 8) - \ + (MAX_SWAPFILES_SHIFT + RADIX_TREE_EXCEPTIONAL_SHIFT)) #define SWP_OFFSET_MASK(e) ((1UL << SWP_TYPE_SHIFT(e)) - 1) /* diff --git a/mm/swapfile.c b/mm/swapfile.c index de5bc51c4a6..71373d03fce 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1916,24 +1916,20 @@ static unsigned long read_swap_header(struct swap_info_struct *p, /* * Find out how many pages are allowed for a single swap - * device. There are three limiting factors: 1) the number + * device. There are two limiting factors: 1) the number * of bits for the swap offset in the swp_entry_t type, and * 2) the number of bits in the swap pte as defined by the - * the different architectures, and 3) the number of free bits - * in an exceptional radix_tree entry. In order to find the + * different architectures. In order to find the * largest possible bit mask, a swap entry with swap type 0 * and swap offset ~0UL is created, encoded to a swap pte, * decoded to a swp_entry_t again, and finally the swap * offset is extracted. This will mask all the bits from * the initial ~0UL mask that can't be encoded in either * the swp_entry_t or the architecture definition of a - * swap pte. Then the same is done for a radix_tree entry. + * swap pte. */ maxpages = swp_offset(pte_to_swp_entry( - swp_entry_to_pte(swp_entry(0, ~0UL)))); - maxpages = swp_offset(radix_to_swp_entry( - swp_to_radix_entry(swp_entry(0, maxpages)))) + 1; - + swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; if (maxpages > swap_header->info.last_page) { maxpages = swap_header->info.last_page + 1; /* p->max is an unsigned int: don't overflow it */ From 6699c8cda36f296e315068ed9bbe4f03e6ed4044 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Sat, 16 Jun 2012 16:53:06 -0400 Subject: [PATCH 471/475] tile: fix bug in get_user() for 4-byte values The definition of 32-bit values in the 64-bit tilegx architecture is that they should be sign-extended regardless of whether they are considered signed or unsigned by the compiler. Accordingly, we need to use an "ld4s" rather than "ld4u" to load and sign-extend for get_user(). This fixes glibc bug 14238 (see http://sourceware.org/bugzilla), introduced during the 3.5 merge window. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index c3dd275f25e..9ab078a4605 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h @@ -146,7 +146,7 @@ extern int fixup_exception(struct pt_regs *regs); #ifdef __tilegx__ #define __get_user_1(x, ptr, ret) __get_user_asm(ld1u, x, ptr, ret) #define __get_user_2(x, ptr, ret) __get_user_asm(ld2u, x, ptr, ret) -#define __get_user_4(x, ptr, ret) __get_user_asm(ld4u, x, ptr, ret) +#define __get_user_4(x, ptr, ret) __get_user_asm(ld4s, x, ptr, ret) #define __get_user_8(x, ptr, ret) __get_user_asm(ld, x, ptr, ret) #else #define __get_user_1(x, ptr, ret) __get_user_asm(lb_u, x, ptr, ret) From 485802a6c524e62b5924849dd727ddbb1497cc71 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 16 Jun 2012 17:25:17 -0700 Subject: [PATCH 472/475] Linux 3.5-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d845c2a1aa6..b771af58338 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 5 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Saber-toothed Squirrel # *DOCUMENTATION* From fc3ebd788e894b4dd6c9524cb3874eeeb1e862d6 Mon Sep 17 00:00:00 2001 From: Krystian Garbaciak Date: Fri, 15 Jun 2012 11:23:56 +0100 Subject: [PATCH 473/475] regmap: Move lock out from internal function _regmap_update_bits(). Locks are moved to regmap_update_bits(), which allows to reenter internal function _regmap_update_bits() from inside of regmap read/write routines. Signed-off-by: Krystian Garbaciak Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index c89aa01fb1d..652017991da 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -982,11 +982,9 @@ static int _regmap_update_bits(struct regmap *map, unsigned int reg, int ret; unsigned int tmp, orig; - map->lock(map); - ret = _regmap_read(map, reg, &orig); if (ret != 0) - goto out; + return ret; tmp = orig & ~mask; tmp |= val & mask; @@ -998,9 +996,6 @@ static int _regmap_update_bits(struct regmap *map, unsigned int reg, *change = false; } -out: - map->unlock(map); - return ret; } @@ -1018,7 +1013,13 @@ int regmap_update_bits(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val) { bool change; - return _regmap_update_bits(map, reg, mask, val, &change); + int ret; + + map->lock(map); + ret = _regmap_update_bits(map, reg, mask, val, &change); + map->unlock(map); + + return ret; } EXPORT_SYMBOL_GPL(regmap_update_bits); @@ -1038,7 +1039,12 @@ int regmap_update_bits_check(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val, bool *change) { - return _regmap_update_bits(map, reg, mask, val, change); + int ret; + + map->lock(map); + ret = _regmap_update_bits(map, reg, mask, val, change); + map->unlock(map); + return ret; } EXPORT_SYMBOL_GPL(regmap_update_bits_check); From 6863ca6227598d15c372f1e03449bbb4cfbcca7f Mon Sep 17 00:00:00 2001 From: Krystian Garbaciak Date: Fri, 15 Jun 2012 11:23:56 +0100 Subject: [PATCH 474/475] regmap: Add support for register indirect addressing. Devices with register paging or indirectly accessed registers can configure register mapping to map those on virtual address range. During access to virtually mapped register range, indirect addressing is processed automatically, in following steps: 1. selector for page or indirect register is updated (when needed); 2. register in data window is accessed. Configuration should provide minimum and maximum register for virtual range, details of selector field for page selection, minimum and maximum register of data window for indirect access. Virtual range registers are managed by cache as well as direct access registers. In order to make indirect access more efficient, selector register should be declared as non-volatile, if possible. struct regmap_config is extended with the following: struct regmap_range_cfg *ranges; unsigned int n_ranges; [Also reordered debugfs init to later on since the cleanup code was conflicting with the new cleanup code for ranges anyway -- broonie] Signed-off-by: Krystian Garbaciak Signed-off-by: Mark Brown --- drivers/base/regmap/internal.h | 17 +++ drivers/base/regmap/regmap.c | 201 ++++++++++++++++++++++++++++++++- include/linux/regmap.h | 39 +++++++ 3 files changed, 252 insertions(+), 5 deletions(-) diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index b986b8660b0..80f9ab9c3aa 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -95,6 +95,9 @@ struct regmap { /* if set, converts bulk rw to single rw */ bool use_single_rw; + + struct rb_root range_tree; + void *selector_work_buf; /* Scratch buffer used for selector */ }; struct regcache_ops { @@ -115,6 +118,20 @@ bool regmap_precious(struct regmap *map, unsigned int reg); int _regmap_write(struct regmap *map, unsigned int reg, unsigned int val); +struct regmap_range_node { + struct rb_node node; + + unsigned int range_min; + unsigned int range_max; + + unsigned int selector_reg; + unsigned int selector_mask; + int selector_shift; + + unsigned int window_start; + unsigned int window_len; +}; + #ifdef CONFIG_DEBUG_FS extern void regmap_debugfs_initcall(void); extern void regmap_debugfs_init(struct regmap *map, const char *name); diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 652017991da..83a0166420a 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -15,12 +15,17 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include #include "internal.h" +static int _regmap_update_bits(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val, + bool *change); + bool regmap_writeable(struct regmap *map, unsigned int reg) { if (map->max_register && reg > map->max_register) @@ -208,6 +213,67 @@ static void dev_get_regmap_release(struct device *dev, void *res) */ } +static bool _regmap_range_add(struct regmap *map, + struct regmap_range_node *data) +{ + struct rb_root *root = &map->range_tree; + struct rb_node **new = &(root->rb_node), *parent = NULL; + + while (*new) { + struct regmap_range_node *this = + container_of(*new, struct regmap_range_node, node); + + parent = *new; + if (data->range_max < this->range_min) + new = &((*new)->rb_left); + else if (data->range_min > this->range_max) + new = &((*new)->rb_right); + else + return false; + } + + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); + + return true; +} + +static struct regmap_range_node *_regmap_range_lookup(struct regmap *map, + unsigned int reg) +{ + struct rb_node *node = map->range_tree.rb_node; + + while (node) { + struct regmap_range_node *this = + container_of(node, struct regmap_range_node, node); + + if (reg < this->range_min) + node = node->rb_left; + else if (reg > this->range_max) + node = node->rb_right; + else + return this; + } + + return NULL; +} + +static void regmap_range_exit(struct regmap *map) +{ + struct rb_node *next; + struct regmap_range_node *range_node; + + next = rb_first(&map->range_tree); + while (next) { + range_node = rb_entry(next, struct regmap_range_node, node); + next = rb_next(&range_node->node); + rb_erase(&range_node->node, &map->range_tree); + kfree(range_node); + } + + kfree(map->selector_work_buf); +} + /** * regmap_init(): Initialise register map * @@ -227,6 +293,7 @@ struct regmap *regmap_init(struct device *dev, { struct regmap *map, **m; int ret = -EINVAL; + int i, j; if (!bus || !config) goto err; @@ -364,27 +431,88 @@ struct regmap *regmap_init(struct device *dev, goto err_map; } - regmap_debugfs_init(map, config->name); + map->range_tree = RB_ROOT; + for (i = 0; i < config->n_ranges; i++) { + const struct regmap_range_cfg *range_cfg = &config->ranges[i]; + struct regmap_range_node *new; + + /* Sanity check */ + if (range_cfg->range_max < range_cfg->range_min || + range_cfg->range_max > map->max_register || + range_cfg->selector_reg > map->max_register || + range_cfg->window_len == 0) + goto err_range; + + /* Make sure, that this register range has no selector + or data window within its boundary */ + for (j = 0; j < config->n_ranges; j++) { + unsigned sel_reg = config->ranges[j].selector_reg; + unsigned win_min = config->ranges[j].window_start; + unsigned win_max = win_min + + config->ranges[j].window_len - 1; + + if (range_cfg->range_min <= sel_reg && + sel_reg <= range_cfg->range_max) { + goto err_range; + } + + if (!(win_max < range_cfg->range_min || + win_min > range_cfg->range_max)) { + goto err_range; + } + } + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (new == NULL) { + ret = -ENOMEM; + goto err_range; + } + + new->range_min = range_cfg->range_min; + new->range_max = range_cfg->range_max; + new->selector_reg = range_cfg->selector_reg; + new->selector_mask = range_cfg->selector_mask; + new->selector_shift = range_cfg->selector_shift; + new->window_start = range_cfg->window_start; + new->window_len = range_cfg->window_len; + + if (_regmap_range_add(map, new) == false) { + kfree(new); + goto err_range; + } + + if (map->selector_work_buf == NULL) { + map->selector_work_buf = + kzalloc(map->format.buf_size, GFP_KERNEL); + if (map->selector_work_buf == NULL) { + ret = -ENOMEM; + goto err_range; + } + } + } ret = regcache_init(map, config); if (ret < 0) - goto err_debugfs; + goto err_range; + + regmap_debugfs_init(map, config->name); /* Add a devres resource for dev_get_regmap() */ m = devres_alloc(dev_get_regmap_release, sizeof(*m), GFP_KERNEL); if (!m) { ret = -ENOMEM; - goto err_cache; + goto err_debugfs; } *m = map; devres_add(dev, m); return map; -err_cache: - regcache_exit(map); err_debugfs: regmap_debugfs_exit(map); + regcache_exit(map); +err_range: + regmap_range_exit(map); kfree(map->work_buf); err_map: kfree(map); @@ -481,6 +609,7 @@ void regmap_exit(struct regmap *map) { regcache_exit(map); regmap_debugfs_exit(map); + regmap_range_exit(map); if (map->bus->free_context) map->bus->free_context(map->bus_context); kfree(map->work_buf); @@ -526,6 +655,56 @@ struct regmap *dev_get_regmap(struct device *dev, const char *name) } EXPORT_SYMBOL_GPL(dev_get_regmap); +static int _regmap_select_page(struct regmap *map, unsigned int *reg, + unsigned int val_num) +{ + struct regmap_range_node *range; + void *orig_work_buf; + unsigned int win_offset; + unsigned int win_page; + bool page_chg; + int ret; + + range = _regmap_range_lookup(map, *reg); + if (range) { + win_offset = (*reg - range->range_min) % range->window_len; + win_page = (*reg - range->range_min) / range->window_len; + + if (val_num > 1) { + /* Bulk write shouldn't cross range boundary */ + if (*reg + val_num - 1 > range->range_max) + return -EINVAL; + + /* ... or single page boundary */ + if (val_num > range->window_len - win_offset) + return -EINVAL; + } + + /* It is possible to have selector register inside data window. + In that case, selector register is located on every page and + it needs no page switching, when accessed alone. */ + if (val_num > 1 || + range->window_start + win_offset != range->selector_reg) { + /* Use separate work_buf during page switching */ + orig_work_buf = map->work_buf; + map->work_buf = map->selector_work_buf; + + ret = _regmap_update_bits(map, range->selector_reg, + range->selector_mask, + win_page << range->selector_shift, + &page_chg); + if (ret < 0) + return ret; + + map->work_buf = orig_work_buf; + } + + *reg = range->window_start + win_offset; + } + + return 0; +} + static int _regmap_raw_write(struct regmap *map, unsigned int reg, const void *val, size_t val_len) { @@ -563,6 +742,10 @@ static int _regmap_raw_write(struct regmap *map, unsigned int reg, } } + ret = _regmap_select_page(map, ®, val_len / map->format.val_bytes); + if (ret < 0) + return ret; + map->format.format_reg(map->work_buf, reg, map->reg_shift); u8[0] |= map->write_flag_mask; @@ -626,6 +809,10 @@ int _regmap_write(struct regmap *map, unsigned int reg, trace_regmap_reg_write(map->dev, reg, val); if (map->format.format_write) { + ret = _regmap_select_page(map, ®, 1); + if (ret < 0) + return ret; + map->format.format_write(map, reg, val); trace_regmap_hw_write_start(map->dev, reg, 1); @@ -783,6 +970,10 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val, u8 *u8 = map->work_buf; int ret; + ret = _regmap_select_page(map, ®, val_len / map->format.val_bytes); + if (ret < 0) + return ret; + map->format.format_reg(map->work_buf, reg, map->reg_shift); /* diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 56af22ec9ab..5f69d4ad3eb 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -14,12 +14,14 @@ */ #include +#include struct module; struct device; struct i2c_client; struct spi_device; struct regmap; +struct regmap_range_cfg; /* An enum of all the supported cache types */ enum regcache_type { @@ -84,6 +86,9 @@ struct reg_default { * @reg_defaults_raw: Power on reset values for registers (for use with * register cache support). * @num_reg_defaults_raw: Number of elements in reg_defaults_raw. + * + * @ranges: Array of configuration entries for virtual address ranges. + * @num_ranges: Number of range configuration entries. */ struct regmap_config { const char *name; @@ -109,6 +114,40 @@ struct regmap_config { u8 write_flag_mask; bool use_single_rw; + + const struct regmap_range_cfg *ranges; + unsigned int n_ranges; +}; + +/** + * Configuration for indirectly accessed or paged registers. + * Registers, mapped to this virtual range, are accessed in two steps: + * 1. page selector register update; + * 2. access through data window registers. + * + * @range_min: Address of the lowest register address in virtual range. + * @range_max: Address of the highest register in virtual range. + * + * @page_sel_reg: Register with selector field. + * @page_sel_mask: Bit shift for selector value. + * @page_sel_shift: Bit mask for selector value. + * + * @window_start: Address of first (lowest) register in data window. + * @window_len: Number of registers in data window. + */ +struct regmap_range_cfg { + /* Registers of virtual address range */ + unsigned int range_min; + unsigned int range_max; + + /* Page selector for indirect addressing */ + unsigned int selector_reg; + unsigned int selector_mask; + int selector_shift; + + /* Data window (per each page) */ + unsigned int window_start; + unsigned int window_len; }; typedef int (*regmap_hw_write)(void *context, const void *data, From 632a5b01db8cd4068a06f8a67720ea4f9b00d9b3 Mon Sep 17 00:00:00 2001 From: Krystian Garbaciak Date: Mon, 18 Jun 2012 13:04:29 +0100 Subject: [PATCH 475/475] regmap: Fix work_buf switching for page update during virtual range access. After page update, orginal work_buf has to be restored regardless of the result. Signed-off-by: Krystian Garbaciak Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 83a0166420a..d912eb2d19c 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -693,10 +693,11 @@ static int _regmap_select_page(struct regmap *map, unsigned int *reg, range->selector_mask, win_page << range->selector_shift, &page_chg); - if (ret < 0) - return ret; map->work_buf = orig_work_buf; + + if (ret < 0) + return ret; } *reg = range->window_start + win_offset;