From 90133673395849c9d4e66a563f2d0d91d92aa461 Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Mon, 7 Jun 2010 22:23:12 +0200 Subject: [PATCH 01/13] PM / Hibernate: Fix typos in comments in kernel/power/swap.c There are a few typos in kernel/power/swap.c. Fix them. Signed-off-by: Cesar Eduardo Barros Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index b0bb2177839..7c3ae83e41d 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -32,7 +32,7 @@ /* * The swap map is a data structure used for keeping track of each page * written to a swap partition. It consists of many swap_map_page - * structures that contain each an array of MAP_PAGE_SIZE swap entries. + * structures that contain each an array of MAP_PAGE_ENTRIES swap entries. * These structures are stored on the swap and linked together with the * help of the .next_swap member. * @@ -148,7 +148,7 @@ sector_t alloc_swapdev_block(int swap) /** * free_all_swap_pages - free swap pages allocated for saving image data. - * It also frees the extents used to register which swap entres had been + * It also frees the extents used to register which swap entries had been * allocated. */ From 2430d12c94ff2bafcfe4f65edf7ee5f300d2d9c6 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sun, 13 Jun 2010 00:36:52 +0200 Subject: [PATCH 02/13] PM: describe kernel policy regarding wakeup defaults (v. 2) This patch (as1381b) updates a comment describing the kernel's policy toward enabling wakeup by default. It also makes device_set_wakeup_capable() actually do something when CONFIG_PM isn't enabled. It's not clear this is necessary; however if it isn't then device_init_wakeup() and device_can_wakeup() should also be do-nothing routines. Furthermore, I don't expect this change to have any noticeable effect -- but if it does then clearly the old behavior was wrong. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki --- include/linux/pm_wakeup.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 22d64c18056..76aca48722a 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -29,8 +29,11 @@ #ifdef CONFIG_PM -/* changes to device_may_wakeup take effect on the next pm state change. - * by default, devices should wakeup if they can. +/* Changes to device_may_wakeup take effect on the next pm state change. + * + * By default, most devices should leave wakeup disabled. The exceptions + * are devices that everyone expects to be wakeup sources: keyboards, + * power buttons, possibly network interfaces, etc. */ static inline void device_init_wakeup(struct device *dev, bool val) { @@ -59,7 +62,7 @@ static inline bool device_may_wakeup(struct device *dev) #else /* !CONFIG_PM */ -/* For some reason the next two routines work even without CONFIG_PM */ +/* For some reason the following routines work even without CONFIG_PM */ static inline void device_init_wakeup(struct device *dev, bool val) { dev->power.can_wakeup = val; @@ -67,6 +70,7 @@ static inline void device_init_wakeup(struct device *dev, bool val) static inline void device_set_wakeup_capable(struct device *dev, bool capable) { + dev->power.can_wakeup = capable; } static inline bool device_can_wakeup(struct device *dev) From b14e033e17d0ea0ba12668d0d2f371cd31586994 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 29 Jun 2010 22:49:24 +0200 Subject: [PATCH 03/13] PNPACPI: Add support for remote wakeup This patch (as1354) adds remote-wakeup support to the pnpacpi driver. The new can_wakeup method also allows other PNP protocol drivers (pnpbios or iaspnp) to add wakeup support, but I don't know enough about how they work to actually do it. Signed-off-by: Alan Stern Reviewed-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/pnp/core.c | 3 +++ drivers/pnp/pnpacpi/core.c | 23 +++++++++++++++++++++++ include/linux/pnp.h | 1 + 3 files changed, 27 insertions(+) diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c index 5dba90995d9..88b3cde5259 100644 --- a/drivers/pnp/core.c +++ b/drivers/pnp/core.c @@ -164,6 +164,9 @@ int __pnp_add_device(struct pnp_dev *dev) list_add_tail(&dev->global_list, &pnp_global); list_add_tail(&dev->protocol_list, &dev->protocol->devices); spin_unlock(&pnp_lock); + if (dev->protocol->can_wakeup) + device_set_wakeup_capable(&dev->dev, + dev->protocol->can_wakeup(dev)); return device_register(&dev->dev); } diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index f7ff628b7d9..dc4e32e031e 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -122,17 +122,37 @@ static int pnpacpi_disable_resources(struct pnp_dev *dev) } #ifdef CONFIG_ACPI_SLEEP +static bool pnpacpi_can_wakeup(struct pnp_dev *dev) +{ + struct acpi_device *acpi_dev = dev->data; + acpi_handle handle = acpi_dev->handle; + + return acpi_bus_can_wakeup(handle); +} + static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state) { struct acpi_device *acpi_dev = dev->data; acpi_handle handle = acpi_dev->handle; int power_state; + if (device_can_wakeup(&dev->dev)) { + int rc = acpi_pm_device_sleep_wake(&dev->dev, + device_may_wakeup(&dev->dev)); + + if (rc) + return rc; + } power_state = acpi_pm_device_sleep_state(&dev->dev, NULL); if (power_state < 0) power_state = (state.event == PM_EVENT_ON) ? ACPI_STATE_D0 : ACPI_STATE_D3; + /* acpi_bus_set_power() often fails (keyboard port can't be + * powered-down?), and in any case, our return value is ignored + * by pnp_bus_suspend(). Hence we don't revert the wakeup + * setting if the set_power fails. + */ return acpi_bus_set_power(handle, power_state); } @@ -141,6 +161,8 @@ static int pnpacpi_resume(struct pnp_dev *dev) struct acpi_device *acpi_dev = dev->data; acpi_handle handle = acpi_dev->handle; + if (device_may_wakeup(&dev->dev)) + acpi_pm_device_sleep_wake(&dev->dev, false); return acpi_bus_set_power(handle, ACPI_STATE_D0); } #endif @@ -151,6 +173,7 @@ struct pnp_protocol pnpacpi_protocol = { .set = pnpacpi_set_resources, .disable = pnpacpi_disable_resources, #ifdef CONFIG_ACPI_SLEEP + .can_wakeup = pnpacpi_can_wakeup, .suspend = pnpacpi_suspend, .resume = pnpacpi_resume, #endif diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 7c4193eb007..1bc1338b817 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -414,6 +414,7 @@ struct pnp_protocol { int (*disable) (struct pnp_dev *dev); /* protocol specific suspend/resume */ + bool (*can_wakeup) (struct pnp_dev *dev); int (*suspend) (struct pnp_dev * dev, pm_message_t state); int (*resume) (struct pnp_dev * dev); From c125e96f044427f38d106fab7bc5e4a5e6a18262 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 5 Jul 2010 22:43:53 +0200 Subject: [PATCH 04/13] PM: Make it possible to avoid races between wakeup and system sleep One of the arguments during the suspend blockers discussion was that the mainline kernel didn't contain any mechanisms making it possible to avoid races between wakeup and system suspend. Generally, there are two problems in that area. First, if a wakeup event occurs exactly when /sys/power/state is being written to, it may be delivered to user space right before the freezer kicks in, so the user space consumer of the event may not be able to process it before the system is suspended. Second, if a wakeup event occurs after user space has been frozen, it is not generally guaranteed that the ongoing transition of the system into a sleep state will be aborted. To address these issues introduce a new global sysfs attribute, /sys/power/wakeup_count, associated with a running counter of wakeup events and three helper functions, pm_stay_awake(), pm_relax(), and pm_wakeup_event(), that may be used by kernel subsystems to control the behavior of this attribute and to request the PM core to abort system transitions into a sleep state already in progress. The /sys/power/wakeup_count file may be read from or written to by user space. Reads will always succeed (unless interrupted by a signal) and return the current value of the wakeup events counter. Writes, however, will only succeed if the written number is equal to the current value of the wakeup events counter. If a write is successful, it will cause the kernel to save the current value of the wakeup events counter and to abort the subsequent system transition into a sleep state if any wakeup events are reported after the write has returned. [The assumption is that before writing to /sys/power/state user space will first read from /sys/power/wakeup_count. Next, user space consumers of wakeup events will have a chance to acknowledge or veto the upcoming system transition to a sleep state. Finally, if the transition is allowed to proceed, /sys/power/wakeup_count will be written to and if that succeeds, /sys/power/state will be written to as well. Still, if any wakeup events are reported to the PM core by kernel subsystems after that point, the transition will be aborted.] Additionally, put a wakeup events counter into struct dev_pm_info and make these per-device wakeup event counters available via sysfs, so that it's possible to check the activity of various wakeup event sources within the kernel. To illustrate how subsystems can use pm_wakeup_event(), make the low-level PCI runtime PM wakeup-handling code use it. Signed-off-by: Rafael J. Wysocki Acked-by: Jesse Barnes Acked-by: Greg Kroah-Hartman Acked-by: markgross Reviewed-by: Alan Stern --- Documentation/ABI/testing/sysfs-power | 15 ++ drivers/base/power/Makefile | 2 +- drivers/base/power/main.c | 1 + drivers/base/power/sysfs.c | 15 ++ drivers/base/power/wakeup.c | 229 ++++++++++++++++++++++++++ drivers/pci/pci-acpi.c | 1 + drivers/pci/pci.c | 20 ++- drivers/pci/pci.h | 1 + drivers/pci/pcie/pme/pcie_pme.c | 5 +- include/linux/pm.h | 10 ++ include/linux/suspend.h | 7 + kernel/power/hibernate.c | 20 ++- kernel/power/main.c | 55 +++++++ kernel/power/suspend.c | 4 +- 14 files changed, 375 insertions(+), 10 deletions(-) create mode 100644 drivers/base/power/wakeup.c diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index d6a801f45b4..2875f1f74a0 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -114,3 +114,18 @@ Description: if this file contains "1", which is the default. It may be disabled by writing "0" to this file, in which case all devices will be suspended and resumed synchronously. + +What: /sys/power/wakeup_count +Date: July 2010 +Contact: Rafael J. Wysocki +Description: + The /sys/power/wakeup_count file allows user space to put the + system into a sleep state while taking into account the + concurrent arrival of wakeup events. Reading from it returns + the current number of registered wakeup events and it blocks if + some wakeup events are being processed at the time the file is + read from. Writing to it will only succeed if the current + number of wakeup events is equal to the written value and, if + successful, will make the kernel abort a subsequent transition + to a sleep state if any wakeup events are reported after the + write has returned. diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 89de75325ce..cbccf9a3cee 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_PM) += sysfs.o -obj-$(CONFIG_PM_SLEEP) += main.o +obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o obj-$(CONFIG_PM_RUNTIME) += runtime.o obj-$(CONFIG_PM_OPS) += generic_ops.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 941fcb87e52..5419a49ff13 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -59,6 +59,7 @@ void device_pm_init(struct device *dev) { dev->power.status = DPM_ON; init_completion(&dev->power.completion); + dev->power.wakeup_count = 0; pm_runtime_init(dev); } diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index a4c33bc5125..81d344e0e95 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -73,6 +73,8 @@ * device are known to the PM core. However, for some devices this * attribute is set to "enabled" by bus type code or device drivers and in * that cases it should be safe to leave the default value. + * + * wakeup_count - Report the number of wakeup events related to the device */ static const char enabled[] = "enabled"; @@ -144,6 +146,16 @@ wake_store(struct device * dev, struct device_attribute *attr, static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store); +#ifdef CONFIG_PM_SLEEP +static ssize_t wakeup_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", dev->power.wakeup_count); +} + +static DEVICE_ATTR(wakeup_count, 0444, wakeup_count_show, NULL); +#endif + #ifdef CONFIG_PM_ADVANCED_DEBUG #ifdef CONFIG_PM_RUNTIME @@ -230,6 +242,9 @@ static struct attribute * power_attrs[] = { &dev_attr_control.attr, #endif &dev_attr_wakeup.attr, +#ifdef CONFIG_PM_SLEEP + &dev_attr_wakeup_count.attr, +#endif #ifdef CONFIG_PM_ADVANCED_DEBUG &dev_attr_async.attr, #ifdef CONFIG_PM_RUNTIME diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c new file mode 100644 index 00000000000..25599077c39 --- /dev/null +++ b/drivers/base/power/wakeup.c @@ -0,0 +1,229 @@ +/* + * drivers/base/power/wakeup.c - System wakeup events framework + * + * Copyright (c) 2010 Rafael J. Wysocki , Novell Inc. + * + * This file is released under the GPLv2. + */ + +#include +#include +#include +#include +#include +#include + +/* + * If set, the suspend/hibernate code will abort transitions to a sleep state + * if wakeup events are registered during or immediately before the transition. + */ +bool events_check_enabled; + +/* The counter of registered wakeup events. */ +static unsigned long event_count; +/* A preserved old value of event_count. */ +static unsigned long saved_event_count; +/* The counter of wakeup events being processed. */ +static unsigned long events_in_progress; + +static DEFINE_SPINLOCK(events_lock); + +/* + * The functions below use the observation that each wakeup event starts a + * period in which the system should not be suspended. The moment this period + * will end depends on how the wakeup event is going to be processed after being + * detected and all of the possible cases can be divided into two distinct + * groups. + * + * First, a wakeup event may be detected by the same functional unit that will + * carry out the entire processing of it and possibly will pass it to user space + * for further processing. In that case the functional unit that has detected + * the event may later "close" the "no suspend" period associated with it + * directly as soon as it has been dealt with. The pair of pm_stay_awake() and + * pm_relax(), balanced with each other, is supposed to be used in such + * situations. + * + * Second, a wakeup event may be detected by one functional unit and processed + * by another one. In that case the unit that has detected it cannot really + * "close" the "no suspend" period associated with it, unless it knows in + * advance what's going to happen to the event during processing. This + * knowledge, however, may not be available to it, so it can simply specify time + * to wait before the system can be suspended and pass it as the second + * argument of pm_wakeup_event(). + */ + +/** + * pm_stay_awake - Notify the PM core that a wakeup event is being processed. + * @dev: Device the wakeup event is related to. + * + * Notify the PM core of a wakeup event (signaled by @dev) by incrementing the + * counter of wakeup events being processed. If @dev is not NULL, the counter + * of wakeup events related to @dev is incremented too. + * + * Call this function after detecting of a wakeup event if pm_relax() is going + * to be called directly after processing the event (and possibly passing it to + * user space for further processing). + * + * It is safe to call this function from interrupt context. + */ +void pm_stay_awake(struct device *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&events_lock, flags); + if (dev) + dev->power.wakeup_count++; + + events_in_progress++; + spin_unlock_irqrestore(&events_lock, flags); +} + +/** + * pm_relax - Notify the PM core that processing of a wakeup event has ended. + * + * Notify the PM core that a wakeup event has been processed by decrementing + * the counter of wakeup events being processed and incrementing the counter + * of registered wakeup events. + * + * Call this function for wakeup events whose processing started with calling + * pm_stay_awake(). + * + * It is safe to call it from interrupt context. + */ +void pm_relax(void) +{ + unsigned long flags; + + spin_lock_irqsave(&events_lock, flags); + if (events_in_progress) { + events_in_progress--; + event_count++; + } + spin_unlock_irqrestore(&events_lock, flags); +} + +/** + * pm_wakeup_work_fn - Deferred closing of a wakeup event. + * + * Execute pm_relax() for a wakeup event detected in the past and free the + * work item object used for queuing up the work. + */ +static void pm_wakeup_work_fn(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + + pm_relax(); + kfree(dwork); +} + +/** + * pm_wakeup_event - Notify the PM core of a wakeup event. + * @dev: Device the wakeup event is related to. + * @msec: Anticipated event processing time (in milliseconds). + * + * Notify the PM core of a wakeup event (signaled by @dev) that will take + * approximately @msec milliseconds to be processed by the kernel. Increment + * the counter of wakeup events being processed and queue up a work item + * that will execute pm_relax() for the event after @msec milliseconds. If @dev + * is not NULL, the counter of wakeup events related to @dev is incremented too. + * + * It is safe to call this function from interrupt context. + */ +void pm_wakeup_event(struct device *dev, unsigned int msec) +{ + unsigned long flags; + struct delayed_work *dwork; + + dwork = msec ? kzalloc(sizeof(*dwork), GFP_ATOMIC) : NULL; + + spin_lock_irqsave(&events_lock, flags); + if (dev) + dev->power.wakeup_count++; + + if (dwork) { + INIT_DELAYED_WORK(dwork, pm_wakeup_work_fn); + schedule_delayed_work(dwork, msecs_to_jiffies(msec)); + + events_in_progress++; + } else { + event_count++; + } + spin_unlock_irqrestore(&events_lock, flags); +} + +/** + * pm_check_wakeup_events - Check for new wakeup events. + * + * Compare the current number of registered wakeup events with its preserved + * value from the past to check if new wakeup events have been registered since + * the old value was stored. Check if the current number of wakeup events being + * processed is zero. + */ +bool pm_check_wakeup_events(void) +{ + unsigned long flags; + bool ret = true; + + spin_lock_irqsave(&events_lock, flags); + if (events_check_enabled) { + ret = (event_count == saved_event_count) && !events_in_progress; + events_check_enabled = ret; + } + spin_unlock_irqrestore(&events_lock, flags); + return ret; +} + +/** + * pm_get_wakeup_count - Read the number of registered wakeup events. + * @count: Address to store the value at. + * + * Store the number of registered wakeup events at the address in @count. Block + * if the current number of wakeup events being processed is nonzero. + * + * Return false if the wait for the number of wakeup events being processed to + * drop down to zero has been interrupted by a signal (and the current number + * of wakeup events being processed is still nonzero). Otherwise return true. + */ +bool pm_get_wakeup_count(unsigned long *count) +{ + bool ret; + + spin_lock_irq(&events_lock); + if (capable(CAP_SYS_ADMIN)) + events_check_enabled = false; + + while (events_in_progress && !signal_pending(current)) { + spin_unlock_irq(&events_lock); + + schedule_timeout_interruptible(msecs_to_jiffies(100)); + + spin_lock_irq(&events_lock); + } + *count = event_count; + ret = !events_in_progress; + spin_unlock_irq(&events_lock); + return ret; +} + +/** + * pm_save_wakeup_count - Save the current number of registered wakeup events. + * @count: Value to compare with the current number of registered wakeup events. + * + * If @count is equal to the current number of registered wakeup events and the + * current number of wakeup events being processed is zero, store @count as the + * old number of registered wakeup events to be used by pm_check_wakeup_events() + * and return true. Otherwise return false. + */ +bool pm_save_wakeup_count(unsigned long count) +{ + bool ret = false; + + spin_lock_irq(&events_lock); + if (count == event_count && !events_in_progress) { + saved_event_count = count; + events_check_enabled = true; + ret = true; + } + spin_unlock_irq(&events_lock); + return ret; +} diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 2e7a3bf1382..1ab98bbe58d 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -48,6 +48,7 @@ static void pci_acpi_wake_dev(acpi_handle handle, u32 event, void *context) if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_dev) { pci_check_pme_status(pci_dev); pm_runtime_resume(&pci_dev->dev); + pci_wakeup_event(pci_dev); if (pci_dev->subordinate) pci_pme_wakeup_bus(pci_dev->subordinate); } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 740fb4ea966..130ed1daf0f 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1275,6 +1275,22 @@ bool pci_check_pme_status(struct pci_dev *dev) return ret; } +/* + * Time to wait before the system can be put into a sleep state after reporting + * a wakeup event signaled by a PCI device. + */ +#define PCI_WAKEUP_COOLDOWN 100 + +/** + * pci_wakeup_event - Report a wakeup event related to a given PCI device. + * @dev: Device to report the wakeup event for. + */ +void pci_wakeup_event(struct pci_dev *dev) +{ + if (device_may_wakeup(&dev->dev)) + pm_wakeup_event(&dev->dev, PCI_WAKEUP_COOLDOWN); +} + /** * pci_pme_wakeup - Wake up a PCI device if its PME Status bit is set. * @dev: Device to handle. @@ -1285,8 +1301,10 @@ bool pci_check_pme_status(struct pci_dev *dev) */ static int pci_pme_wakeup(struct pci_dev *dev, void *ign) { - if (pci_check_pme_status(dev)) + if (pci_check_pme_status(dev)) { pm_request_resume(&dev->dev); + pci_wakeup_event(dev); + } return 0; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index f8077b3c8c8..c8b7fd056cc 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -56,6 +56,7 @@ extern void pci_update_current_state(struct pci_dev *dev, pci_power_t state); extern void pci_disable_enabled_device(struct pci_dev *dev); extern bool pci_check_pme_status(struct pci_dev *dev); extern int pci_finish_runtime_suspend(struct pci_dev *dev); +extern void pci_wakeup_event(struct pci_dev *dev); extern int __pci_pme_wakeup(struct pci_dev *dev, void *ign); extern void pci_pme_wakeup_bus(struct pci_bus *bus); extern void pci_pm_init(struct pci_dev *dev); diff --git a/drivers/pci/pcie/pme/pcie_pme.c b/drivers/pci/pcie/pme/pcie_pme.c index d672a0a6381..bbdea18693d 100644 --- a/drivers/pci/pcie/pme/pcie_pme.c +++ b/drivers/pci/pcie/pme/pcie_pme.c @@ -154,6 +154,7 @@ static bool pcie_pme_walk_bus(struct pci_bus *bus) /* Skip PCIe devices in case we started from a root port. */ if (!pci_is_pcie(dev) && pci_check_pme_status(dev)) { pm_request_resume(&dev->dev); + pci_wakeup_event(dev); ret = true; } @@ -254,8 +255,10 @@ static void pcie_pme_handle_request(struct pci_dev *port, u16 req_id) if (found) { /* The device is there, but we have to check its PME status. */ found = pci_check_pme_status(dev); - if (found) + if (found) { pm_request_resume(&dev->dev); + pci_wakeup_event(dev); + } pci_dev_put(dev); } else if (devfn) { /* diff --git a/include/linux/pm.h b/include/linux/pm.h index 8e258c72797..b417fc46f3f 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -457,6 +457,7 @@ struct dev_pm_info { #ifdef CONFIG_PM_SLEEP struct list_head entry; struct completion completion; + unsigned long wakeup_count; #endif #ifdef CONFIG_PM_RUNTIME struct timer_list suspend_timer; @@ -552,6 +553,11 @@ extern void __suspend_report_result(const char *function, void *fn, int ret); } while (0) extern void device_pm_wait_for_dev(struct device *sub, struct device *dev); + +/* drivers/base/power/wakeup.c */ +extern void pm_wakeup_event(struct device *dev, unsigned int msec); +extern void pm_stay_awake(struct device *dev); +extern void pm_relax(void); #else /* !CONFIG_PM_SLEEP */ #define device_pm_lock() do {} while (0) @@ -565,6 +571,10 @@ static inline int dpm_suspend_start(pm_message_t state) #define suspend_report_result(fn, ret) do {} while (0) static inline void device_pm_wait_for_dev(struct device *a, struct device *b) {} + +static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {} +static inline void pm_stay_awake(struct device *dev) {} +static inline void pm_relax(void) {} #endif /* !CONFIG_PM_SLEEP */ /* How to reorder dpm_list after device_move() */ diff --git a/include/linux/suspend.h b/include/linux/suspend.h index bc7d6bb4cd8..bf1bab7b059 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -286,6 +286,13 @@ extern int unregister_pm_notifier(struct notifier_block *nb); { .notifier_call = fn, .priority = pri }; \ register_pm_notifier(&fn##_nb); \ } + +/* drivers/base/power/wakeup.c */ +extern bool events_check_enabled; + +extern bool pm_check_wakeup_events(void); +extern bool pm_get_wakeup_count(unsigned long *count); +extern bool pm_save_wakeup_count(unsigned long count); #else /* !CONFIG_PM_SLEEP */ static inline int register_pm_notifier(struct notifier_block *nb) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index aa9e916da4d..f6120291663 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -277,7 +277,7 @@ static int create_image(int platform_mode) goto Enable_irqs; } - if (hibernation_test(TEST_CORE)) + if (hibernation_test(TEST_CORE) || !pm_check_wakeup_events()) goto Power_up; in_suspend = 1; @@ -288,8 +288,10 @@ static int create_image(int platform_mode) error); /* Restore control flow magically appears here */ restore_processor_state(); - if (!in_suspend) + if (!in_suspend) { + events_check_enabled = false; platform_leave(platform_mode); + } Power_up: sysdev_resume(); @@ -511,14 +513,20 @@ int hibernation_platform_enter(void) local_irq_disable(); sysdev_suspend(PMSG_HIBERNATE); + if (!pm_check_wakeup_events()) { + error = -EAGAIN; + goto Power_up; + } + hibernation_ops->enter(); /* We should never get here */ while (1); - /* - * We don't need to reenable the nonboot CPUs or resume consoles, since - * the system is going to be halted anyway. - */ + Power_up: + sysdev_resume(); + local_irq_enable(); + enable_nonboot_cpus(); + Platform_finish: hibernation_ops->finish(); diff --git a/kernel/power/main.c b/kernel/power/main.c index b58800b21fc..62b0bc6e498 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -204,6 +204,60 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, power_attr(state); +#ifdef CONFIG_PM_SLEEP +/* + * The 'wakeup_count' attribute, along with the functions defined in + * drivers/base/power/wakeup.c, provides a means by which wakeup events can be + * handled in a non-racy way. + * + * If a wakeup event occurs when the system is in a sleep state, it simply is + * woken up. In turn, if an event that would wake the system up from a sleep + * state occurs when it is undergoing a transition to that sleep state, the + * transition should be aborted. Moreover, if such an event occurs when the + * system is in the working state, an attempt to start a transition to the + * given sleep state should fail during certain period after the detection of + * the event. Using the 'state' attribute alone is not sufficient to satisfy + * these requirements, because a wakeup event may occur exactly when 'state' + * is being written to and may be delivered to user space right before it is + * frozen, so the event will remain only partially processed until the system is + * woken up by another event. In particular, it won't cause the transition to + * a sleep state to be aborted. + * + * This difficulty may be overcome if user space uses 'wakeup_count' before + * writing to 'state'. It first should read from 'wakeup_count' and store + * the read value. Then, after carrying out its own preparations for the system + * transition to a sleep state, it should write the stored value to + * 'wakeup_count'. If that fails, at least one wakeup event has occured since + * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it + * is allowed to write to 'state', but the transition will be aborted if there + * are any wakeup events detected after 'wakeup_count' was written to. + */ + +static ssize_t wakeup_count_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + unsigned long val; + + return pm_get_wakeup_count(&val) ? sprintf(buf, "%lu\n", val) : -EINTR; +} + +static ssize_t wakeup_count_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long val; + + if (sscanf(buf, "%lu", &val) == 1) { + if (pm_save_wakeup_count(val)) + return n; + } + return -EINVAL; +} + +power_attr(wakeup_count); +#endif /* CONFIG_PM_SLEEP */ + #ifdef CONFIG_PM_TRACE int pm_trace_enabled; @@ -236,6 +290,7 @@ static struct attribute * g[] = { #endif #ifdef CONFIG_PM_SLEEP &pm_async_attr.attr, + &wakeup_count_attr.attr, #ifdef CONFIG_PM_DEBUG &pm_test_attr.attr, #endif diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index f37cb7dd440..5f8d09f9432 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -163,8 +163,10 @@ static int suspend_enter(suspend_state_t state) error = sysdev_suspend(PMSG_SUSPEND); if (!error) { - if (!suspend_test(TEST_CORE)) + if (!suspend_test(TEST_CORE) && pm_check_wakeup_events()) { error = suspend_ops->enter(state); + events_check_enabled = false; + } sysdev_resume(); } From 12e4d0cc2e0a776a526c93bb2fcb9267abc6e0b1 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 1 Jul 2010 21:46:36 +0200 Subject: [PATCH 05/13] plist: Add plist_last plist is currently used by the scheduler, which only needs to know the highest item in the list. This adds plist_last which allows you to find the lowest. This is necessary for using plists to implement a fast search of dynamic ranges in pm_qos which can have both highest and lowest criteria. Signed-off-by: James Bottomley Signed-off-by: Rafael J. Wysocki --- include/linux/plist.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/include/linux/plist.h b/include/linux/plist.h index 6898985e7b3..7254eda078e 100644 --- a/include/linux/plist.h +++ b/include/linux/plist.h @@ -259,6 +259,23 @@ static inline int plist_node_empty(const struct plist_node *node) container_of(plist_first(head), type, member) #endif +/** + * plist_last_entry - get the struct for the last entry + * @head: the &struct plist_head pointer + * @type: the type of the struct this is embedded in + * @member: the name of the list_struct within the struct + */ +#ifdef CONFIG_DEBUG_PI_LIST +# define plist_last_entry(head, type, member) \ +({ \ + WARN_ON(plist_head_empty(head)); \ + container_of(plist_last(head), type, member); \ +}) +#else +# define plist_last_entry(head, type, member) \ + container_of(plist_last(head), type, member) +#endif + /** * plist_first - return the first node (and thus, highest priority) * @head: the &struct plist_head pointer @@ -271,4 +288,16 @@ static inline struct plist_node *plist_first(const struct plist_head *head) struct plist_node, plist.node_list); } +/** + * plist_last - return the last node (and thus, lowest priority) + * @head: the &struct plist_head pointer + * + * Assumes the plist is _not_ empty. + */ +static inline struct plist_node *plist_last(const struct plist_head *head) +{ + return list_entry(head->node_list.prev, + struct plist_node, plist.node_list); +} + #endif From 5f279845f9d684661563894d44729a0c706375b4 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 19 Jul 2010 02:00:18 +0200 Subject: [PATCH 06/13] pm_qos: Reimplement using plists A lot of the pm_qos extremal value handling is really duplicating what a priority ordered list does, just in a less efficient fashion. Simply redoing the implementation in terms of a plist gets rid of a lot of this junk (although there are several other strange things that could do with tidying up, like pm_qos_request_list has to carry the pm_qos_class with every node, simply because it doesn't get passed in to pm_qos_update_request even though every caller knows full well what parameter it's updating). I think this redo is a win independent of android, so we should do something like this now. Signed-off-by: James Bottomley Signed-off-by: mark gross Signed-off-by: Rafael J. Wysocki --- kernel/pm_qos_params.c | 176 ++++++++++++++++++++--------------------- 1 file changed, 88 insertions(+), 88 deletions(-) diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index f42d3f737a3..db8e51d7f39 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -30,6 +30,7 @@ /*#define DEBUG*/ #include +#include #include #include #include @@ -49,58 +50,53 @@ * held, taken with _irqsave. One lock to rule them all */ struct pm_qos_request_list { - struct list_head list; - union { - s32 value; - s32 usec; - s32 kbps; - }; + struct plist_node list; int pm_qos_class; }; -static s32 max_compare(s32 v1, s32 v2); -static s32 min_compare(s32 v1, s32 v2); +enum pm_qos_type { + PM_QOS_MAX, /* return the largest value */ + PM_QOS_MIN /* return the smallest value */ +}; struct pm_qos_object { - struct pm_qos_request_list requests; + struct plist_head requests; struct blocking_notifier_head *notifiers; struct miscdevice pm_qos_power_miscdev; char *name; s32 default_value; - atomic_t target_value; - s32 (*comparitor)(s32, s32); + enum pm_qos_type type; }; +static DEFINE_SPINLOCK(pm_qos_lock); + static struct pm_qos_object null_pm_qos; static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); static struct pm_qos_object cpu_dma_pm_qos = { - .requests = {LIST_HEAD_INIT(cpu_dma_pm_qos.requests.list)}, + .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock), .notifiers = &cpu_dma_lat_notifier, .name = "cpu_dma_latency", .default_value = 2000 * USEC_PER_SEC, - .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC), - .comparitor = min_compare + .type = PM_QOS_MIN, }; static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); static struct pm_qos_object network_lat_pm_qos = { - .requests = {LIST_HEAD_INIT(network_lat_pm_qos.requests.list)}, + .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock), .notifiers = &network_lat_notifier, .name = "network_latency", .default_value = 2000 * USEC_PER_SEC, - .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC), - .comparitor = min_compare + .type = PM_QOS_MIN }; static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); static struct pm_qos_object network_throughput_pm_qos = { - .requests = {LIST_HEAD_INIT(network_throughput_pm_qos.requests.list)}, + .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock), .notifiers = &network_throughput_notifier, .name = "network_throughput", .default_value = 0, - .target_value = ATOMIC_INIT(0), - .comparitor = max_compare + .type = PM_QOS_MAX, }; @@ -111,8 +107,6 @@ static struct pm_qos_object *pm_qos_array[] = { &network_throughput_pm_qos }; -static DEFINE_SPINLOCK(pm_qos_lock); - static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos); static int pm_qos_power_open(struct inode *inode, struct file *filp); @@ -124,46 +118,55 @@ static const struct file_operations pm_qos_power_fops = { .release = pm_qos_power_release, }; -/* static helper functions */ -static s32 max_compare(s32 v1, s32 v2) +/* unlocked internal variant */ +static inline int pm_qos_get_value(struct pm_qos_object *o) { - return max(v1, v2); + if (plist_head_empty(&o->requests)) + return o->default_value; + + switch (o->type) { + case PM_QOS_MIN: + return plist_last(&o->requests)->prio; + + case PM_QOS_MAX: + return plist_first(&o->requests)->prio; + + default: + /* runtime check for not using enum */ + BUG(); + } } -static s32 min_compare(s32 v1, s32 v2) +static void update_target(struct pm_qos_object *o, struct plist_node *node, + int del, int value) { - return min(v1, v2); -} - - -static void update_target(int pm_qos_class) -{ - s32 extreme_value; - struct pm_qos_request_list *node; unsigned long flags; - int call_notifier = 0; + int prev_value, curr_value; spin_lock_irqsave(&pm_qos_lock, flags); - extreme_value = pm_qos_array[pm_qos_class]->default_value; - list_for_each_entry(node, - &pm_qos_array[pm_qos_class]->requests.list, list) { - extreme_value = pm_qos_array[pm_qos_class]->comparitor( - extreme_value, node->value); - } - if (atomic_read(&pm_qos_array[pm_qos_class]->target_value) != - extreme_value) { - call_notifier = 1; - atomic_set(&pm_qos_array[pm_qos_class]->target_value, - extreme_value); - pr_debug(KERN_ERR "new target for qos %d is %d\n", pm_qos_class, - atomic_read(&pm_qos_array[pm_qos_class]->target_value)); + prev_value = pm_qos_get_value(o); + /* PM_QOS_DEFAULT_VALUE is a signal that the value is unchanged */ + if (value != PM_QOS_DEFAULT_VALUE) { + /* + * to change the list, we atomically remove, reinit + * with new value and add, then see if the extremal + * changed + */ + plist_del(node, &o->requests); + plist_node_init(node, value); + plist_add(node, &o->requests); + } else if (del) { + plist_del(node, &o->requests); + } else { + plist_add(node, &o->requests); } + curr_value = pm_qos_get_value(o); spin_unlock_irqrestore(&pm_qos_lock, flags); - if (call_notifier) - blocking_notifier_call_chain( - pm_qos_array[pm_qos_class]->notifiers, - (unsigned long) extreme_value, NULL); + if (prev_value != curr_value) + blocking_notifier_call_chain(o->notifiers, + (unsigned long)curr_value, + NULL); } static int register_pm_qos_misc(struct pm_qos_object *qos) @@ -196,7 +199,14 @@ static int find_pm_qos_object_by_minor(int minor) */ int pm_qos_request(int pm_qos_class) { - return atomic_read(&pm_qos_array[pm_qos_class]->target_value); + unsigned long flags; + int value; + + spin_lock_irqsave(&pm_qos_lock, flags); + value = pm_qos_get_value(pm_qos_array[pm_qos_class]); + spin_unlock_irqrestore(&pm_qos_lock, flags); + + return value; } EXPORT_SYMBOL_GPL(pm_qos_request); @@ -214,21 +224,19 @@ EXPORT_SYMBOL_GPL(pm_qos_request); struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value) { struct pm_qos_request_list *dep; - unsigned long flags; dep = kzalloc(sizeof(struct pm_qos_request_list), GFP_KERNEL); if (dep) { - if (value == PM_QOS_DEFAULT_VALUE) - dep->value = pm_qos_array[pm_qos_class]->default_value; - else - dep->value = value; - dep->pm_qos_class = pm_qos_class; + struct pm_qos_object *o = pm_qos_array[pm_qos_class]; + int new_value; - spin_lock_irqsave(&pm_qos_lock, flags); - list_add(&dep->list, - &pm_qos_array[pm_qos_class]->requests.list); - spin_unlock_irqrestore(&pm_qos_lock, flags); - update_target(pm_qos_class); + if (value == PM_QOS_DEFAULT_VALUE) + new_value = o->default_value; + else + new_value = value; + plist_node_init(&dep->list, new_value); + dep->pm_qos_class = pm_qos_class; + update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE); } return dep; @@ -246,27 +254,23 @@ EXPORT_SYMBOL_GPL(pm_qos_add_request); * Attempts are made to make this code callable on hot code paths. */ void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, - s32 new_value) + s32 new_value) { - unsigned long flags; - int pending_update = 0; s32 temp; + struct pm_qos_object *o; - if (pm_qos_req) { /*guard against callers passing in null */ - spin_lock_irqsave(&pm_qos_lock, flags); - if (new_value == PM_QOS_DEFAULT_VALUE) - temp = pm_qos_array[pm_qos_req->pm_qos_class]->default_value; - else - temp = new_value; + if (!pm_qos_req) /*guard against callers passing in null */ + return; - if (temp != pm_qos_req->value) { - pending_update = 1; - pm_qos_req->value = temp; - } - spin_unlock_irqrestore(&pm_qos_lock, flags); - if (pending_update) - update_target(pm_qos_req->pm_qos_class); - } + o = pm_qos_array[pm_qos_req->pm_qos_class]; + + if (new_value == PM_QOS_DEFAULT_VALUE) + temp = o->default_value; + else + temp = new_value; + + if (temp != pm_qos_req->list.prio) + update_target(o, &pm_qos_req->list, 0, temp); } EXPORT_SYMBOL_GPL(pm_qos_update_request); @@ -280,19 +284,15 @@ EXPORT_SYMBOL_GPL(pm_qos_update_request); */ void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req) { - unsigned long flags; - int qos_class; + struct pm_qos_object *o; if (pm_qos_req == NULL) return; /* silent return to keep pcm code cleaner */ - qos_class = pm_qos_req->pm_qos_class; - spin_lock_irqsave(&pm_qos_lock, flags); - list_del(&pm_qos_req->list); + o = pm_qos_array[pm_qos_req->pm_qos_class]; + update_target(o, &pm_qos_req->list, 1, PM_QOS_DEFAULT_VALUE); kfree(pm_qos_req); - spin_unlock_irqrestore(&pm_qos_lock, flags); - update_target(qos_class); } EXPORT_SYMBOL_GPL(pm_qos_remove_request); From 82f682514a5df89ffb3890627eebf0897b7a84ec Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 5 Jul 2010 22:53:06 +0200 Subject: [PATCH 07/13] pm_qos: Get rid of the allocation in pm_qos_add_request() All current users of pm_qos_add_request() have the ability to supply the memory required by the pm_qos routines, so make them do this and eliminate the kmalloc() with pm_qos_add_request(). This has the double benefit of making the call never fail and allowing it to be called from atomic context. Signed-off-by: James Bottomley Signed-off-by: mark gross Signed-off-by: Rafael J. Wysocki --- drivers/net/e1000e/netdev.c | 17 +++---- drivers/net/igbvf/netdev.c | 9 ++-- drivers/net/wireless/ipw2x00/ipw2100.c | 12 ++--- include/linux/netdevice.h | 2 +- include/linux/pm_qos_params.h | 13 ++++-- include/sound/pcm.h | 2 +- kernel/pm_qos_params.c | 65 +++++++++++++++----------- sound/core/pcm_native.c | 13 ++---- 8 files changed, 73 insertions(+), 60 deletions(-) diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 57a7e41da69..9f13b660b80 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -2901,10 +2901,10 @@ static void e1000_configure_rx(struct e1000_adapter *adapter) * dropped transactions. */ pm_qos_update_request( - adapter->netdev->pm_qos_req, 55); + &adapter->netdev->pm_qos_req, 55); } else { pm_qos_update_request( - adapter->netdev->pm_qos_req, + &adapter->netdev->pm_qos_req, PM_QOS_DEFAULT_VALUE); } } @@ -3196,9 +3196,9 @@ int e1000e_up(struct e1000_adapter *adapter) /* DMA latency requirement to workaround early-receive/jumbo issue */ if (adapter->flags & FLAG_HAS_ERT) - adapter->netdev->pm_qos_req = - pm_qos_add_request(PM_QOS_CPU_DMA_LATENCY, - PM_QOS_DEFAULT_VALUE); + pm_qos_add_request(&adapter->netdev->pm_qos_req, + PM_QOS_CPU_DMA_LATENCY, + PM_QOS_DEFAULT_VALUE); /* hardware has been reset, we need to reload some things */ e1000_configure(adapter); @@ -3263,11 +3263,8 @@ void e1000e_down(struct e1000_adapter *adapter) e1000_clean_tx_ring(adapter); e1000_clean_rx_ring(adapter); - if (adapter->flags & FLAG_HAS_ERT) { - pm_qos_remove_request( - adapter->netdev->pm_qos_req); - adapter->netdev->pm_qos_req = NULL; - } + if (adapter->flags & FLAG_HAS_ERT) + pm_qos_remove_request(&adapter->netdev->pm_qos_req); /* * TODO: for power management, we could drop the link and diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c index 5e2b2a8c56c..add6197d3bc 100644 --- a/drivers/net/igbvf/netdev.c +++ b/drivers/net/igbvf/netdev.c @@ -48,7 +48,7 @@ #define DRV_VERSION "1.0.0-k0" char igbvf_driver_name[] = "igbvf"; const char igbvf_driver_version[] = DRV_VERSION; -struct pm_qos_request_list *igbvf_driver_pm_qos_req; +static struct pm_qos_request_list igbvf_driver_pm_qos_req; static const char igbvf_driver_string[] = "Intel(R) Virtual Function Network Driver"; static const char igbvf_copyright[] = "Copyright (c) 2009 Intel Corporation."; @@ -2902,8 +2902,8 @@ static int __init igbvf_init_module(void) printk(KERN_INFO "%s\n", igbvf_copyright); ret = pci_register_driver(&igbvf_driver); - igbvf_driver_pm_qos_req = pm_qos_add_request(PM_QOS_CPU_DMA_LATENCY, - PM_QOS_DEFAULT_VALUE); + pm_qos_add_request(&igbvf_driver_pm_qos_req, PM_QOS_CPU_DMA_LATENCY, + PM_QOS_DEFAULT_VALUE); return ret; } @@ -2918,8 +2918,7 @@ module_init(igbvf_init_module); static void __exit igbvf_exit_module(void) { pci_unregister_driver(&igbvf_driver); - pm_qos_remove_request(igbvf_driver_pm_qos_req); - igbvf_driver_pm_qos_req = NULL; + pm_qos_remove_request(&igbvf_driver_pm_qos_req); } module_exit(igbvf_exit_module); diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c index 0bd4dfa59a8..7f0d98b885b 100644 --- a/drivers/net/wireless/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/ipw2x00/ipw2100.c @@ -174,7 +174,7 @@ that only one external action is invoked at a time. #define DRV_DESCRIPTION "Intel(R) PRO/Wireless 2100 Network Driver" #define DRV_COPYRIGHT "Copyright(c) 2003-2006 Intel Corporation" -struct pm_qos_request_list *ipw2100_pm_qos_req; +struct pm_qos_request_list ipw2100_pm_qos_req; /* Debugging stuff */ #ifdef CONFIG_IPW2100_DEBUG @@ -1741,7 +1741,7 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred) /* the ipw2100 hardware really doesn't want power management delays * longer than 175usec */ - pm_qos_update_request(ipw2100_pm_qos_req, 175); + pm_qos_update_request(&ipw2100_pm_qos_req, 175); /* If the interrupt is enabled, turn it off... */ spin_lock_irqsave(&priv->low_lock, flags); @@ -1889,7 +1889,7 @@ static void ipw2100_down(struct ipw2100_priv *priv) ipw2100_disable_interrupts(priv); spin_unlock_irqrestore(&priv->low_lock, flags); - pm_qos_update_request(ipw2100_pm_qos_req, PM_QOS_DEFAULT_VALUE); + pm_qos_update_request(&ipw2100_pm_qos_req, PM_QOS_DEFAULT_VALUE); /* We have to signal any supplicant if we are disassociating */ if (associated) @@ -6669,8 +6669,8 @@ static int __init ipw2100_init(void) if (ret) goto out; - ipw2100_pm_qos_req = pm_qos_add_request(PM_QOS_CPU_DMA_LATENCY, - PM_QOS_DEFAULT_VALUE); + pm_qos_add_request(&ipw2100_pm_qos_req, PM_QOS_CPU_DMA_LATENCY, + PM_QOS_DEFAULT_VALUE); #ifdef CONFIG_IPW2100_DEBUG ipw2100_debug_level = debug; ret = driver_create_file(&ipw2100_pci_driver.driver, @@ -6692,7 +6692,7 @@ static void __exit ipw2100_exit(void) &driver_attr_debug_level); #endif pci_unregister_driver(&ipw2100_pci_driver); - pm_qos_remove_request(ipw2100_pm_qos_req); + pm_qos_remove_request(&ipw2100_pm_qos_req); } module_init(ipw2100_init); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b21e4054c12..2f22119b4b0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -779,7 +779,7 @@ struct net_device { */ char name[IFNAMSIZ]; - struct pm_qos_request_list *pm_qos_req; + struct pm_qos_request_list pm_qos_req; /* device name hash chain */ struct hlist_node name_hlist; diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h index 8ba440e5eb7..77cbddb3784 100644 --- a/include/linux/pm_qos_params.h +++ b/include/linux/pm_qos_params.h @@ -1,8 +1,10 @@ +#ifndef _LINUX_PM_QOS_PARAMS_H +#define _LINUX_PM_QOS_PARAMS_H /* interface for the pm_qos_power infrastructure of the linux kernel. * * Mark Gross */ -#include +#include #include #include @@ -14,9 +16,12 @@ #define PM_QOS_NUM_CLASSES 4 #define PM_QOS_DEFAULT_VALUE -1 -struct pm_qos_request_list; +struct pm_qos_request_list { + struct plist_node list; + int pm_qos_class; +}; -struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value); +void pm_qos_add_request(struct pm_qos_request_list *l, int pm_qos_class, s32 value); void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, s32 new_value); void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req); @@ -24,4 +29,6 @@ void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req); int pm_qos_request(int pm_qos_class); int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier); int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier); +int pm_qos_request_active(struct pm_qos_request_list *req); +#endif diff --git a/include/sound/pcm.h b/include/sound/pcm.h index dd76cdede64..6e3a29732dc 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -366,7 +366,7 @@ struct snd_pcm_substream { int number; char name[32]; /* substream name */ int stream; /* stream (direction) */ - struct pm_qos_request_list *latency_pm_qos_req; /* pm_qos request */ + struct pm_qos_request_list latency_pm_qos_req; /* pm_qos request */ size_t buffer_bytes_max; /* limit ring buffer size */ struct snd_dma_buffer dma_buffer; unsigned int dma_buf_id; diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index db8e51d7f39..996a4dec5f9 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -30,7 +30,6 @@ /*#define DEBUG*/ #include -#include #include #include #include @@ -49,11 +48,6 @@ * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock * held, taken with _irqsave. One lock to rule them all */ -struct pm_qos_request_list { - struct plist_node list; - int pm_qos_class; -}; - enum pm_qos_type { PM_QOS_MAX, /* return the largest value */ PM_QOS_MIN /* return the smallest value */ @@ -210,6 +204,12 @@ int pm_qos_request(int pm_qos_class) } EXPORT_SYMBOL_GPL(pm_qos_request); +int pm_qos_request_active(struct pm_qos_request_list *req) +{ + return req->pm_qos_class != 0; +} +EXPORT_SYMBOL_GPL(pm_qos_request_active); + /** * pm_qos_add_request - inserts new qos request into the list * @pm_qos_class: identifies which list of qos request to us @@ -221,25 +221,23 @@ EXPORT_SYMBOL_GPL(pm_qos_request); * element as a handle for use in updating and removal. Call needs to save * this handle for later use. */ -struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value) +void pm_qos_add_request(struct pm_qos_request_list *dep, + int pm_qos_class, s32 value) { - struct pm_qos_request_list *dep; + struct pm_qos_object *o = pm_qos_array[pm_qos_class]; + int new_value; - dep = kzalloc(sizeof(struct pm_qos_request_list), GFP_KERNEL); - if (dep) { - struct pm_qos_object *o = pm_qos_array[pm_qos_class]; - int new_value; - - if (value == PM_QOS_DEFAULT_VALUE) - new_value = o->default_value; - else - new_value = value; - plist_node_init(&dep->list, new_value); - dep->pm_qos_class = pm_qos_class; - update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE); + if (pm_qos_request_active(dep)) { + WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n"); + return; } - - return dep; + if (value == PM_QOS_DEFAULT_VALUE) + new_value = o->default_value; + else + new_value = value; + plist_node_init(&dep->list, new_value); + dep->pm_qos_class = pm_qos_class; + update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE); } EXPORT_SYMBOL_GPL(pm_qos_add_request); @@ -262,6 +260,11 @@ void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, if (!pm_qos_req) /*guard against callers passing in null */ return; + if (!pm_qos_request_active(pm_qos_req)) { + WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n"); + return; + } + o = pm_qos_array[pm_qos_req->pm_qos_class]; if (new_value == PM_QOS_DEFAULT_VALUE) @@ -290,9 +293,14 @@ void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req) return; /* silent return to keep pcm code cleaner */ + if (!pm_qos_request_active(pm_qos_req)) { + WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n"); + return; + } + o = pm_qos_array[pm_qos_req->pm_qos_class]; update_target(o, &pm_qos_req->list, 1, PM_QOS_DEFAULT_VALUE); - kfree(pm_qos_req); + memset(pm_qos_req, 0, sizeof(*pm_qos_req)); } EXPORT_SYMBOL_GPL(pm_qos_remove_request); @@ -340,8 +348,12 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp) pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); if (pm_qos_class >= 0) { - filp->private_data = (void *) pm_qos_add_request(pm_qos_class, - PM_QOS_DEFAULT_VALUE); + struct pm_qos_request_list *req = kzalloc(GFP_KERNEL, sizeof(*req)); + if (!req) + return -ENOMEM; + + pm_qos_add_request(req, pm_qos_class, PM_QOS_DEFAULT_VALUE); + filp->private_data = req; if (filp->private_data) return 0; @@ -353,8 +365,9 @@ static int pm_qos_power_release(struct inode *inode, struct file *filp) { struct pm_qos_request_list *req; - req = (struct pm_qos_request_list *)filp->private_data; + req = filp->private_data; pm_qos_remove_request(req); + kfree(req); return 0; } diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 303ac04ff6e..a3b2a647924 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -451,13 +451,11 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, snd_pcm_timer_resolution_change(substream); runtime->status->state = SNDRV_PCM_STATE_SETUP; - if (substream->latency_pm_qos_req) { - pm_qos_remove_request(substream->latency_pm_qos_req); - substream->latency_pm_qos_req = NULL; - } + if (pm_qos_request_active(&substream->latency_pm_qos_req)) + pm_qos_remove_request(&substream->latency_pm_qos_req); if ((usecs = period_to_usecs(runtime)) >= 0) - substream->latency_pm_qos_req = pm_qos_add_request( - PM_QOS_CPU_DMA_LATENCY, usecs); + pm_qos_add_request(&substream->latency_pm_qos_req, + PM_QOS_CPU_DMA_LATENCY, usecs); return 0; _error: /* hardware might be unuseable from this time, @@ -512,8 +510,7 @@ static int snd_pcm_hw_free(struct snd_pcm_substream *substream) if (substream->ops->hw_free) result = substream->ops->hw_free(substream); runtime->status->state = SNDRV_PCM_STATE_OPEN; - pm_qos_remove_request(substream->latency_pm_qos_req); - substream->latency_pm_qos_req = NULL; + pm_qos_remove_request(&substream->latency_pm_qos_req); return result; } From f6f71f187518477cecc01cd887933b5da19585e6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jul 2010 23:43:18 +0200 Subject: [PATCH 08/13] PM / Hibernate: Fix hibernation_platform_enter() The hibernation_platform_enter() function calls dpm_suspend_noirq() instead of dpm_resume_noirq() by mistake. Fix this. Signed-off-by: Rafael J. Wysocki Acked-by: Len Brown --- kernel/power/hibernate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index f6120291663..d97ba8615c3 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -530,7 +530,7 @@ int hibernation_platform_enter(void) Platform_finish: hibernation_ops->finish(); - dpm_suspend_noirq(PMSG_RESTORE); + dpm_resume_noirq(PMSG_RESTORE); Resume_devices: entering_platform_hibernation = false; From d074ee023fa3a4681b64223c5e636102c39628c4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jul 2010 23:43:35 +0200 Subject: [PATCH 09/13] PM / Hibernate: Fix snapshot error code path There is an inconsistency between hibernation_platform_enter() and hibernation_snapshot(), because the latter calls hibernation_ops->end() after failing hibernation_ops->begin(), while the former doesn't do that. Make hibernation_snapshot() behave in the same way as hibernation_platform_enter() in that respect. Signed-off-by: Rafael J. Wysocki Acked-by: Len Brown --- kernel/power/hibernate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index d97ba8615c3..d26f04e9274 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -330,7 +330,7 @@ int hibernation_snapshot(int platform_mode) error = platform_begin(platform_mode); if (error) - return error; + goto Close; /* Preallocate image memory before shutting down devices. */ error = hibernate_preallocate_memory(); From ce4410116c5debfb0e049f5db4b5cd6211e05b80 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jul 2010 23:43:45 +0200 Subject: [PATCH 10/13] PM / Suspend: Fix ordering of calls in suspend error paths The ACPI suspend code calls suspend_nvs_free() at a wrong place, which may lead to a memory leak if there's an error executing acpi_pm_prepare(), because acpi_pm_finish() will not be called in that case. However, the root cause of this problem is the apparently confusing ordering of calls in suspend error paths that needs to be fixed. In addition to that, fix a typo in a label name in suspend.c. Signed-off-by: Rafael J. Wysocki Acked-by: Len Brown --- include/linux/suspend.h | 10 ++++++---- kernel/power/suspend.c | 9 ++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index bf1bab7b059..4af270ec220 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -61,14 +61,15 @@ typedef int __bitwise suspend_state_t; * before device drivers' late suspend callbacks are executed. It returns * 0 on success or a negative error code otherwise, in which case the * system cannot enter the desired sleep state (@prepare_late(), @enter(), - * @wake(), and @finish() will not be called in that case). + * and @wake() will not be called in that case). * * @prepare_late: Finish preparing the platform for entering the system sleep * state indicated by @begin(). * @prepare_late is called before disabling nonboot CPUs and after * device drivers' late suspend callbacks have been executed. It returns * 0 on success or a negative error code otherwise, in which case the - * system cannot enter the desired sleep state (@enter() and @wake()). + * system cannot enter the desired sleep state (@enter() will not be + * executed). * * @enter: Enter the system sleep state indicated by @begin() or represented by * the argument if @begin() is not implemented. @@ -81,14 +82,15 @@ typedef int __bitwise suspend_state_t; * resume callbacks are executed. * This callback is optional, but should be implemented by the platforms * that implement @prepare_late(). If implemented, it is always called - * after @enter(), even if @enter() fails. + * after @prepare_late and @enter(), even if one of them fails. * * @finish: Finish wake-up of the platform. * @finish is called right prior to calling device drivers' regular suspend * callbacks. * This callback is optional, but should be implemented by the platforms * that implement @prepare(). If implemented, it is always called after - * @enter() and @wake(), if implemented, even if any of them fails. + * @enter() and @wake(), even if any of them fails. It is executed after + * a failing @prepare. * * @end: Called by the PM core right after resuming devices, to indicate to * the platform that the system has returned to the working state or diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 5f8d09f9432..7335952ee47 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -136,19 +136,19 @@ static int suspend_enter(suspend_state_t state) if (suspend_ops->prepare) { error = suspend_ops->prepare(); if (error) - return error; + goto Platform_finish; } error = dpm_suspend_noirq(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to power down\n"); - goto Platfrom_finish; + goto Platform_finish; } if (suspend_ops->prepare_late) { error = suspend_ops->prepare_late(); if (error) - goto Power_up_devices; + goto Platform_wake; } if (suspend_test(TEST_PLATFORM)) @@ -180,10 +180,9 @@ static int suspend_enter(suspend_state_t state) if (suspend_ops->wake) suspend_ops->wake(); - Power_up_devices: dpm_resume_noirq(PMSG_RESUME); - Platfrom_finish: + Platform_finish: if (suspend_ops->finish) suspend_ops->finish(); From 4eb241e5691363c391aac8a5051d0d013188ec84 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jul 2010 23:43:51 +0200 Subject: [PATCH 11/13] PM: Do not use dynamically allocated objects in pm_wakeup_event() Originally, pm_wakeup_event() uses struct delayed_work objects, allocated with GFP_ATOMIC, to schedule the execution of pm_relax() in future. However, as noted by Alan Stern, it is not necessary to do that, because all pm_wakeup_event() calls can use one static timer that will always be set to expire at the latest time passed to pm_wakeup_event(). The modifications are based on the example code posted by Alan. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 56 ++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 25599077c39..eb594facfc3 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -28,6 +28,11 @@ static unsigned long events_in_progress; static DEFINE_SPINLOCK(events_lock); +static void pm_wakeup_timer_fn(unsigned long data); + +static DEFINE_TIMER(events_timer, pm_wakeup_timer_fn, 0, 0); +static unsigned long events_timer_expires; + /* * The functions below use the observation that each wakeup event starts a * period in which the system should not be suspended. The moment this period @@ -103,17 +108,22 @@ void pm_relax(void) } /** - * pm_wakeup_work_fn - Deferred closing of a wakeup event. + * pm_wakeup_timer_fn - Delayed finalization of a wakeup event. * - * Execute pm_relax() for a wakeup event detected in the past and free the - * work item object used for queuing up the work. + * Decrease the counter of wakeup events being processed after it was increased + * by pm_wakeup_event(). */ -static void pm_wakeup_work_fn(struct work_struct *work) +static void pm_wakeup_timer_fn(unsigned long data) { - struct delayed_work *dwork = to_delayed_work(work); + unsigned long flags; - pm_relax(); - kfree(dwork); + spin_lock_irqsave(&events_lock, flags); + if (events_timer_expires + && time_before_eq(events_timer_expires, jiffies)) { + events_in_progress--; + events_timer_expires = 0; + } + spin_unlock_irqrestore(&events_lock, flags); } /** @@ -123,30 +133,38 @@ static void pm_wakeup_work_fn(struct work_struct *work) * * Notify the PM core of a wakeup event (signaled by @dev) that will take * approximately @msec milliseconds to be processed by the kernel. Increment - * the counter of wakeup events being processed and queue up a work item - * that will execute pm_relax() for the event after @msec milliseconds. If @dev - * is not NULL, the counter of wakeup events related to @dev is incremented too. + * the counter of registered wakeup events and (if @msec is nonzero) set up + * the wakeup events timer to execute pm_wakeup_timer_fn() in future (if the + * timer has not been set up already, increment the counter of wakeup events + * being processed). If @dev is not NULL, the counter of wakeup events related + * to @dev is incremented too. * * It is safe to call this function from interrupt context. */ void pm_wakeup_event(struct device *dev, unsigned int msec) { unsigned long flags; - struct delayed_work *dwork; - - dwork = msec ? kzalloc(sizeof(*dwork), GFP_ATOMIC) : NULL; spin_lock_irqsave(&events_lock, flags); + event_count++; if (dev) dev->power.wakeup_count++; - if (dwork) { - INIT_DELAYED_WORK(dwork, pm_wakeup_work_fn); - schedule_delayed_work(dwork, msecs_to_jiffies(msec)); + if (msec) { + unsigned long expires; - events_in_progress++; - } else { - event_count++; + expires = jiffies + msecs_to_jiffies(msec); + if (!expires) + expires = 1; + + if (!events_timer_expires + || time_after(expires, events_timer_expires)) { + if (!events_timer_expires) + events_in_progress++; + + mod_timer(&events_timer, expires); + events_timer_expires = expires; + } } spin_unlock_irqrestore(&events_lock, flags); } From 0fcb4eef8294492c8f1de8236b1ed81f09e42922 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 8 Jul 2010 00:05:37 +0200 Subject: [PATCH 12/13] PM / Runtime: Make runtime_status attribute not debug-only (v. 2) This patch (as1404b) makes the runtime_status sysfs attribute available even in the absence of CONFIG_PM_ADVANCED_DEBUG, and it changes the routine to display "unsupported" when runtime PM is disabled for a device. Although not strictly 100% accurate, this will almost always be correct. Signed-off-by: Alan Stern Acked-by: Dominik Brodowski Signed-off-by: Rafael J. Wysocki --- drivers/base/power/sysfs.c | 53 ++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 81d344e0e95..1eca50c8e7c 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -110,6 +110,38 @@ static ssize_t control_store(struct device * dev, struct device_attribute *attr, } static DEVICE_ATTR(control, 0644, control_show, control_store); + +static ssize_t rtpm_status_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const char *p; + + if (dev->power.runtime_error) { + p = "error\n"; + } else if (dev->power.disable_depth) { + p = "unsupported\n"; + } else { + switch (dev->power.runtime_status) { + case RPM_SUSPENDED: + p = "suspended\n"; + break; + case RPM_SUSPENDING: + p = "suspending\n"; + break; + case RPM_RESUMING: + p = "resuming\n"; + break; + case RPM_ACTIVE: + p = "active\n"; + break; + default: + return -EIO; + } + } + return sprintf(buf, p); +} + +static DEVICE_ATTR(runtime_status, 0444, rtpm_status_show, NULL); #endif static ssize_t @@ -184,27 +216,8 @@ static ssize_t rtpm_enabled_show(struct device *dev, return sprintf(buf, "enabled\n"); } -static ssize_t rtpm_status_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - if (dev->power.runtime_error) - return sprintf(buf, "error\n"); - switch (dev->power.runtime_status) { - case RPM_SUSPENDED: - return sprintf(buf, "suspended\n"); - case RPM_SUSPENDING: - return sprintf(buf, "suspending\n"); - case RPM_RESUMING: - return sprintf(buf, "resuming\n"); - case RPM_ACTIVE: - return sprintf(buf, "active\n"); - } - return -EIO; -} - static DEVICE_ATTR(runtime_usage, 0444, rtpm_usagecount_show, NULL); static DEVICE_ATTR(runtime_active_kids, 0444, rtpm_children_show, NULL); -static DEVICE_ATTR(runtime_status, 0444, rtpm_status_show, NULL); static DEVICE_ATTR(runtime_enabled, 0444, rtpm_enabled_show, NULL); #endif @@ -240,6 +253,7 @@ static DEVICE_ATTR(async, 0644, async_show, async_store); static struct attribute * power_attrs[] = { #ifdef CONFIG_PM_RUNTIME &dev_attr_control.attr, + &dev_attr_runtime_status.attr, #endif &dev_attr_wakeup.attr, #ifdef CONFIG_PM_SLEEP @@ -250,7 +264,6 @@ static struct attribute * power_attrs[] = { #ifdef CONFIG_PM_RUNTIME &dev_attr_runtime_usage.attr, &dev_attr_runtime_active_kids.attr, - &dev_attr_runtime_status.attr, &dev_attr_runtime_enabled.attr, #endif #endif From 8d4b9d1bfef117862a2889dec4dac227068544c9 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 19 Jul 2010 02:01:06 +0200 Subject: [PATCH 13/13] PM / Runtime: Add runtime PM statistics (v3) In order for PowerTOP to be able to report how well the new runtime PM is working for the various drivers, the kernel needs to export some basic statistics in sysfs. This patch adds two sysfs files in the runtime PM domain that expose the total time a device has been active, and the time a device has been suspended. With this PowerTOP can compute the activity percentage Active %age = 100 * (delta active) / (delta active + delta suspended) and present the information to the user. I've written the PowerTOP code (slated for version 1.12) already, and the output looks like this: Runtime Device Power Management statistics Active Device name 10.0% 06:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL8101E/RTL8102E PCI Express Fast Ethernet controller [version 2: fix stat update bugs noticed by Alan Stern] [version 3: rebase to -next and move the sysfs declaration] Signed-off-by: Arjan van de Ven Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 54 +++++++++++++++++++++++++++++++----- drivers/base/power/sysfs.c | 30 ++++++++++++++++++++ include/linux/pm.h | 6 ++++ 3 files changed, 83 insertions(+), 7 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index b0ec0e9f27e..b78c401ffa7 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -123,6 +123,45 @@ int pm_runtime_idle(struct device *dev) } EXPORT_SYMBOL_GPL(pm_runtime_idle); + +/** + * update_pm_runtime_accounting - Update the time accounting of power states + * @dev: Device to update the accounting for + * + * In order to be able to have time accounting of the various power states + * (as used by programs such as PowerTOP to show the effectiveness of runtime + * PM), we need to track the time spent in each state. + * update_pm_runtime_accounting must be called each time before the + * runtime_status field is updated, to account the time in the old state + * correctly. + */ +void update_pm_runtime_accounting(struct device *dev) +{ + unsigned long now = jiffies; + int delta; + + delta = now - dev->power.accounting_timestamp; + + if (delta < 0) + delta = 0; + + dev->power.accounting_timestamp = now; + + if (dev->power.disable_depth > 0) + return; + + if (dev->power.runtime_status == RPM_SUSPENDED) + dev->power.suspended_jiffies += delta; + else + dev->power.active_jiffies += delta; +} + +static void __update_runtime_status(struct device *dev, enum rpm_status status) +{ + update_pm_runtime_accounting(dev); + dev->power.runtime_status = status; +} + /** * __pm_runtime_suspend - Carry out run-time suspend of given device. * @dev: Device to suspend. @@ -197,7 +236,7 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq) goto repeat; } - dev->power.runtime_status = RPM_SUSPENDING; + __update_runtime_status(dev, RPM_SUSPENDING); dev->power.deferred_resume = false; if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) { @@ -228,7 +267,7 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq) } if (retval) { - dev->power.runtime_status = RPM_ACTIVE; + __update_runtime_status(dev, RPM_ACTIVE); if (retval == -EAGAIN || retval == -EBUSY) { if (dev->power.timer_expires == 0) notify = true; @@ -237,7 +276,7 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq) pm_runtime_cancel_pending(dev); } } else { - dev->power.runtime_status = RPM_SUSPENDED; + __update_runtime_status(dev, RPM_SUSPENDED); pm_runtime_deactivate_timer(dev); if (dev->parent) { @@ -381,7 +420,7 @@ int __pm_runtime_resume(struct device *dev, bool from_wq) goto repeat; } - dev->power.runtime_status = RPM_RESUMING; + __update_runtime_status(dev, RPM_RESUMING); if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) { spin_unlock_irq(&dev->power.lock); @@ -411,10 +450,10 @@ int __pm_runtime_resume(struct device *dev, bool from_wq) } if (retval) { - dev->power.runtime_status = RPM_SUSPENDED; + __update_runtime_status(dev, RPM_SUSPENDED); pm_runtime_cancel_pending(dev); } else { - dev->power.runtime_status = RPM_ACTIVE; + __update_runtime_status(dev, RPM_ACTIVE); if (parent) atomic_inc(&parent->power.child_count); } @@ -848,7 +887,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) } out_set: - dev->power.runtime_status = status; + __update_runtime_status(dev, status); dev->power.runtime_error = 0; out: spin_unlock_irqrestore(&dev->power.lock, flags); @@ -1077,6 +1116,7 @@ void pm_runtime_init(struct device *dev) dev->power.request_pending = false; dev->power.request = RPM_REQ_NONE; dev->power.deferred_resume = false; + dev->power.accounting_timestamp = jiffies; INIT_WORK(&dev->power.work, pm_runtime_work); dev->power.timer_expires = 0; diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 1eca50c8e7c..e56b4388fe6 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "power.h" /* @@ -111,6 +112,33 @@ static ssize_t control_store(struct device * dev, struct device_attribute *attr, static DEVICE_ATTR(control, 0644, control_show, control_store); +static ssize_t rtpm_active_time_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + spin_lock_irq(&dev->power.lock); + update_pm_runtime_accounting(dev); + ret = sprintf(buf, "%i\n", jiffies_to_msecs(dev->power.active_jiffies)); + spin_unlock_irq(&dev->power.lock); + return ret; +} + +static DEVICE_ATTR(runtime_active_time, 0444, rtpm_active_time_show, NULL); + +static ssize_t rtpm_suspended_time_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + spin_lock_irq(&dev->power.lock); + update_pm_runtime_accounting(dev); + ret = sprintf(buf, "%i\n", + jiffies_to_msecs(dev->power.suspended_jiffies)); + spin_unlock_irq(&dev->power.lock); + return ret; +} + +static DEVICE_ATTR(runtime_suspended_time, 0444, rtpm_suspended_time_show, NULL); + static ssize_t rtpm_status_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -254,6 +282,8 @@ static struct attribute * power_attrs[] = { #ifdef CONFIG_PM_RUNTIME &dev_attr_control.attr, &dev_attr_runtime_status.attr, + &dev_attr_runtime_suspended_time.attr, + &dev_attr_runtime_active_time.attr, #endif &dev_attr_wakeup.attr, #ifdef CONFIG_PM_SLEEP diff --git a/include/linux/pm.h b/include/linux/pm.h index b417fc46f3f..52e8c55ff31 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -477,9 +477,15 @@ struct dev_pm_info { enum rpm_request request; enum rpm_status runtime_status; int runtime_error; + unsigned long active_jiffies; + unsigned long suspended_jiffies; + unsigned long accounting_timestamp; #endif }; +extern void update_pm_runtime_accounting(struct device *dev); + + /* * The PM_EVENT_ messages are also used by drivers implementing the legacy * suspend framework, based on the ->suspend() and ->resume() callbacks common