is meant for developer testing only. In normal usage, the kernel picks the
best governor based on governor ratings.
SEE ALSO: sysfs.txt in this directory.
+
+Design:
+
+Cpuidle allows multiple sets of idle routines to be registered.
+The most recently registered set is used by the cpuidle governors as
+the current active set when choosing an idle state. The sets are
+managed as a list: each newly registered set is added to the head of
+the list and becomes the current active set.
+
+An example of how this would work on x86 is shown below.
+
+----------------- -----------------
+| | | |
+| choose b/w | mwait is chosen | mwait |
+| mwait, poll, |-------------------------------------> |(current active|
+| default, c1e | register to cpuidle | set) |
+| | with mwait as the idle routine | |
+----------------- -----------------
+
+
+----------------- -----------------
+| | | c1, c2, c3 |
+| ACPI | register to cpuidle | (current) |
+| discovery |-------------------------------------> |---------------|
+| | with c1, c2, c3 | mwait |
+| | as set of idle routines | |
+----------------- -----------------
+
+With this mechanism, a module can register and unregister its set of
+idle routines at run time in a clean manner.
+
+The main idle routine, called inside cpu_idle() of every arch, is
+defined in drivers/cpuidle/cpuidle.c, which in turn calls the idle
+routine selected by the governor. If CONFIG_CPU_IDLE is disabled, the
+arch needs to provide an alternate definition of cpuidle_idle_call().
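+
+As a minimal sketch of this flow (the names my_idle_driver,
+my_idle_devices, my_idle and my_idle_loop below are illustrative only),
+a module wraps its raw idle routine in a one-state device per cpu and
+registers it:
+
+	static DEFINE_PER_CPU(struct cpuidle_device, my_idle_devices);
+
+	static struct cpuidle_driver my_idle_driver = {
+		.name = "my_idle",
+	};
+
+	/* cpuidle state callback wrapping the module's idle routine */
+	static void my_idle_loop(struct cpuidle_device *dev,
+				 struct cpuidle_state *st)
+	{
+		my_idle();	/* the module's low-level idle routine */
+	}
+
+	static void setup_cpuidle_my(void)
+	{
+		struct cpuidle_device *dev;
+		int cpu;
+
+		if (!cpuidle_curr_driver)
+			cpuidle_register_driver(&my_idle_driver);
+
+		for_each_online_cpu(cpu) {
+			dev = &per_cpu(my_idle_devices, cpu);
+			dev->cpu = cpu;
+			dev->states[0].enter = my_idle_loop;
+			dev->state_count = 1;
+			/* the newly registered set is added to the head
+			   of the list and becomes the current active set */
+			cpuidle_register_device(dev);
+		}
+	}
+
+Unregistering each device (cpuidle_unregister_device()) removes the set
+from the list, and the previously registered set becomes active again.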
bool
default y if 64BIT
+config ARCH_HAS_CPU_IDLE_WAIT
+ def_bool y
+
+config ARCH_HAS_DEFAULT_IDLE
+ def_bool y
+
config GENERIC_HWEIGHT
bool
default y
source "arch/powerpc/sysdev/Kconfig"
source "arch/powerpc/platforms/Kconfig"
+menu "Power management options"
+
+source "drivers/cpuidle/Kconfig"
+
+endmenu
+
menu "Kernel options"
config HIGHMEM
// Used to pass parms from the OS to PLIC for SetAsrAndRfid
u64 saved_gpr3; // Saved GPR3 x20-x27
u64 saved_gpr4; // Saved GPR4 x28-x2F
- u64 saved_gpr5; // Saved GPR5 x30-x37
+ union {
+ u64 saved_gpr5; /* Saved GPR5 x30-x37 */
+ struct {
+ u8 cede_latency_hint; /* x30 */
+			u8 reserved[7];		/* x31-x37 */
+ } fields;
+ } gpr5_dword;
+
u8 dtl_enable_mask; // Dispatch Trace Log mask x38-x38
u8 donate_dedicated_cpu; // Donate dedicated CPU cycles x39-x39
#ifdef CONFIG_PPC_PSERIES
extern int pSeries_reconfig_notifier_register(struct notifier_block *);
extern void pSeries_reconfig_notifier_unregister(struct notifier_block *);
+extern struct blocking_notifier_head pSeries_reconfig_chain;
#else /* !CONFIG_PPC_PSERIES */
static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb)
{
	return 0;
}
#endif
+extern int boot_option_idle_override;
+
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PROCESSOR_H */
*/
extern void __iomem *of_iomap(struct device_node *device, int index);
+struct of_drconf_cell {
+ u64 base_addr;
+ u32 drc_index;
+ u32 reserved;
+ u32 aa_index;
+ u32 flags;
+};
+
+#define DRCONF_MEM_ASSIGNED 0x00000008
+#define DRCONF_MEM_AI_INVALID 0x00000040
+#define DRCONF_MEM_RESERVED 0x00000080
+
/*
* NB: This is here while we transition from using asm/prom.h
* to linux/of.h
extern void *alloc_maybe_bootmem(size_t size, gfp_t mask);
extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
+extern void default_idle(void);
extern int powersave_nap; /* set if nap mode can be used in idle loop */
/*
extern struct dentry *powerpc_debugfs_root;
+extern void cpu_idle_wait(void);
+
+#ifdef CONFIG_CPU_IDLE
+extern void update_smt_snooze_delay(int snooze);
+#else
+static inline void update_smt_snooze_delay(int snooze) {}
+#endif
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_SYSTEM_H */
#include <linux/cpu.h>
#include <linux/sysctl.h>
#include <linux/tick.h>
+#include <linux/cpuidle.h>
#include <asm/system.h>
#include <asm/processor.h>
#define cpu_should_die() 0
#endif
+int boot_option_idle_override = 0;
+
static int __init powersave_off(char *arg)
{
- ppc_md.power_save = NULL;
+ boot_option_idle_override = 1;
return 0;
}
__setup("powersave=off", powersave_off);
+#ifndef CONFIG_CPU_IDLE
+void cpuidle_idle_call(void)
+{
+ local_irq_enable();
+ cpu_relax();
+}
+#endif
+
/*
* The body of the idle task.
*/
while (!need_resched() && !cpu_should_die()) {
ppc64_runlatch_off();
- if (ppc_md.power_save) {
- clear_thread_flag(TIF_POLLING_NRFLAG);
- /*
- * smp_mb is so clearing of TIF_POLLING_NRFLAG
- * is ordered w.r.t. need_resched() test.
- */
- smp_mb();
- local_irq_disable();
-
- /* Don't trace irqs off for idle */
- stop_critical_timings();
-
- /* check again after disabling irqs */
- if (!need_resched() && !cpu_should_die())
- ppc_md.power_save();
-
- start_critical_timings();
-
- local_irq_enable();
- set_thread_flag(TIF_POLLING_NRFLAG);
-
- } else {
- /*
- * Go into low thread priority and possibly
- * low power mode.
- */
- HMT_low();
- HMT_very_low();
- }
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ /*
+ * smp_mb is so clearing of TIF_POLLING_NRFLAG
+ * is ordered w.r.t. need_resched() test.
+ */
+ smp_mb();
+ local_irq_disable();
+
+ /* Don't trace irqs off for idle */
+ stop_critical_timings();
+
+ /* check again after disabling irqs */
+ if (!need_resched() && !cpu_should_die())
+ cpuidle_idle_call();
+
+ start_critical_timings();
+
+ local_irq_enable();
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
}
HMT_medium();
}
}
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs come out of the old
+ * idle loop and start using the new idle loop.
+ * Required while changing idle handler on SMP systems.
+ * Caller must have changed idle handler to the new value before the call.
+ */
+void cpu_idle_wait(void)
+{
+ /* Ensure that new value of idle is set */
+ smp_mb();
+ /* kick all the CPUs so that they exit out of old idle routine */
+ smp_call_function(do_nothing, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+void default_idle(void)
+{
+ HMT_low();
+ HMT_very_low();
+}
+
int powersave_nap;
#ifdef CONFIG_SYSCTL
#include <asm/machdep.h>
#include <asm/smp.h>
#include <asm/pmc.h>
+#include <asm/system.h>
#include "cacheinfo.h"
return -EINVAL;
per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze;
+ update_smt_snooze_delay(snooze);
return count;
}
#ifdef CONFIG_MEMORY_HOTPLUG
#ifdef CONFIG_NUMA
+int __attribute__ ((weak)) platform_probe_memory(u64 start)
+{
+ return 0;
+}
+
int memory_add_physaddr_to_nid(u64 start)
{
+ int rc;
+
+ rc = platform_probe_memory(start);
+ if (rc)
+ return rc;
+
return hot_add_scn_to_nid(start);
}
#endif
return result;
}
-struct of_drconf_cell {
- u64 base_addr;
- u32 drc_index;
- u32 reserved;
- u32 aa_index;
- u32 flags;
-};
-
-#define DRCONF_MEM_ASSIGNED 0x00000008
-#define DRCONF_MEM_AI_INVALID 0x00000040
-#define DRCONF_MEM_RESERVED 0x00000080
-
/*
* Read the next lmb list entry from the ibm,dynamic-memory property
* and return the information in the provided of_drconf_cell structure.
obj-y := lpar.o hvCall.o nvram.o reconfig.o \
setup.o iommu.o ras.o rtasd.o \
- firmware.o power.o
+ firmware.o power.o dlpar.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_XICS) += xics.o
obj-$(CONFIG_SCANLOG) += scanlog.o
obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_DTL) += dtl.o
+obj-$(CONFIG_CPU_IDLE) += processor_idle.o
--- /dev/null
+/*
+ * Support for dynamic reconfiguration (including PCI, Memory, and CPU
+ * Hotplug and Dynamic Logical Partitioning on PAPR platforms).
+ *
+ * Copyright (C) 2009 Nathan Fontenot
+ * Copyright (C) 2009 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/notifier.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+#include <linux/memory_hotplug.h>
+#include <linux/sysdev.h>
+#include <linux/sysfs.h>
+#include <linux/cpu.h>
+#include "offline_states.h"
+
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/uaccess.h>
+#include <asm/rtas.h>
+#include <asm/pSeries_reconfig.h>
+
+#define CFG_CONN_WORK_SIZE 4096
+static char workarea[CFG_CONN_WORK_SIZE];
+static DEFINE_SPINLOCK(workarea_lock);
+
+struct cc_workarea {
+ u32 drc_index;
+ u32 zero;
+ u32 name_offset;
+ u32 prop_length;
+ u32 prop_offset;
+};
+
+static struct property *parse_cc_property(char *workarea)
+{
+ struct property *prop;
+ struct cc_workarea *ccwa;
+ char *name;
+ char *value;
+
+ prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+ if (!prop)
+ return NULL;
+
+ ccwa = (struct cc_workarea *)workarea;
+ name = workarea + ccwa->name_offset;
+ prop->name = kzalloc(strlen(name) + 1, GFP_KERNEL);
+ if (!prop->name) {
+ kfree(prop);
+ return NULL;
+ }
+
+ strcpy(prop->name, name);
+
+ prop->length = ccwa->prop_length;
+ value = workarea + ccwa->prop_offset;
+ prop->value = kzalloc(prop->length, GFP_KERNEL);
+ if (!prop->value) {
+ kfree(prop->name);
+ kfree(prop);
+ return NULL;
+ }
+
+ memcpy(prop->value, value, prop->length);
+ return prop;
+}
+
+static void free_property(struct property *prop)
+{
+ kfree(prop->name);
+ kfree(prop->value);
+ kfree(prop);
+}
+
+static struct device_node *parse_cc_node(char *work_area)
+{
+ struct device_node *dn;
+ struct cc_workarea *ccwa;
+ char *name;
+
+ dn = kzalloc(sizeof(*dn), GFP_KERNEL);
+ if (!dn)
+ return NULL;
+
+ ccwa = (struct cc_workarea *)work_area;
+ name = work_area + ccwa->name_offset;
+ dn->full_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
+ if (!dn->full_name) {
+ kfree(dn);
+ return NULL;
+ }
+
+ strcpy(dn->full_name, name);
+ return dn;
+}
+
+static void free_one_cc_node(struct device_node *dn)
+{
+ struct property *prop;
+
+ while (dn->properties) {
+ prop = dn->properties;
+ dn->properties = prop->next;
+ free_property(prop);
+ }
+
+ kfree(dn->full_name);
+ kfree(dn);
+}
+
+static void free_cc_nodes(struct device_node *dn)
+{
+ if (dn->child)
+ free_cc_nodes(dn->child);
+
+ if (dn->sibling)
+ free_cc_nodes(dn->sibling);
+
+ free_one_cc_node(dn);
+}
+
+#define NEXT_SIBLING 1
+#define NEXT_CHILD 2
+#define NEXT_PROPERTY 3
+#define PREV_PARENT 4
+#define MORE_MEMORY 5
+#define CALL_AGAIN -2
+#define ERR_CFG_USE -9003
+
+struct device_node *configure_connector(u32 drc_index)
+{
+ struct device_node *dn;
+ struct device_node *first_dn = NULL;
+ struct device_node *last_dn = NULL;
+ struct property *property;
+ struct property *last_property = NULL;
+ struct cc_workarea *ccwa;
+ int cc_token;
+ int rc;
+
+ cc_token = rtas_token("ibm,configure-connector");
+ if (cc_token == RTAS_UNKNOWN_SERVICE)
+ return NULL;
+
+ spin_lock(&workarea_lock);
+
+ ccwa = (struct cc_workarea *)&workarea[0];
+ ccwa->drc_index = drc_index;
+ ccwa->zero = 0;
+
+ rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
+ while (rc) {
+ switch (rc) {
+ case NEXT_SIBLING:
+ dn = parse_cc_node(workarea);
+ if (!dn)
+ goto cc_error;
+
+ dn->parent = last_dn->parent;
+ last_dn->sibling = dn;
+ last_dn = dn;
+ break;
+
+ case NEXT_CHILD:
+ dn = parse_cc_node(workarea);
+ if (!dn)
+ goto cc_error;
+
+ if (!first_dn)
+ first_dn = dn;
+ else {
+ dn->parent = last_dn;
+ if (last_dn)
+ last_dn->child = dn;
+ }
+
+ last_dn = dn;
+ break;
+
+ case NEXT_PROPERTY:
+ property = parse_cc_property(workarea);
+ if (!property)
+ goto cc_error;
+
+ if (!last_dn->properties)
+ last_dn->properties = property;
+ else
+ last_property->next = property;
+
+ last_property = property;
+ break;
+
+ case PREV_PARENT:
+ last_dn = last_dn->parent;
+ break;
+
+ case CALL_AGAIN:
+ break;
+
+ case MORE_MEMORY:
+ case ERR_CFG_USE:
+ default:
+ printk(KERN_ERR "Unexpected Error (%d) "
+ "returned from configure-connector\n", rc);
+ goto cc_error;
+ }
+
+ rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
+ }
+
+ spin_unlock(&workarea_lock);
+ return first_dn;
+
+cc_error:
+ spin_unlock(&workarea_lock);
+
+ if (first_dn)
+ free_cc_nodes(first_dn);
+
+ return NULL;
+}
+
+static struct device_node *derive_parent(const char *path)
+{
+ struct device_node *parent;
+ char parent_path[128];
+ int parent_path_len;
+
+ parent_path_len = strrchr(path, '/') - path + 1;
+ strlcpy(parent_path, path, parent_path_len);
+
+ parent = of_find_node_by_path(parent_path);
+
+ return parent;
+}
+
+static int add_one_node(struct device_node *dn)
+{
+ struct proc_dir_entry *ent;
+ int rc;
+
+ of_node_set_flag(dn, OF_DYNAMIC);
+ kref_init(&dn->kref);
+ dn->parent = derive_parent(dn->full_name);
+
+ rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_RECONFIG_ADD, dn);
+ if (rc == NOTIFY_BAD) {
+ printk(KERN_ERR "Failed to add device node %s\n",
+ dn->full_name);
+ return -ENOMEM; /* For now, safe to assume kmalloc failure */
+ }
+
+ of_attach_node(dn);
+
+#ifdef CONFIG_PROC_DEVICETREE
+ ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
+ if (ent)
+ proc_device_tree_add_node(dn, ent);
+#endif
+
+ of_node_put(dn->parent);
+ return 0;
+}
+
+int add_device_tree_nodes(struct device_node *dn)
+{
+ struct device_node *child = dn->child;
+ struct device_node *sibling = dn->sibling;
+ int rc;
+
+ dn->child = NULL;
+ dn->sibling = NULL;
+ dn->parent = NULL;
+
+ rc = add_one_node(dn);
+ if (rc)
+ return rc;
+
+ if (child) {
+ rc = add_device_tree_nodes(child);
+ if (rc)
+ return rc;
+ }
+
+ if (sibling)
+ rc = add_device_tree_nodes(sibling);
+
+ return rc;
+}
+
+static int remove_one_node(struct device_node *dn)
+{
+ struct device_node *parent = dn->parent;
+ struct property *prop = dn->properties;
+
+#ifdef CONFIG_PROC_DEVICETREE
+ while (prop) {
+ remove_proc_entry(prop->name, dn->pde);
+ prop = prop->next;
+ }
+
+ if (dn->pde)
+ remove_proc_entry(dn->pde->name, parent->pde);
+#endif
+
+ blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_RECONFIG_REMOVE, dn);
+ of_detach_node(dn);
+ of_node_put(dn); /* Must decrement the refcount */
+
+ return 0;
+}
+
+static int _remove_device_tree_nodes(struct device_node *dn)
+{
+ int rc;
+
+ if (dn->child) {
+ rc = _remove_device_tree_nodes(dn->child);
+ if (rc)
+ return rc;
+ }
+
+ if (dn->sibling) {
+ rc = _remove_device_tree_nodes(dn->sibling);
+ if (rc)
+ return rc;
+ }
+
+ rc = remove_one_node(dn);
+ return rc;
+}
+
+int remove_device_tree_nodes(struct device_node *dn)
+{
+ int rc;
+
+ if (dn->child) {
+ rc = _remove_device_tree_nodes(dn->child);
+ if (rc)
+ return rc;
+ }
+
+ rc = remove_one_node(dn);
+ return rc;
+}
+
+int online_node_cpus(struct device_node *dn)
+{
+ int rc = 0;
+ unsigned int cpu;
+ int len, nthreads, i;
+ const u32 *intserv;
+
+ intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+ if (!intserv)
+ return -EINVAL;
+
+ nthreads = len / sizeof(u32);
+
+ cpu_maps_update_begin();
+ for (i = 0; i < nthreads; i++) {
+ for_each_present_cpu(cpu) {
+ if (get_hard_smp_processor_id(cpu) != intserv[i])
+ continue;
+ BUG_ON(get_cpu_current_state(cpu)
+ != CPU_STATE_OFFLINE);
+ cpu_maps_update_done();
+ rc = cpu_up(cpu);
+ if (rc)
+ goto out;
+ cpu_maps_update_begin();
+
+ break;
+ }
+ if (cpu == num_possible_cpus())
+ printk(KERN_WARNING "Could not find cpu to online "
+ "with physical id 0x%x\n", intserv[i]);
+ }
+ cpu_maps_update_done();
+
+out:
+ return rc;
+}
+
+int offline_node_cpus(struct device_node *dn)
+{
+ int rc = 0;
+ unsigned int cpu;
+ int len, nthreads, i;
+ const u32 *intserv;
+
+ intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+ if (!intserv)
+ return -EINVAL;
+
+ nthreads = len / sizeof(u32);
+
+ cpu_maps_update_begin();
+ for (i = 0; i < nthreads; i++) {
+ for_each_present_cpu(cpu) {
+ if (get_hard_smp_processor_id(cpu) != intserv[i])
+ continue;
+
+ if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
+ break;
+
+ if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
+ cpu_maps_update_done();
+ rc = cpu_down(cpu);
+ if (rc)
+ goto out;
+ cpu_maps_update_begin();
+ break;
+
+ }
+
+ /*
+ * The cpu is in CPU_STATE_INACTIVE.
+		 * Upgrade its state to CPU_STATE_OFFLINE.
+ */
+ set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
+ BUG_ON(plpar_hcall_norets(H_PROD, intserv[i])
+ != H_SUCCESS);
+ __cpu_die(cpu);
+ break;
+ }
+ if (cpu == num_possible_cpus())
+ printk(KERN_WARNING "Could not find cpu to offline "
+ "with physical id 0x%x\n", intserv[i]);
+ }
+ cpu_maps_update_done();
+
+out:
+ return rc;
+}
+
+#define DR_ENTITY_SENSE 9003
+#define DR_ENTITY_PRESENT 1
+#define DR_ENTITY_UNUSABLE 2
+#define ALLOCATION_STATE 9003
+#define ALLOC_UNUSABLE 0
+#define ALLOC_USABLE 1
+#define ISOLATION_STATE 9001
+#define ISOLATE 0
+#define UNISOLATE 1
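+
+/*
+ * acquire_drc() marks a connector's allocation state usable and then
+ * un-isolates it; release_drc() reverses the order (isolate first,
+ * then mark unusable), and each backs out its first step on failure.
+ */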
+
+int acquire_drc(u32 drc_index)
+{
+ int dr_status, rc;
+
+ rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+ DR_ENTITY_SENSE, drc_index);
+ if (rc || dr_status != DR_ENTITY_UNUSABLE)
+ return -1;
+
+ rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE);
+ if (rc)
+ return rc;
+
+ rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+ if (rc) {
+ rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+ return rc;
+ }
+
+ return 0;
+}
+
+int release_drc(u32 drc_index)
+{
+ int dr_status, rc;
+
+ rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+ DR_ENTITY_SENSE, drc_index);
+ if (rc || dr_status != DR_ENTITY_PRESENT)
+ return -1;
+
+ rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
+ if (rc)
+ return rc;
+
+ rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+ if (rc) {
+ rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+ return rc;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static DEFINE_MUTEX(pseries_cpu_hotplug_mutex);
+
+void cpu_hotplug_driver_lock(void)
+{
+ mutex_lock(&pseries_cpu_hotplug_mutex);
+}
+
+void cpu_hotplug_driver_unlock(void)
+{
+ mutex_unlock(&pseries_cpu_hotplug_mutex);
+}
+
+static ssize_t cpu_probe_store(struct class *class, const char *buf,
+ size_t count)
+{
+ struct device_node *dn;
+ unsigned long drc_index;
+ char *cpu_name;
+ int rc;
+
+ cpu_hotplug_driver_lock();
+ rc = strict_strtoul(buf, 0, &drc_index);
+ if (rc)
+ goto out;
+
+ rc = acquire_drc(drc_index);
+ if (rc)
+ goto out;
+
+ dn = configure_connector(drc_index);
+ if (!dn) {
+ release_drc(drc_index);
+ goto out;
+ }
+
+ /* fixup dn name */
+ cpu_name = kzalloc(strlen(dn->full_name) + strlen("/cpus/") + 1,
+ GFP_KERNEL);
+ if (!cpu_name) {
+ free_cc_nodes(dn);
+ release_drc(drc_index);
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ sprintf(cpu_name, "/cpus/%s", dn->full_name);
+ kfree(dn->full_name);
+ dn->full_name = cpu_name;
+
+	rc = add_device_tree_nodes(dn);
+	if (rc) {
+		release_drc(drc_index);
+		goto out;
+	}
+
+	rc = online_node_cpus(dn);
+out:
+ cpu_hotplug_driver_unlock();
+
+ return rc ? -EINVAL : count;
+}
+
+static ssize_t cpu_release_store(struct class *class, const char *buf,
+ size_t count)
+{
+ struct device_node *dn;
+ const u32 *drc_index;
+ int rc;
+
+ dn = of_find_node_by_path(buf);
+ if (!dn)
+ return -EINVAL;
+
+ drc_index = of_get_property(dn, "ibm,my-drc-index", NULL);
+ if (!drc_index) {
+ of_node_put(dn);
+ return -EINVAL;
+ }
+
+ cpu_hotplug_driver_lock();
+ rc = offline_node_cpus(dn);
+
+ if (rc)
+ goto out;
+
+ rc = release_drc(*drc_index);
+ if (rc) {
+ of_node_put(dn);
+ goto out;
+ }
+
+ rc = remove_device_tree_nodes(dn);
+ if (rc)
+ acquire_drc(*drc_index);
+
+ of_node_put(dn);
+out:
+ cpu_hotplug_driver_unlock();
+ return rc ? -EINVAL : count;
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+static struct property *clone_property(struct property *old_prop)
+{
+ struct property *new_prop;
+
+ new_prop = kzalloc((sizeof *new_prop), GFP_KERNEL);
+ if (!new_prop)
+ return NULL;
+
+ new_prop->name = kstrdup(old_prop->name, GFP_KERNEL);
+ new_prop->value = kzalloc(old_prop->length + 1, GFP_KERNEL);
+ if (!new_prop->name || !new_prop->value) {
+ free_property(new_prop);
+ return NULL;
+ }
+
+ memcpy(new_prop->value, old_prop->value, old_prop->length);
+ new_prop->length = old_prop->length;
+
+ return new_prop;
+}
+
+int platform_probe_memory(u64 phys_addr)
+{
+ struct device_node *dn = NULL;
+ struct property *new_prop;
+ struct property *old_prop;
+ struct of_drconf_cell *drmem;
+ const u64 *lmb_size;
+ int num_entries, i;
+ int rc = -EINVAL;
+
+ if (!phys_addr)
+ goto memory_probe_exit;
+
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dn)
+ goto memory_probe_exit;
+
+ lmb_size = of_get_property(dn, "ibm,lmb-size", NULL);
+ if (!lmb_size)
+ goto memory_probe_exit;
+
+ old_prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
+ if (!old_prop)
+ goto memory_probe_exit;
+
+ num_entries = *(u32 *)old_prop->value;
+ drmem = (struct of_drconf_cell *)
+ ((char *)old_prop->value + sizeof(u32));
+
+ for (i = 0; i < num_entries; i++) {
+ u64 lmb_end_addr = drmem[i].base_addr + *lmb_size;
+ if (phys_addr >= drmem[i].base_addr
+ && phys_addr < lmb_end_addr)
+ break;
+ }
+
+ if (i >= num_entries)
+ goto memory_probe_exit;
+
+ if (drmem[i].flags & DRCONF_MEM_ASSIGNED) {
+		/* This lmb is already assigned to the system, nothing to do */
+ rc = 0;
+ goto memory_probe_exit;
+ }
+
+ rc = acquire_drc(drmem[i].drc_index);
+ if (rc) {
+ rc = -EINVAL;
+ goto memory_probe_exit;
+ }
+
+	new_prop = clone_property(old_prop);
+	if (!new_prop) {
+		release_drc(drmem[i].drc_index);
+		rc = -ENOMEM;
+		goto memory_probe_exit;
+	}
+
+	drmem = (struct of_drconf_cell *)
+			((char *)new_prop->value + sizeof(u32));
+
+ drmem[i].flags |= DRCONF_MEM_ASSIGNED;
+ rc = prom_update_property(dn, new_prop, old_prop);
+ if (rc) {
+ free_property(new_prop);
+ rc = -EINVAL;
+ goto memory_probe_exit;
+ }
+
+ rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_DRCONF_MEM_ADD,
+ &drmem[i].base_addr);
+ if (rc == NOTIFY_BAD) {
+ prom_update_property(dn, old_prop, new_prop);
+ release_drc(drmem[i].drc_index);
+ rc = -EINVAL;
+ } else
+ rc = 0;
+
+memory_probe_exit:
+ of_node_put(dn);
+ return rc;
+}
+
+static ssize_t memory_release_store(struct class *class, const char *buf,
+ size_t count)
+{
+ unsigned long drc_index;
+ struct device_node *dn;
+ struct property *new_prop, *old_prop;
+ struct of_drconf_cell *drmem;
+ int num_entries;
+ int i;
+ int rc = -EINVAL;
+
+ rc = strict_strtoul(buf, 0, &drc_index);
+ if (rc)
+ return rc;
+
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dn)
+ return rc;
+
+ old_prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
+ if (!old_prop)
+ goto memory_release_exit;
+
+ num_entries = *(u32 *)old_prop->value;
+ drmem = (struct of_drconf_cell *)
+ ((char *)old_prop->value + sizeof(u32));
+
+ for (i = 0; i < num_entries; i++) {
+ if (drmem[i].drc_index == drc_index)
+ break;
+ }
+
+ if (i >= num_entries)
+ goto memory_release_exit;
+
+	new_prop = clone_property(old_prop);
+	if (!new_prop) {
+		rc = -ENOMEM;
+		goto memory_release_exit;
+	}
+
+	drmem = (struct of_drconf_cell *)
+		((char *)new_prop->value + sizeof(u32));
+
+ drmem[i].flags &= ~DRCONF_MEM_ASSIGNED;
+ rc = prom_update_property(dn, new_prop, old_prop);
+ if (rc) {
+ free_property(new_prop);
+ rc = -EINVAL;
+ goto memory_release_exit;
+ }
+
+ rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_DRCONF_MEM_REMOVE,
+ &drmem[i].base_addr);
+ if (rc != NOTIFY_BAD)
+ rc = release_drc(drc_index);
+
+ if (rc) {
+ prom_update_property(dn, old_prop, new_prop);
+ rc = -EINVAL;
+ }
+
+memory_release_exit:
+ of_node_put(dn);
+ return rc ? rc : count;
+}
+
+static struct class_attribute class_attr_mem_release =
+ __ATTR(release, S_IWUSR, NULL, memory_release_store);
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+#ifdef CONFIG_HOTPLUG_CPU
+static struct class_attribute class_attr_cpu_probe =
+ __ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
+static struct class_attribute class_attr_cpu_release =
+ __ATTR(release, S_IWUSR, NULL, cpu_release_store);
+#endif
+
+static int pseries_dlpar_init(void)
+{
+ if (!machine_is(pseries))
+ return 0;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+ if (sysfs_create_file(&memory_sysdev_class.kset.kobj,
+ &class_attr_mem_release.attr))
+ printk(KERN_INFO "DLPAR: Could not create sysfs memory "
+ "release file\n");
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+ if (sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ &class_attr_cpu_probe.attr))
+ printk(KERN_INFO "DLPAR: Could not create sysfs cpu "
+ "probe file\n");
+
+ if (sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ &class_attr_cpu_release.attr))
+ printk(KERN_INFO "DLPAR: Could not create sysfs cpu "
+ "release file\n");
+#endif
+
+ return 0;
+}
+device_initcall(pseries_dlpar_init);
#include <asm/pSeries_reconfig.h>
#include "xics.h"
#include "plpar_wrappers.h"
+#include "offline_states.h"
/* This version can't take the spinlock, because it never returns */
static struct rtas_args rtas_stop_self_args = {
.rets = &rtas_stop_self_args.args[0],
};
+static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
+ CPU_STATE_OFFLINE;
+static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;
+
+static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;
+
+static int cede_offline_enabled __read_mostly = 1;
+
+/*
+ * Enable/disable cede_offline when available.
+ */
+static int __init setup_cede_offline(char *str)
+{
+ if (!strcmp(str, "off"))
+ cede_offline_enabled = 0;
+ else if (!strcmp(str, "on"))
+ cede_offline_enabled = 1;
+ else
+ return 0;
+ return 1;
+}
+
+__setup("cede_offline=", setup_cede_offline);
+
+enum cpu_state_vals get_cpu_current_state(int cpu)
+{
+ return per_cpu(current_state, cpu);
+}
+
+void set_cpu_current_state(int cpu, enum cpu_state_vals state)
+{
+ per_cpu(current_state, cpu) = state;
+}
+
+enum cpu_state_vals get_preferred_offline_state(int cpu)
+{
+ return per_cpu(preferred_offline_state, cpu);
+}
+
+void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
+{
+ per_cpu(preferred_offline_state, cpu) = state;
+}
+
+void set_default_offline_state(int cpu)
+{
+ per_cpu(preferred_offline_state, cpu) = default_offline_state;
+}
+
static void rtas_stop_self(void)
{
struct rtas_args *args = &rtas_stop_self_args;
static void pseries_mach_cpu_die(void)
{
+ unsigned int cpu = smp_processor_id();
+ unsigned int hwcpu = hard_smp_processor_id();
+ u8 cede_latency_hint = 0;
+
local_irq_disable();
idle_task_exit();
xics_teardown_cpu();
- unregister_slb_shadow(hard_smp_processor_id(), __pa(get_slb_shadow()));
- rtas_stop_self();
+
+ if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
+ set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
+ cede_latency_hint = 2;
+
+ get_lppaca()->idle = 1;
+ if (!get_lppaca()->shared_proc)
+ get_lppaca()->donate_dedicated_cpu = 1;
+
+ printk(KERN_INFO
+ "cpu %u (hwid %u) ceding for offline with hint %d\n",
+ cpu, hwcpu, cede_latency_hint);
+ while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
+ extended_cede_processor(cede_latency_hint);
+ printk(KERN_INFO "cpu %u (hwid %u) returned from cede.\n",
+ cpu, hwcpu);
+ printk(KERN_INFO
+ "Decrementer value = %x Timebase value = %llx\n",
+ get_dec(), get_tb());
+ }
+
+ printk(KERN_INFO "cpu %u (hwid %u) got prodded to go online\n",
+ cpu, hwcpu);
+
+ if (!get_lppaca()->shared_proc)
+ get_lppaca()->donate_dedicated_cpu = 0;
+ get_lppaca()->idle = 0;
+ }
+
+ if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
+ unregister_slb_shadow(hwcpu, __pa(get_slb_shadow()));
+
+ /*
+ * NOTE: Calling start_secondary() here for now to
+ * start new context.
+ * However, need to do it cleanly by resetting the
+ * stack pointer.
+ */
+ start_secondary();
+ goto out_bug;
+
+ } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
+
+ set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
+		unregister_slb_shadow(hwcpu, __pa(get_slb_shadow()));
+ rtas_stop_self();
+ goto out_bug;
+ }
+
+out_bug:
/* Should never get here... */
BUG();
for(;;);
static void pseries_cpu_die(unsigned int cpu)
{
int tries;
- int cpu_status;
+ int cpu_status = 1;
unsigned int pcpu = get_hard_smp_processor_id(cpu);
- for (tries = 0; tries < 25; tries++) {
- cpu_status = query_cpu_stopped(pcpu);
- if (cpu_status == 0 || cpu_status == -1)
- break;
- cpu_relax();
+ if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
+ cpu_status = 1;
+ for (tries = 0; tries < 1000; tries++) {
+ if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) {
+ cpu_status = 0;
+ break;
+ }
+ cpu_relax();
+ }
+ } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
+
+ for (tries = 0; tries < 25; tries++) {
+ cpu_status = query_cpu_stopped(pcpu);
+ if (cpu_status == 0 || cpu_status == -1)
+ break;
+ cpu_relax();
+ }
}
+
if (cpu_status != 0) {
printk("Querying DEAD? cpu %i (%i) shows %i\n",
cpu, pcpu, cpu_status);
.notifier_call = pseries_smp_notifier,
};
+#define MAX_CEDE_LATENCY_LEVELS 4
+#define CEDE_LATENCY_PARAM_LENGTH 10
+#define CEDE_LATENCY_PARAM_MAX_LENGTH \
+ (MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
+#define CEDE_LATENCY_TOKEN 45
+
+static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];
+
+static int parse_cede_parameters(void)
+{
+ int call_status;
+
+ memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH);
+ call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+ NULL,
+ CEDE_LATENCY_TOKEN,
+ __pa(cede_parameters),
+ CEDE_LATENCY_PARAM_MAX_LENGTH);
+
+ if (call_status != 0)
+		printk(KERN_INFO "CEDE_LATENCY: %s %s Error calling "
+		       "get-system-parameter(0x%x)\n",
+		       __FILE__, __func__, call_status);
+	else
+		printk(KERN_INFO "CEDE_LATENCY: "
+		       "get-system-parameter successful.\n");
+
+ return call_status;
+}
+
static int __init pseries_cpu_hotplug_init(void)
{
struct device_node *np;
const char *typep;
+ int cpu;
for_each_node_by_name(np, "interrupt-controller") {
typep = of_get_property(np, "compatible", NULL);
smp_ops->cpu_die = pseries_cpu_die;
/* Processors can be added/removed only on LPAR */
- if (firmware_has_feature(FW_FEATURE_LPAR))
+ if (firmware_has_feature(FW_FEATURE_LPAR)) {
pSeries_reconfig_notifier_register(&pseries_smp_nb);
+ cpu_maps_update_begin();
+ if (cede_offline_enabled && parse_cede_parameters() == 0) {
+ default_offline_state = CPU_STATE_INACTIVE;
+ for_each_online_cpu(cpu)
+ set_default_offline_state(cpu);
+ }
+ cpu_maps_update_done();
+ }
return 0;
}
--- /dev/null
+#ifndef _OFFLINE_STATES_H_
+#define _OFFLINE_STATES_H_
+
+/* Cpu offline states go here */
+enum cpu_state_vals {
+ CPU_STATE_OFFLINE,
+ CPU_STATE_INACTIVE,
+ CPU_STATE_ONLINE,
+ CPU_MAX_OFFLINE_STATES
+};
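+
+/*
+ * CPU_STATE_OFFLINE:  thread handed back to firmware via rtas
+ *                     stop-self.
+ * CPU_STATE_INACTIVE: thread has ceded to the hypervisor and can be
+ *                     prodded back online with H_PROD.
+ * CPU_STATE_ONLINE:   thread is running in the kernel.
+ */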
+
+extern enum cpu_state_vals get_cpu_current_state(int cpu);
+extern void set_cpu_current_state(int cpu, enum cpu_state_vals state);
+extern enum cpu_state_vals get_preferred_offline_state(int cpu);
+extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state);
+extern void set_default_offline_state(int cpu);
+extern int start_secondary(void);
+#endif
return plpar_hcall_norets(H_POLL_PENDING);
}
+static inline u8 get_cede_latency_hint(void)
+{
+ return get_lppaca()->gpr5_dword.fields.cede_latency_hint;
+}
+
+static inline void set_cede_latency_hint(u8 latency_hint)
+{
+ get_lppaca()->gpr5_dword.fields.cede_latency_hint = latency_hint;
+}
+
static inline long cede_processor(void)
{
return plpar_hcall_norets(H_CEDE);
}
+static inline long extended_cede_processor(unsigned long latency_hint)
+{
+ long rc;
+ u8 old_latency_hint = get_cede_latency_hint();
+
+ set_cede_latency_hint(latency_hint);
+ rc = cede_processor();
+ set_cede_latency_hint(old_latency_hint);
+
+ return rc;
+}
+
static inline long vpa_call(unsigned long flags, unsigned long cpu,
unsigned long vpa)
{
--- /dev/null
+/*
+ * processor_idle - idle state cpuidle driver.
+ * Adapted from drivers/acpi/processor_idle.c
+ *
+ * Arun R Bharadwaj <arun@linux.vnet.ibm.com>
+ *
+ * Copyright (C) 2009 IBM Corporation.
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+#include <linux/cpuidle.h>
+
+#include <asm/paca.h>
+#include <asm/reg.h>
+#include <asm/system.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+
+#include "plpar_wrappers.h"
+#include "pseries.h"
+
+MODULE_AUTHOR("Arun R Bharadwaj");
+MODULE_DESCRIPTION("pSeries Idle State Driver");
+MODULE_LICENSE("GPL");
+
+struct cpuidle_driver pseries_idle_driver = {
+ .name = "pseries_idle",
+ .owner = THIS_MODULE,
+};
+
+DEFINE_PER_CPU(struct cpuidle_device, pseries_dev);
+
+#define IDLE_STATE_COUNT 2
+
+/* pSeries Idle state Flags */
+#define PSERIES_DEDICATED_SNOOZE (0x01)
+#define PSERIES_DEDICATED_CEDE (0x02)
+#define PSERIES_SHARED_CEDE (0x03)
+
+static int pseries_idle_init(struct cpuidle_device *dev)
+{
+ return cpuidle_register_device(dev);
+}
+
+static void shared_cede_loop(void)
+{
+ get_lppaca()->idle = 1;
+ cede_processor();
+ get_lppaca()->idle = 0;
+}
+
+static void dedicated_snooze_loop(void)
+{
+ local_irq_enable();
+ set_thread_flag(TIF_POLLING_NRFLAG);
+ while (!need_resched()) {
+ ppc64_runlatch_off();
+ HMT_low();
+ HMT_very_low();
+ }
+ HMT_medium();
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb();
+ local_irq_disable();
+}
+
+static void dedicated_cede_loop(void)
+{
+ ppc64_runlatch_off();
+ HMT_medium();
+ cede_processor();
+}
+
+static void pseries_cpuidle_loop(struct cpuidle_device *dev,
+ struct cpuidle_state *st)
+{
+ unsigned long in_purr, out_purr;
+
+ get_lppaca()->idle = 1;
+ get_lppaca()->donate_dedicated_cpu = 1;
+ in_purr = mfspr(SPRN_PURR);
+
+	/*
+	 * The state flags are distinct values, not bits; compare for
+	 * equality so PSERIES_DEDICATED_SNOOZE (0x01) does not match
+	 * PSERIES_SHARED_CEDE (0x03) as a mask.
+	 */
+	if (st->flags == PSERIES_SHARED_CEDE)
+		shared_cede_loop();
+	else if (st->flags == PSERIES_DEDICATED_SNOOZE)
+		dedicated_snooze_loop();
+	else
+		dedicated_cede_loop();
+
+ out_purr = mfspr(SPRN_PURR);
+ get_lppaca()->wait_state_cycles += out_purr - in_purr;
+ get_lppaca()->donate_dedicated_cpu = 0;
+ get_lppaca()->idle = 0;
+}
+
+static int pseries_setup_cpuidle(struct cpuidle_device *dev, int cpu)
+{
+ int i;
+ struct cpuidle_state *state;
+
+ dev->cpu = cpu;
+
+ if (get_lppaca()->shared_proc) {
+ state = &dev->states[0];
+ snprintf(state->name, CPUIDLE_NAME_LEN, "IDLE");
+ state->enter = pseries_cpuidle_loop;
+ strncpy(state->desc, "shared_cede", CPUIDLE_DESC_LEN);
+ state->flags = PSERIES_SHARED_CEDE;
+ state->exit_latency = 0;
+ state->target_residency = 0;
+ return 0;
+ }
+
+ for (i = 0; i < IDLE_STATE_COUNT; i++) {
+ state = &dev->states[i];
+
+ snprintf(state->name, CPUIDLE_NAME_LEN, "CEDE%d", i);
+ state->enter = pseries_cpuidle_loop;
+
+ switch (i) {
+ case 0:
+ strncpy(state->desc, "snooze", CPUIDLE_DESC_LEN);
+ state->flags = PSERIES_DEDICATED_SNOOZE;
+ state->exit_latency = 0;
+ state->target_residency = 0;
+ break;
+
+ case 1:
+ strncpy(state->desc, "cede", CPUIDLE_DESC_LEN);
+ state->flags = PSERIES_DEDICATED_CEDE;
+ state->exit_latency = 1;
+			state->target_residency =
+				per_cpu(smt_snooze_delay, cpu);
+ break;
+ }
+ }
+ dev->state_count = IDLE_STATE_COUNT;
+
+ return 0;
+}
+
+void update_smt_snooze_delay(int snooze)
+{
+ int cpu;
+ for_each_online_cpu(cpu)
+		per_cpu(pseries_dev, cpu).states[1].target_residency = snooze;
+}
+
+static int __init pseries_processor_idle_init(void)
+{
+ int cpu;
+ int result;
+
+ if (boot_option_idle_override) {
+ printk(KERN_DEBUG "Using default idle\n");
+ return 0;
+ }
+
+ result = cpuidle_register_driver(&pseries_idle_driver);
+
+ if (result < 0)
+ return result;
+
+ printk(KERN_DEBUG "pSeries idle driver registered\n");
+
+ if (!firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ printk(KERN_DEBUG "Using default idle\n");
+ return 0;
+ }
+
+ for_each_online_cpu(cpu) {
+ pseries_setup_cpuidle(&per_cpu(pseries_dev, cpu), cpu);
+ pseries_idle_init(&per_cpu(pseries_dev, cpu));
+ }
+
+ printk(KERN_DEBUG "Using cpuidle idle loop\n");
+
+ return 0;
+}
+
+device_initcall(pseries_processor_idle_init);
#ifndef _PSERIES_PSERIES_H
#define _PSERIES_PSERIES_H
+#include <linux/cpuidle.h>
+
extern void __init fw_feature_init(const char *hypertas, unsigned long len);
struct pt_regs;
extern void find_udbg_vterm(void);
+DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
+
+extern struct cpuidle_driver pseries_idle_driver;
+
#endif /* _PSERIES_PSERIES_H */
return parent;
}
-static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
+BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
int pSeries_reconfig_notifier_register(struct notifier_block *nb)
{
int fwnmi_active; /* TRUE if an FWNMI handler is present */
-static void pseries_shared_idle_sleep(void);
-static void pseries_dedicated_idle_sleep(void);
-
static struct device_node *pSeries_mpic_node;
static void pSeries_show_cpuinfo(struct seq_file *m)
pSeries_nvram_init();
/* Choose an idle loop */
- if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ if (firmware_has_feature(FW_FEATURE_SPLPAR))
vpa_init(boot_cpuid);
- if (get_lppaca()->shared_proc) {
- printk(KERN_DEBUG "Using shared processor idle loop\n");
- ppc_md.power_save = pseries_shared_idle_sleep;
- } else {
- printk(KERN_DEBUG "Using dedicated idle loop\n");
- ppc_md.power_save = pseries_dedicated_idle_sleep;
- }
- } else {
- printk(KERN_DEBUG "Using default idle loop\n");
- }
if (firmware_has_feature(FW_FEATURE_LPAR))
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
return 1;
}
-
-DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
-
-static void pseries_dedicated_idle_sleep(void)
-{
- unsigned int cpu = smp_processor_id();
- unsigned long start_snooze;
- unsigned long in_purr, out_purr;
-
- /*
- * Indicate to the HV that we are idle. Now would be
- * a good time to find other work to dispatch.
- */
- get_lppaca()->idle = 1;
- get_lppaca()->donate_dedicated_cpu = 1;
- in_purr = mfspr(SPRN_PURR);
-
- /*
- * We come in with interrupts disabled, and need_resched()
- * has been checked recently. If we should poll for a little
- * while, do so.
- */
- if (__get_cpu_var(smt_snooze_delay)) {
- start_snooze = get_tb() +
- __get_cpu_var(smt_snooze_delay) * tb_ticks_per_usec;
- local_irq_enable();
- set_thread_flag(TIF_POLLING_NRFLAG);
-
- while (get_tb() < start_snooze) {
- if (need_resched() || cpu_is_offline(cpu))
- goto out;
- ppc64_runlatch_off();
- HMT_low();
- HMT_very_low();
- }
-
- HMT_medium();
- clear_thread_flag(TIF_POLLING_NRFLAG);
- smp_mb();
- local_irq_disable();
- if (need_resched() || cpu_is_offline(cpu))
- goto out;
- }
-
- cede_processor();
-
-out:
- HMT_medium();
- out_purr = mfspr(SPRN_PURR);
- get_lppaca()->wait_state_cycles += out_purr - in_purr;
- get_lppaca()->donate_dedicated_cpu = 0;
- get_lppaca()->idle = 0;
-}
-
-static void pseries_shared_idle_sleep(void)
-{
- /*
- * Indicate to the HV that we are idle. Now would be
- * a good time to find other work to dispatch.
- */
- get_lppaca()->idle = 1;
-
- /*
- * Yield the processor to the hypervisor. We return if
- * an external interrupt occurs (which are driven prior
- * to returning here) or if a prod occurs from another
- * processor. When returning here, external interrupts
- * are enabled.
- */
- cede_processor();
-
- get_lppaca()->idle = 0;
-}
-
static int pSeries_pci_probe_mode(struct pci_bus *bus)
{
if (firmware_has_feature(FW_FEATURE_LPAR))
#include "plpar_wrappers.h"
#include "pseries.h"
#include "xics.h"
+#include "offline_states.h"
/*
/* Fixup atomic count: it exited inside IRQ handler. */
task_thread_info(paca[lcpu].__current)->preempt_count = 0;
+ if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
+ goto out;
+
/*
* If the RTAS start-cpu token does not exist then presume the
* cpu is already spinning.
return 0;
}
+out:
return 1;
}
vpa_init(cpu);
cpu_clear(cpu, of_spin_map);
+ set_cpu_current_state(cpu, CPU_STATE_ONLINE);
+ set_default_offline_state(cpu);
}
#endif /* CONFIG_XICS */
static void __devinit smp_pSeries_kick_cpu(int nr)
{
+ long rc;
+ unsigned long hcpuid;
BUG_ON(nr < 0 || nr >= NR_CPUS);
if (!smp_startup_cpu(nr))
* the processor will continue on to secondary_start
*/
paca[nr].cpu_start = 1;
+
+ set_preferred_offline_state(nr, CPU_STATE_ONLINE);
+
+ if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) {
+ hcpuid = get_hard_smp_processor_id(nr);
+ rc = plpar_hcall_norets(H_PROD, hcpuid);
+ if (rc != H_SUCCESS)
+ panic("Error: Prod to wake up processor %d Ret= %ld\n",
+ nr, rc);
+ }
}
static int smp_pSeries_cpu_bootable(unsigned int nr)
ptrLpPaca->saved_srr0, ptrLpPaca->saved_srr1);
printf(" Saved Gpr3=%.16lx Saved Gpr4=%.16lx \n",
ptrLpPaca->saved_gpr3, ptrLpPaca->saved_gpr4);
- printf(" Saved Gpr5=%.16lx \n", ptrLpPaca->saved_gpr5);
+ printf(" Saved Gpr5=%.16lx \n",
+ ptrLpPaca->gpr5_dword.saved_gpr5);
}
#endif
{ }
};
+DEFINE_PER_CPU(struct cpuidle_device, apm_idle_devices);
+
+struct cpuidle_driver cpuidle_apm_driver = {
+ .name = "cpuidle_apm",
+};
+
+static void apm_idle_loop(struct cpuidle_device *dev, struct cpuidle_state *st)
+{
+ apm_cpu_idle();
+}
+
+static void setup_cpuidle_apm(void)
+{
+ struct cpuidle_device *dev;
+ int cpu;
+
+ if (!cpuidle_curr_driver)
+ cpuidle_register_driver(&cpuidle_apm_driver);
+
+ for_each_online_cpu(cpu) {
+ dev = &per_cpu(apm_idle_devices, cpu);
+ dev->cpu = cpu;
+ dev->states[0].enter = apm_idle_loop;
+ dev->state_count = 1;
+ cpuidle_register_device(dev);
+ }
+}
+
+void exit_cpuidle_apm(void)
+{
+ struct cpuidle_device *dev;
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ dev = &per_cpu(apm_idle_devices, cpu);
+ cpuidle_unregister_device(dev);
+ }
+}
+
+
/*
* Just start the APM thread. We do NOT want to do APM BIOS
* calls from anything but the APM thread, if for no other reason
if (HZ != 100)
idle_period = (idle_period * HZ) / 100;
if (idle_threshold < 100) {
- original_pm_idle = pm_idle;
- pm_idle = apm_cpu_idle;
+ setup_cpuidle_apm();
set_pm_idle = 1;
}
int error;
if (set_pm_idle) {
- pm_idle = original_pm_idle;
+ exit_cpuidle_apm();
/*
	 * We are about to unload the current idle handler. Wait for
	 * all processors to update cached/local
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
+#include <linux/cpuidle.h>
#include <trace/events/power.h>
#include <asm/system.h>
#include <asm/apic.h>
unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-
#ifdef CONFIG_X86_32
/*
* This halt magic was a workaround for ancient floppy DMA
}
/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
+ * cpu_idle_wait - Required while changing idle routine handler on SMP systems.
*
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
+ * Caller must have changed idle routine to the new value before the call. Old
+ * value will not be used by any CPU after the return of this function.
*/
void cpu_idle_wait(void)
{
smp_mb();
- /* kick all the CPUs so that they exit out of pm_idle */
+ /* kick all the CPUs so that they exit out of idle loop */
smp_call_function(do_nothing, NULL, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
default_idle();
}
+static void (*local_idle)(void);
+
+#ifndef CONFIG_CPU_IDLE
+void cpuidle_idle_call(void)
+{
+ if (local_idle)
+ local_idle();
+ else
+ default_idle();
+}
+#endif
+
+DEFINE_PER_CPU(struct cpuidle_device, idle_devices);
+
+struct cpuidle_driver cpuidle_default_driver = {
+ .name = "cpuidle_default",
+};
+
+static void local_idle_loop(struct cpuidle_device *dev,
+ struct cpuidle_state *st)
+{
+ local_idle();
+}
+
+static int setup_cpuidle_simple(void)
+{
+ struct cpuidle_device *dev;
+ int cpu;
+
+ if (!cpuidle_curr_driver)
+ cpuidle_register_driver(&cpuidle_default_driver);
+
+ for_each_online_cpu(cpu) {
+ dev = &per_cpu(idle_devices, cpu);
+ dev->cpu = cpu;
+ dev->states[0].enter = local_idle_loop;
+ dev->state_count = 1;
+ cpuidle_register_device(dev);
+ }
+ return 0;
+}
+device_initcall(setup_cpuidle_simple);
+
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
- if (pm_idle == poll_idle && smp_num_siblings > 1) {
+ if (local_idle == poll_idle && smp_num_siblings > 1) {
printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
" performance may degrade.\n");
}
#endif
- if (pm_idle)
+ if (local_idle)
return;
if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
* One CPU supports mwait => All CPUs supports mwait
*/
printk(KERN_INFO "using mwait in idle threads.\n");
- pm_idle = mwait_idle;
+ local_idle = mwait_idle;
} else if (check_c1e_idle(c)) {
printk(KERN_INFO "using C1E aware idle routine\n");
- pm_idle = c1e_idle;
+ local_idle = c1e_idle;
} else
- pm_idle = default_idle;
+ local_idle = default_idle;
}
void __init init_c1e_mask(void)
{
/* If we're using c1e_idle, we need to allocate c1e_mask. */
- if (pm_idle == c1e_idle)
+ if (local_idle == c1e_idle)
zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
}
if (!strcmp(str, "poll")) {
printk("using polling idle threads.\n");
- pm_idle = poll_idle;
+ local_idle = poll_idle;
} else if (!strcmp(str, "mwait"))
force_mwait = 1;
else if (!strcmp(str, "halt")) {
* To continue to load the CPU idle driver, don't touch
* the boot_option_idle_override.
*/
- pm_idle = default_idle;
+ local_idle = default_idle;
idle_halt = 1;
return 0;
} else if (!strcmp(str, "nomwait")) {
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>
+#include <linux/cpuidle.h>
#include <asm/pgtable.h>
#include <asm/system.h>
local_irq_disable();
/* Don't trace irqs off for idle */
stop_critical_timings();
- pm_idle();
+ cpuidle_idle_call();
start_critical_timings();
}
tick_nohz_restart_sched_tick();
#include <linux/io.h>
#include <linux/ftrace.h>
#include <linux/dmi.h>
+#include <linux/cpuidle.h>
#include <asm/pgtable.h>
#include <asm/system.h>
enter_idle();
/* Don't trace irqs off for idle */
stop_critical_timings();
- pm_idle();
+ cpuidle_idle_call();
start_critical_timings();
/* In many cases the interrupt that ended idle
has already called exit_idle. But some idle
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pm.h>
+#include <linux/cpuidle.h>
#include <asm/elf.h>
#include <asm/vdso.h>
#endif /* CONFIG_X86_64 */
}
+DEFINE_PER_CPU(struct cpuidle_device, xen_idle_devices);
+struct cpuidle_driver cpuidle_xen_driver = {
+ .name = "cpuidle_xen",
+};
+
+static void xen_idle_loop(struct cpuidle_device *dev, struct cpuidle_state *st)
+{
+ xen_idle();
+}
+
+static void setup_cpuidle_xen(void)
+{
+ struct cpuidle_device *dev;
+ int cpu;
+
+ if (!cpuidle_curr_driver)
+ cpuidle_register_driver(&cpuidle_xen_driver);
+
+ for_each_online_cpu(cpu) {
+ dev = &per_cpu(xen_idle_devices, cpu);
+ dev->cpu = cpu;
+ dev->states[0].enter = xen_idle_loop;
+ dev->state_count = 1;
+ cpuidle_register_device(dev);
+ }
+}
+
void __init xen_arch_setup(void)
{
struct physdev_set_iopl set_iopl;
MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
- pm_idle = xen_idle;
+ setup_cpuidle_xen();
paravirt_disable_iospace();
* should not use mwait for CPU-states.
*/
dmi_check_system(processor_idle_dmi_table);
- result = cpuidle_register_driver(&acpi_idle_driver);
- if (result < 0)
- goto out_proc;
+
+ if (!boot_option_idle_override) {
+ result = cpuidle_register_driver(&acpi_idle_driver);
+ if (result < 0)
+ goto out_proc;
+ }
result = acpi_bus_register_driver(&acpi_processor_driver);
if (result < 0)
*
* This is equivalent to the HALT instruction.
*/
-static int acpi_idle_enter_c1(struct cpuidle_device *dev,
+static void acpi_idle_enter_c1(struct cpuidle_device *dev,
struct cpuidle_state *state)
{
- ktime_t kt1, kt2;
- s64 idle_time;
struct acpi_processor *pr;
struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
pr = __get_cpu_var(processors);
if (unlikely(!pr))
- return 0;
+ return;
local_irq_disable();
if (acpi_idle_suspend) {
local_irq_enable();
cpu_relax();
- return 0;
+ return;
}
lapic_timer_state_broadcast(pr, cx, 1);
- kt1 = ktime_get_real();
acpi_idle_do_entry(cx);
- kt2 = ktime_get_real();
- idle_time = ktime_to_us(ktime_sub(kt2, kt1));
local_irq_enable();
cx->usage++;
lapic_timer_state_broadcast(pr, cx, 0);
-
- return idle_time;
}
/**
* @dev: the target CPU
* @state: the state data
*/
-static int acpi_idle_enter_simple(struct cpuidle_device *dev,
+static void acpi_idle_enter_simple(struct cpuidle_device *dev,
struct cpuidle_state *state)
{
struct acpi_processor *pr;
pr = __get_cpu_var(processors);
if (unlikely(!pr))
- return 0;
+ return;
- if (acpi_idle_suspend)
- return(acpi_idle_enter_c1(dev, state));
+ if (acpi_idle_suspend) {
+ acpi_idle_enter_c1(dev, state);
+ return;
+ }
local_irq_disable();
current_thread_info()->status &= ~TS_POLLING;
if (unlikely(need_resched())) {
current_thread_info()->status |= TS_POLLING;
local_irq_enable();
- return 0;
+ return;
}
/*
lapic_timer_state_broadcast(pr, cx, 0);
cx->time += sleep_ticks;
- return idle_time;
}
static int c3_cpu_count;
*
* If BM is detected, the deepest non-C3 idle state is entered instead.
*/
-static int acpi_idle_enter_bm(struct cpuidle_device *dev,
+static void acpi_idle_enter_bm(struct cpuidle_device *dev,
struct cpuidle_state *state)
{
struct acpi_processor *pr;
pr = __get_cpu_var(processors);
if (unlikely(!pr))
- return 0;
+ return;
- if (acpi_idle_suspend)
- return(acpi_idle_enter_c1(dev, state));
+ if (acpi_idle_suspend) {
+ acpi_idle_enter_c1(dev, state);
+ return;
+ }
if (acpi_idle_bm_check()) {
if (dev->safe_state) {
dev->last_state = dev->safe_state;
- return dev->safe_state->enter(dev, dev->safe_state);
+ dev->safe_state->enter(dev, dev->safe_state);
+ return;
} else {
local_irq_disable();
acpi_safe_halt();
local_irq_enable();
- return 0;
+ return;
}
}
if (unlikely(need_resched())) {
current_thread_info()->status |= TS_POLLING;
local_irq_enable();
- return 0;
+ return;
}
acpi_unlazy_tlb(smp_processor_id());
lapic_timer_state_broadcast(pr, cx, 0);
cx->time += sleep_ticks;
- return idle_time;
}
struct cpuidle_driver acpi_idle_driver = {
struct cpu *cpu = container_of(dev, struct cpu, sysdev);
ssize_t ret;
+ cpu_hotplug_driver_lock();
switch (buf[0]) {
case '0':
ret = cpu_down(cpu->sysdev.id);
default:
ret = -EINVAL;
}
+ cpu_hotplug_driver_unlock();
if (ret >= 0)
ret = count;
#define MEMORY_CLASS_NAME "memory"
-static struct sysdev_class memory_sysdev_class = {
+struct sysdev_class memory_sysdev_class = {
.name = MEMORY_CLASS_NAME,
};
+EXPORT_SYMBOL(memory_sysdev_class);
static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj)
{
#include "cpuidle.h"
DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+DEFINE_PER_CPU(struct list_head, cpuidle_devices_list);
DEFINE_MUTEX(cpuidle_lock);
-LIST_HEAD(cpuidle_detected_devices);
-static void (*pm_idle_old)(void);
-
-static int enabled_devices;
#if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
static void cpuidle_kick_cpus(void)
*
* NOTE: no locks or semaphores should be used here
*/
-static void cpuidle_idle_call(void)
+void cpuidle_idle_call(void)
{
struct cpuidle_device *dev = __get_cpu_var(cpuidle_devices);
struct cpuidle_state *target_state;
int next_state;
+ ktime_t t1, t2;
+ s64 diff;
/* check if the device is ready */
if (!dev || !dev->enabled) {
- if (pm_idle_old)
- pm_idle_old();
- else
#if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE)
- default_idle();
+ default_idle();
#else
- local_irq_enable();
+ local_irq_enable();
#endif
return;
}
hrtimer_peek_ahead_timers();
#endif
/* ask the governor for the next state */
- next_state = cpuidle_curr_governor->select(dev);
+ if (dev->state_count > 1)
+ next_state = cpuidle_curr_governor->select(dev);
+ else
+ next_state = 0;
+
if (need_resched()) {
local_irq_enable();
return;
/* enter the state and update stats */
dev->last_state = target_state;
- dev->last_residency = target_state->enter(dev, target_state);
+
+ t1 = ktime_get();
+
+ target_state->enter(dev, target_state);
+
+ t2 = ktime_get();
+ diff = ktime_to_us(ktime_sub(t2, t1));
+ if (diff > INT_MAX)
+ diff = INT_MAX;
+
+ dev->last_residency = (int) diff;
+
if (dev->last_state)
target_state = dev->last_state;
}
/**
- * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
- */
-void cpuidle_install_idle_handler(void)
-{
- if (enabled_devices && (pm_idle != cpuidle_idle_call)) {
- /* Make sure all changes finished before we switch to new idle */
- smp_wmb();
- pm_idle = cpuidle_idle_call;
- }
-}
-
-/**
- * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
- */
-void cpuidle_uninstall_idle_handler(void)
-{
- if (enabled_devices && pm_idle_old && (pm_idle != pm_idle_old)) {
- pm_idle = pm_idle_old;
- cpuidle_kick_cpus();
- }
-}
-
-/**
* cpuidle_pause_and_lock - temporarily disables CPUIDLE
*/
void cpuidle_pause_and_lock(void)
{
mutex_lock(&cpuidle_lock);
- cpuidle_uninstall_idle_handler();
+ cpuidle_kick_cpus();
}
EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
*/
void cpuidle_resume_and_unlock(void)
{
- cpuidle_install_idle_handler();
mutex_unlock(&cpuidle_lock);
}
EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
+int cpuidle_add_to_list(struct cpuidle_device *dev)
+{
+ int ret, cpu = dev->cpu;
+ struct cpuidle_device *old_dev;
+
+ if (!list_empty(&per_cpu(cpuidle_devices_list, cpu))) {
+ old_dev = list_first_entry(&per_cpu(cpuidle_devices_list, cpu),
+ struct cpuidle_device, idle_list);
+ cpuidle_remove_state_sysfs(old_dev);
+ }
+
+ list_add(&dev->idle_list, &per_cpu(cpuidle_devices_list, cpu));
+ ret = cpuidle_add_state_sysfs(dev);
+ return ret;
+}
+
+void cpuidle_remove_from_list(struct cpuidle_device *dev)
+{
+ struct cpuidle_device *temp_dev;
+ struct list_head *pos;
+ int ret, cpu = dev->cpu;
+
+ list_for_each(pos, &per_cpu(cpuidle_devices_list, cpu)) {
+ temp_dev = container_of(pos, struct cpuidle_device, idle_list);
+ if (dev == temp_dev) {
+ list_del(&temp_dev->idle_list);
+ cpuidle_remove_state_sysfs(temp_dev);
+ break;
+ }
+ }
+
+ if (!list_empty(&per_cpu(cpuidle_devices_list, cpu))) {
+ temp_dev = list_first_entry(&per_cpu(cpuidle_devices_list, cpu),
+ struct cpuidle_device, idle_list);
+ ret = cpuidle_add_state_sysfs(temp_dev);
+ }
+ cpuidle_kick_cpus();
+}
+
/**
* cpuidle_enable_device - enables idle PM for a CPU
* @dev: the CPU
return ret;
}
- if ((ret = cpuidle_add_state_sysfs(dev)))
- return ret;
-
if (cpuidle_curr_governor->enable &&
(ret = cpuidle_curr_governor->enable(dev)))
goto fail_sysfs;
dev->enabled = 1;
- enabled_devices++;
return 0;
fail_sysfs:
- cpuidle_remove_state_sysfs(dev);
+ cpuidle_remove_from_list(dev);
return ret;
}
if (cpuidle_curr_governor->disable)
cpuidle_curr_governor->disable(dev);
-
- cpuidle_remove_state_sysfs(dev);
- enabled_devices--;
}
EXPORT_SYMBOL_GPL(cpuidle_disable_device);
#ifdef CONFIG_ARCH_HAS_CPU_RELAX
-static int poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st)
+static void poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st)
{
- ktime_t t1, t2;
- s64 diff;
- int ret;
-
- t1 = ktime_get();
local_irq_enable();
while (!need_resched())
cpu_relax();
-
- t2 = ktime_get();
- diff = ktime_to_us(ktime_sub(t2, t1));
- if (diff > INT_MAX)
- diff = INT_MAX;
-
- ret = (int) diff;
- return ret;
}
static void poll_idle_init(struct cpuidle_device *dev)
*/
static int __cpuidle_register_device(struct cpuidle_device *dev)
{
- int ret;
struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
if (!sys_dev)
if (!try_module_get(cpuidle_curr_driver->owner))
return -EINVAL;
- init_completion(&dev->kobj_unregister);
-
poll_idle_init(dev);
per_cpu(cpuidle_devices, dev->cpu) = dev;
- list_add(&dev->device_list, &cpuidle_detected_devices);
- if ((ret = cpuidle_add_sysfs(sys_dev))) {
- module_put(cpuidle_curr_driver->owner);
- return ret;
- }
dev->registered = 1;
return 0;
}
cpuidle_enable_device(dev);
- cpuidle_install_idle_handler();
+ cpuidle_add_to_list(dev);
mutex_unlock(&cpuidle_lock);
*/
void cpuidle_unregister_device(struct cpuidle_device *dev)
{
- struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
-
if (dev->registered == 0)
return;
cpuidle_pause_and_lock();
cpuidle_disable_device(dev);
+ cpuidle_remove_from_list(dev);
- cpuidle_remove_sysfs(sys_dev);
- list_del(&dev->device_list);
- wait_for_completion(&dev->kobj_unregister);
per_cpu(cpuidle_devices, dev->cpu) = NULL;
cpuidle_resume_and_unlock();
*/
static int __init cpuidle_init(void)
{
- int ret;
-
- pm_idle_old = pm_idle;
+ int ret, cpu;
ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
if (ret)
return ret;
+ for_each_possible_cpu(cpu)
+ INIT_LIST_HEAD(&per_cpu(cpuidle_devices_list, cpu));
+
latency_notifier_init(&cpuidle_latency_notifier);
return 0;
/* For internal use only */
extern struct cpuidle_governor *cpuidle_curr_governor;
-extern struct cpuidle_driver *cpuidle_curr_driver;
extern struct list_head cpuidle_governors;
-extern struct list_head cpuidle_detected_devices;
extern struct mutex cpuidle_lock;
extern spinlock_t cpuidle_driver_lock;
extern void cpuidle_remove_class_sysfs(struct sysdev_class *cls);
extern int cpuidle_add_state_sysfs(struct cpuidle_device *device);
extern void cpuidle_remove_state_sysfs(struct cpuidle_device *device);
-extern int cpuidle_add_sysfs(struct sys_device *sysdev);
-extern void cpuidle_remove_sysfs(struct sys_device *sysdev);
+extern int cpuidle_add_sysfs(struct cpuidle_device *device);
+extern void cpuidle_remove_sysfs(struct cpuidle_device *device);
#endif /* __DRIVER_CPUIDLE_H */
return -EINVAL;
spin_lock(&cpuidle_driver_lock);
- if (cpuidle_curr_driver) {
- spin_unlock(&cpuidle_driver_lock);
- return -EBUSY;
- }
cpuidle_curr_driver = drv;
spin_unlock(&cpuidle_driver_lock);
*/
int cpuidle_switch_governor(struct cpuidle_governor *gov)
{
- struct cpuidle_device *dev;
+ int cpu;
if (gov == cpuidle_curr_governor)
return 0;
- cpuidle_uninstall_idle_handler();
-
if (cpuidle_curr_governor) {
- list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
- cpuidle_disable_device(dev);
+ for_each_online_cpu(cpu)
+ cpuidle_disable_device(per_cpu(cpuidle_devices, cpu));
module_put(cpuidle_curr_governor->owner);
}
if (gov) {
if (!try_module_get(cpuidle_curr_governor->owner))
return -EINVAL;
- list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
- cpuidle_enable_device(dev);
- cpuidle_install_idle_handler();
+ for_each_online_cpu(cpu)
+ cpuidle_enable_device(per_cpu(cpuidle_devices, cpu));
printk(KERN_INFO "cpuidle: using governor %s\n", gov->name);
}
int i, ret = -ENOMEM;
struct cpuidle_state_kobj *kobj;
+ init_completion(&device->kobj_unregister);
+
+ ret = cpuidle_add_sysfs(device);
+ if (ret) {
+ module_put(cpuidle_curr_driver->owner);
+ return ret;
+ }
/* state statistics */
for (i = 0; i < device->state_count; i++) {
kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
for (i = 0; i < device->state_count; i++)
cpuidle_free_state_kobj(device, i);
+
+ cpuidle_remove_sysfs(device);
}
/**
* cpuidle_add_sysfs - creates a sysfs instance for the target device
- * @sysdev: the target device
+ * @device: the target device
*/
-int cpuidle_add_sysfs(struct sys_device *sysdev)
+int cpuidle_add_sysfs(struct cpuidle_device *device)
{
- int cpu = sysdev->id;
- struct cpuidle_device *dev;
int error;
+ struct sys_device *sysdev = get_cpu_sysdev((unsigned long)device->cpu);
- dev = per_cpu(cpuidle_devices, cpu);
- error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &sysdev->kobj,
- "cpuidle");
+ error = kobject_init_and_add(&device->kobj, &ktype_cpuidle,
+ &sysdev->kobj, "cpuidle");
if (!error)
- kobject_uevent(&dev->kobj, KOBJ_ADD);
+ kobject_uevent(&device->kobj, KOBJ_ADD);
return error;
}
/**
* cpuidle_remove_sysfs - deletes a sysfs instance on the target device
- * @sysdev: the target device
+ * @device: the target device
*/
-void cpuidle_remove_sysfs(struct sys_device *sysdev)
+void cpuidle_remove_sysfs(struct cpuidle_device *device)
{
- int cpu = sysdev->id;
- struct cpuidle_device *dev;
-
- dev = per_cpu(cpuidle_devices, cpu);
- kobject_put(&dev->kobj);
+ kobject_put(&device->kobj);
+ wait_for_completion(&device->kobj_unregister);
}
#define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb)
int cpu_down(unsigned int cpu);
+#ifdef CONFIG_PPC_PSERIES
+extern void cpu_hotplug_driver_lock(void);
+extern void cpu_hotplug_driver_unlock(void);
+#else
+static inline void cpu_hotplug_driver_lock(void)
+{
+}
+
+static inline void cpu_hotplug_driver_unlock(void)
+{
+}
+#endif
+
#else /* CONFIG_HOTPLUG_CPU */
#define get_online_cpus() do { } while (0)
unsigned long long usage;
unsigned long long time; /* in US */
- int (*enter) (struct cpuidle_device *dev,
+ void (*enter) (struct cpuidle_device *dev,
struct cpuidle_state *state);
};
struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
struct cpuidle_state *last_state;
- struct list_head device_list;
+ struct list_head idle_list;
struct kobject kobj;
struct completion kobj_unregister;
void *governor_data;
return dev->last_residency;
}
+extern struct cpuidle_driver *cpuidle_curr_driver;
+extern void cpuidle_idle_call(void);
+
/****************************
* CPUIDLE DRIVER INTERFACE *
extern void cpuidle_resume_and_unlock(void);
extern int cpuidle_enable_device(struct cpuidle_device *dev);
extern void cpuidle_disable_device(struct cpuidle_device *dev);
+extern int common_idle_loop(struct cpuidle_device *dev,
+ struct cpuidle_state *st, void (*idle)(void));
#else
static inline int cpuidle_enable_device(struct cpuidle_device *dev)
{return 0;}
static inline void cpuidle_disable_device(struct cpuidle_device *dev) { }
+static inline int common_idle_loop(struct cpuidle_device *dev,
+			struct cpuidle_state *st, void (*idle)(void)) { return 0; }
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
+extern struct sysdev_class memory_sysdev_class;
+
/*
* Types for free bootmem.
* The normal smallest mapcount is -1. Here is smaller value than it.