/*
 *  linux/drivers/cpufreq/cpufreq.c
 *
 *  Copyright (C) 2001 Russell King
 *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
 *
 *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
 *      Added handling for CPU hotplug
 *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
 *      Fix handling for CPU hotplug -- affected CPUs
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/completion.h>
#include <linux/mutex.h>
#include <linux/syscore_ops.h>

#include <trace/events/power.h>

/**
 * The "cpufreq driver" - the arch- or hardware-dependent low
 * level driver of CPUFreq support, and its spinlock. This lock
 * also protects the cpufreq_cpu_data array.
 */
static struct cpufreq_driver *cpufreq_driver;
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
#ifdef CONFIG_HOTPLUG_CPU
/* This one keeps track of the previously set governor of a removed CPU */
static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
#endif
static DEFINE_SPINLOCK(cpufreq_driver_lock);

/*
 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
 * all cpufreq/hotplug/workqueue/etc related lock issues.
 *
 * The rules for this semaphore:
 * - Any routine that wants to read from the policy structure will
 *   do a down_read on this semaphore.
 * - Any routine that will write to the policy structure and/or may take away
 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
 *   mode before doing so.
 *
 * Additional rules:
 * - All holders of the lock should check to make sure that the CPU they
 *   are concerned with is online after they get the lock.
 * - Governor routines that can be called in cpufreq hotplug path should not
 *   take this sem as the top level hotplug notifier handler takes this.
 * - Lock should not be held across
 *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
 */
static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);

#define lock_policy_rwsem(mode, cpu)                                    \
static int lock_policy_rwsem_##mode                                     \
(int cpu)                                                               \
{                                                                       \
        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);              \
        BUG_ON(policy_cpu == -1);                                       \
        down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
        if (unlikely(!cpu_online(cpu))) {                               \
                up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));      \
                return -1;                                              \
        }                                                               \
                                                                        \
        return 0;                                                       \
}

lock_policy_rwsem(read, cpu);

lock_policy_rwsem(write, cpu);

static void unlock_policy_rwsem_read(int cpu)
{
        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
        BUG_ON(policy_cpu == -1);
        up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
}

static void unlock_policy_rwsem_write(int cpu)
{
        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
        BUG_ON(policy_cpu == -1);
        up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
}
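
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * file): how a reader is expected to use the helpers above, following the
 * locking rules documented for cpu_policy_rwsem. The function name is
 * hypothetical. A negative return from lock_policy_rwsem_read() means the
 * CPU went offline and the semaphore has already been released.
 */
#if 0
static unsigned int example_read_cur_freq(unsigned int cpu)
{
        struct cpufreq_policy *policy;
        unsigned int cur = 0;

        if (lock_policy_rwsem_read(cpu) < 0)
                return 0;               /* CPU offline; rwsem already dropped */

        policy = per_cpu(cpufreq_cpu_data, cpu);
        if (policy)
                cur = policy->cur;      /* read side: policy cannot change */

        unlock_policy_rwsem_read(cpu);
        return cur;
}
#endif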


/* internal prototypes */
static int __cpufreq_governor(struct cpufreq_policy *policy,
                unsigned int event);
static unsigned int __cpufreq_get(unsigned int cpu);
static void handle_update(struct work_struct *work);

/**
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
 * "transition" list for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 * The mutex locks both lists.
 */
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
static struct srcu_notifier_head cpufreq_transition_notifier_list;

static bool init_cpufreq_transition_notifier_list_called;
static int __init init_cpufreq_transition_notifier_list(void)
{
        srcu_init_notifier_head(&cpufreq_transition_notifier_list);
        init_cpufreq_transition_notifier_list_called = true;
        return 0;
}
pure_initcall(init_cpufreq_transition_notifier_list);

static int off __read_mostly;
int cpufreq_disabled(void)
{
        return off;
}
void disable_cpufreq(void)
{
        off = 1;
}
static LIST_HEAD(cpufreq_governor_list);
static DEFINE_MUTEX(cpufreq_governor_mutex);

struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
{
        struct cpufreq_policy *data;
        unsigned long flags;

        if (cpu >= nr_cpu_ids)
                goto err_out;

        /* get the cpufreq driver */
        spin_lock_irqsave(&cpufreq_driver_lock, flags);

        if (!cpufreq_driver)
                goto err_out_unlock;

        if (!try_module_get(cpufreq_driver->owner))
                goto err_out_unlock;


        /* get the CPU */
        data = per_cpu(cpufreq_cpu_data, cpu);

        if (!data)
                goto err_out_put_module;

        if (!kobject_get(&data->kobj))
                goto err_out_put_module;

        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
        return data;

err_out_put_module:
        module_put(cpufreq_driver->owner);
err_out_unlock:
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
err_out:
        return NULL;
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);


void cpufreq_cpu_put(struct cpufreq_policy *data)
{
        kobject_put(&data->kobj);
        module_put(cpufreq_driver->owner);
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
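
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * the get/put pattern cpufreq_cpu_get()/cpufreq_cpu_put() impose on callers.
 * Every successful get pins both the policy kobject and the driver module,
 * so it must be balanced by exactly one put on every path. The function
 * name is hypothetical.
 */
#if 0
static unsigned int example_query_policy_max(unsigned int cpu)
{
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
        unsigned int max;

        if (!policy)
                return 0;       /* no driver, no policy, or invalid CPU */

        max = policy->max;
        cpufreq_cpu_put(policy);        /* releases kobj + module refs */
        return max;
}
#endif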


/*********************************************************************
 *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
 *********************************************************************/

/**
 * adjust_jiffies - adjust the system "loops_per_jiffy"
 *
 * This function alters the system "loops_per_jiffy" for the clock
 * speed change. Note that loops_per_jiffy cannot be updated on SMP
 * systems as each CPU might be scaled differently. So, use the arch
 * per-CPU loops_per_jiffy value wherever possible.
 */
#ifndef CONFIG_SMP
static unsigned long l_p_j_ref;
static unsigned int  l_p_j_ref_freq;

static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
        if (ci->flags & CPUFREQ_CONST_LOOPS)
                return;

        if (!l_p_j_ref_freq) {
                l_p_j_ref = loops_per_jiffy;
                l_p_j_ref_freq = ci->old;
                pr_debug("saving %lu as reference value for loops_per_jiffy; "
                        "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
        }
        if ((val == CPUFREQ_POSTCHANGE  && ci->old != ci->new) ||
            (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
                loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
                                                                ci->new);
                pr_debug("scaling loops_per_jiffy to %lu "
                        "for frequency %u kHz\n", loops_per_jiffy, ci->new);
        }
}
#else
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
        return;
}
#endif
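
/*
 * Worked example (editor's addition, illustrative only): adjust_jiffies()
 * rescales the delay calibration proportionally to the clock via
 * cpufreq_scale(), i.e. roughly
 *
 *      loops_per_jiffy = l_p_j_ref * ci->new / l_p_j_ref_freq;
 *
 * So a reference of 4,000,000 loops_per_jiffy captured at 800,000 kHz
 * becomes 2,000,000 after a POSTCHANGE notification to 400,000 kHz.
 */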


/**
 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
 * on frequency transition.
 *
 * This function calls the transition notifiers and the "adjust_jiffies"
 * function. It is called twice on all CPU frequency changes that have
 * external effects.
 */
void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
{
        struct cpufreq_policy *policy;

        BUG_ON(irqs_disabled());

        freqs->flags = cpufreq_driver->flags;
        pr_debug("notification %u of frequency transition to %u kHz\n",
                state, freqs->new);

        policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
        switch (state) {

        case CPUFREQ_PRECHANGE:
                /* detect if the driver reported a value as "old frequency"
                 * which is not equal to what the cpufreq core thinks is
                 * "old frequency".
                 */
                if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
                        if ((policy) && (policy->cpu == freqs->cpu) &&
                            (policy->cur) && (policy->cur != freqs->old)) {
                                pr_debug("Warning: CPU frequency is"
                                        " %u, cpufreq assumed %u kHz.\n",
                                        freqs->old, policy->cur);
                                freqs->old = policy->cur;
                        }
                }
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_PRECHANGE, freqs);
                adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
                break;

        case CPUFREQ_POSTCHANGE:
                adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
                pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
                        (unsigned long)freqs->cpu);
                trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
                trace_cpu_frequency(freqs->new, freqs->cpu);
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_POSTCHANGE, freqs);
                if (likely(policy) && likely(policy->cpu == freqs->cpu))
                        policy->cur = freqs->new;
                break;
        }
}
EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
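
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * the call pattern a scaling driver's ->target() hook is expected to follow
 * around an actual frequency switch. The function is hypothetical; it just
 * brackets the hardware write with the PRECHANGE/POSTCHANGE pair so the
 * notifiers and adjust_jiffies() see both edges.
 */
#if 0
static int example_set_freq(struct cpufreq_policy *policy, unsigned int new)
{
        struct cpufreq_freqs freqs;

        freqs.cpu = policy->cpu;
        freqs.old = policy->cur;
        freqs.new = new;

        cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
        /* ... program the hardware here (driver specific) ... */
        cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);

        return 0;
}
#endif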



/*********************************************************************
 *                          SYSFS INTERFACE                          *
 *********************************************************************/

static struct cpufreq_governor *__find_governor(const char *str_governor)
{
        struct cpufreq_governor *t;

        list_for_each_entry(t, &cpufreq_governor_list, governor_list)
                if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
                        return t;

        return NULL;
}

/**
 * cpufreq_parse_governor - parse a governor string
 */
static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
                                struct cpufreq_governor **governor)
{
        int err = -EINVAL;

        if (!cpufreq_driver)
                goto out;

        if (cpufreq_driver->setpolicy) {
                if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_PERFORMANCE;
                        err = 0;
                } else if (!strnicmp(str_governor, "powersave",
                                                CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_POWERSAVE;
                        err = 0;
                }
        } else if (cpufreq_driver->target) {
                struct cpufreq_governor *t;

                mutex_lock(&cpufreq_governor_mutex);

                t = __find_governor(str_governor);

                if (t == NULL) {
                        int ret;

                        mutex_unlock(&cpufreq_governor_mutex);
                        ret = request_module("cpufreq_%s", str_governor);
                        mutex_lock(&cpufreq_governor_mutex);

                        if (ret == 0)
                                t = __find_governor(str_governor);
                }

                if (t != NULL) {
                        *governor = t;
                        err = 0;
                }

                mutex_unlock(&cpufreq_governor_mutex);
        }
out:
        return err;
}


/**
 * cpufreq_per_cpu_attr_read() / show_##file_name() -
 * print out cpufreq information
 *
 * Write out information from cpufreq_driver->policy[cpu]; object must be
 * "unsigned int".
 */

#define show_one(file_name, object)                     \
static ssize_t show_##file_name                         \
(struct cpufreq_policy *policy, char *buf)              \
{                                                       \
        return sprintf(buf, "%u\n", policy->object);    \
}

show_one(cpuinfo_min_freq, cpuinfo.min_freq);
show_one(cpuinfo_max_freq, cpuinfo.max_freq);
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);
show_one(scaling_cur_freq, cur);

static int __cpufreq_set_policy(struct cpufreq_policy *data,
                                struct cpufreq_policy *policy);

/**
 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
 */
#define store_one(file_name, object)                    \
static ssize_t store_##file_name                                        \
(struct cpufreq_policy *policy, const char *buf, size_t count)          \
{                                                                       \
        unsigned int ret = -EINVAL;                                     \
        struct cpufreq_policy new_policy;                               \
                                                                        \
        ret = cpufreq_get_policy(&new_policy, policy->cpu);             \
        if (ret)                                                        \
                return -EINVAL;                                         \
                                                                        \
        ret = sscanf(buf, "%u", &new_policy.object);                    \
        if (ret != 1)                                                   \
                return -EINVAL;                                         \
                                                                        \
        ret = __cpufreq_set_policy(policy, &new_policy);                \
        policy->user_policy.object = policy->object;                    \
                                                                        \
        return ret ? ret : count;                                       \
}

store_one(scaling_min_freq, min);
store_one(scaling_max_freq, max);

/**
 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
 */
static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
                                        char *buf)
{
        unsigned int cur_freq = __cpufreq_get(policy->cpu);
        if (!cur_freq)
                return sprintf(buf, "<unknown>");
        return sprintf(buf, "%u\n", cur_freq);
}


/**
 * show_scaling_governor - show the current policy for the specified CPU
 */
static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
{
        if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
                return sprintf(buf, "powersave\n");
        else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
                return sprintf(buf, "performance\n");
        else if (policy->governor)
                return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
                                policy->governor->name);
        return -EINVAL;
}


/**
 * store_scaling_governor - store policy for the specified CPU
 */
static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
                                        const char *buf, size_t count)
{
        unsigned int ret = -EINVAL;
        char    str_governor[16];
        struct cpufreq_policy new_policy;

        ret = cpufreq_get_policy(&new_policy, policy->cpu);
        if (ret)
                return ret;

        ret = sscanf(buf, "%15s", str_governor);
        if (ret != 1)
                return -EINVAL;

        if (cpufreq_parse_governor(str_governor, &new_policy.policy,
                                                &new_policy.governor))
                return -EINVAL;

        /* Do not use cpufreq_set_policy here or the user_policy.max
           will be wrongly overridden */
        ret = __cpufreq_set_policy(policy, &new_policy);

        policy->user_policy.policy = policy->policy;
        policy->user_policy.governor = policy->governor;

        if (ret)
                return ret;
        else
                return count;
}

/**
 * show_scaling_driver - show the cpufreq driver currently loaded
 */
static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
{
        return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
}

/**
 * show_scaling_available_governors - show the available CPUfreq governors
 */
static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
                                                char *buf)
{
        ssize_t i = 0;
        struct cpufreq_governor *t;

        if (!cpufreq_driver->target) {
                i += sprintf(buf, "performance powersave");
                goto out;
        }

        list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
                if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
                    - (CPUFREQ_NAME_LEN + 2)))
                        goto out;
                i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
        }
out:
        i += sprintf(&buf[i], "\n");
        return i;
}

static ssize_t show_cpus(const struct cpumask *mask, char *buf)
{
        ssize_t i = 0;
        unsigned int cpu;

        for_each_cpu(cpu, mask) {
                if (i)
                        i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
                i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
                if (i >= (PAGE_SIZE - 5))
                        break;
        }
        i += sprintf(&buf[i], "\n");
        return i;
}

/**
 * show_related_cpus - show the CPUs affected by each transition even if
 * hw coordination is in use
 */
static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
{
        if (cpumask_empty(policy->related_cpus))
                return show_cpus(policy->cpus, buf);
        return show_cpus(policy->related_cpus, buf);
}

/**
 * show_affected_cpus - show the CPUs affected by each transition
 */
static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
{
        return show_cpus(policy->cpus, buf);
}

static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
                                        const char *buf, size_t count)
{
        unsigned int freq = 0;
        unsigned int ret;

        if (!policy->governor || !policy->governor->store_setspeed)
                return -EINVAL;

        ret = sscanf(buf, "%u", &freq);
        if (ret != 1)
                return -EINVAL;

        policy->governor->store_setspeed(policy, freq);

        return count;
}

static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
{
        if (!policy->governor || !policy->governor->show_setspeed)
                return sprintf(buf, "<unsupported>\n");

        return policy->governor->show_setspeed(policy, buf);
}

/**
 * show_bios_limit - show the current cpufreq HW/BIOS limitation
 */
568 {
569         unsigned int limit;
570         int ret;
571         if (cpufreq_driver->bios_limit) {
572                 ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
573                 if (!ret)
574                         return sprintf(buf, "%u\n", limit);
575         }
576         return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
577 }
578
579 cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
580 cpufreq_freq_attr_ro(cpuinfo_min_freq);
581 cpufreq_freq_attr_ro(cpuinfo_max_freq);
582 cpufreq_freq_attr_ro(cpuinfo_transition_latency);
583 cpufreq_freq_attr_ro(scaling_available_governors);
584 cpufreq_freq_attr_ro(scaling_driver);
585 cpufreq_freq_attr_ro(scaling_cur_freq);
586 cpufreq_freq_attr_ro(bios_limit);
587 cpufreq_freq_attr_ro(related_cpus);
588 cpufreq_freq_attr_ro(affected_cpus);
589 cpufreq_freq_attr_rw(scaling_min_freq);
590 cpufreq_freq_attr_rw(scaling_max_freq);
591 cpufreq_freq_attr_rw(scaling_governor);
592 cpufreq_freq_attr_rw(scaling_setspeed);
593
594 static struct attribute *default_attrs[] = {
595         &cpuinfo_min_freq.attr,
596         &cpuinfo_max_freq.attr,
597         &cpuinfo_transition_latency.attr,
598         &scaling_min_freq.attr,
599         &scaling_max_freq.attr,
600         &affected_cpus.attr,
601         &related_cpus.attr,
602         &scaling_governor.attr,
603         &scaling_driver.attr,
604         &scaling_available_governors.attr,
605         &scaling_setspeed.attr,
606         NULL
607 };
608
609 struct kobject *cpufreq_global_kobject;
610 EXPORT_SYMBOL(cpufreq_global_kobject);
611
612 #define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
613 #define to_attr(a) container_of(a, struct freq_attr, attr)
614
615 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
616 {
617         struct cpufreq_policy *policy = to_policy(kobj);
618         struct freq_attr *fattr = to_attr(attr);
619         ssize_t ret = -EINVAL;
620         policy = cpufreq_cpu_get(policy->cpu);
621         if (!policy)
622                 goto no_policy;
623
624         if (lock_policy_rwsem_read(policy->cpu) < 0)
625                 goto fail;
626
627         if (fattr->show)
628                 ret = fattr->show(policy, buf);
629         else
630                 ret = -EIO;
631
632         unlock_policy_rwsem_read(policy->cpu);
633 fail:
634         cpufreq_cpu_put(policy);
635 no_policy:
636         return ret;
637 }
638
639 static ssize_t store(struct kobject *kobj, struct attribute *attr,
640                      const char *buf, size_t count)
641 {
642         struct cpufreq_policy *policy = to_policy(kobj);
643         struct freq_attr *fattr = to_attr(attr);
644         ssize_t ret = -EINVAL;
645         policy = cpufreq_cpu_get(policy->cpu);
646         if (!policy)
647                 goto no_policy;
648
649         if (lock_policy_rwsem_write(policy->cpu) < 0)
650                 goto fail;
651
652         if (fattr->store)
653                 ret = fattr->store(policy, buf, count);
654         else
655                 ret = -EIO;
656
657         unlock_policy_rwsem_write(policy->cpu);
658 fail:
659         cpufreq_cpu_put(policy);
660 no_policy:
661         return ret;
662 }
663
664 static void cpufreq_sysfs_release(struct kobject *kobj)
665 {
666         struct cpufreq_policy *policy = to_policy(kobj);
667         pr_debug("last reference is dropped\n");
668         complete(&policy->kobj_unregister);
669 }
670
671 static const struct sysfs_ops sysfs_ops = {
672         .show   = show,
673         .store  = store,
674 };
675
676 static struct kobj_type ktype_cpufreq = {
677         .sysfs_ops      = &sysfs_ops,
678         .default_attrs  = default_attrs,
679         .release        = cpufreq_sysfs_release,
680 };
681
682 /*
683  * Returns:
684  *   Negative: Failure
685  *   0:        Success
686  *   Positive: When we have a managed CPU and the sysfs got symlinked
687  */
688 static int cpufreq_add_dev_policy(unsigned int cpu,
689                                   struct cpufreq_policy *policy,
690                                   struct device *dev)
691 {
692         int ret = 0;
693 #ifdef CONFIG_SMP
694         unsigned long flags;
695         unsigned int j;
696 #ifdef CONFIG_HOTPLUG_CPU
697         struct cpufreq_governor *gov;
698
699         gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
700         if (gov) {
701                 policy->governor = gov;
702                 pr_debug("Restoring governor %s for cpu %d\n",
703                        policy->governor->name, cpu);
704         }
705 #endif
706
707         for_each_cpu(j, policy->cpus) {
708                 struct cpufreq_policy *managed_policy;
709
710                 if (cpu == j)
711                         continue;
712
713                 /* Check for existing affected CPUs.
714                  * They may not be aware of it due to CPU Hotplug.
715                  * cpufreq_cpu_put is called when the device is removed
716                  * in __cpufreq_remove_dev()
717                  */
718                 managed_policy = cpufreq_cpu_get(j);
719                 if (unlikely(managed_policy)) {
720
721                         /* Set proper policy_cpu */
722                         unlock_policy_rwsem_write(cpu);
723                         per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;
724
725                         if (lock_policy_rwsem_write(cpu) < 0) {
726                                 /* Should not go through policy unlock path */
727                                 if (cpufreq_driver->exit)
728                                         cpufreq_driver->exit(policy);
729                                 cpufreq_cpu_put(managed_policy);
730                                 return -EBUSY;
731                         }
732
733                         spin_lock_irqsave(&cpufreq_driver_lock, flags);
734                         cpumask_copy(managed_policy->cpus, policy->cpus);
735                         per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
736                         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
737
738                         pr_debug("CPU already managed, adding link\n");
739                         ret = sysfs_create_link(&dev->kobj,
740                                                 &managed_policy->kobj,
741                                                 "cpufreq");
742                         if (ret)
743                                 cpufreq_cpu_put(managed_policy);
744                         /*
745                          * Success. We only needed to be added to the mask.
746                          * Call driver->exit() because only the cpu parent of
747                          * the kobj needed to call init().
748                          */
749                         if (cpufreq_driver->exit)
750                                 cpufreq_driver->exit(policy);
751
752                         if (!ret)
753                                 return 1;
754                         else
755                                 return ret;
756                 }
757         }
758 #endif
759         return ret;
760 }
761
762
763 /* symlink affected CPUs */
764 static int cpufreq_add_dev_symlink(unsigned int cpu,
765                                    struct cpufreq_policy *policy)
766 {
767         unsigned int j;
768         int ret = 0;
769
770         for_each_cpu(j, policy->cpus) {
771                 struct cpufreq_policy *managed_policy;
772                 struct device *cpu_dev;
773
774                 if (j == cpu)
775                         continue;
776                 if (!cpu_online(j))
777                         continue;
778
779                 pr_debug("CPU %u already managed, adding link\n", j);
780                 managed_policy = cpufreq_cpu_get(cpu);
781                 cpu_dev = get_cpu_device(j);
782                 ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
783                                         "cpufreq");
784                 if (ret) {
785                         cpufreq_cpu_put(managed_policy);
786                         return ret;
787                 }
788         }
789         return ret;
790 }
791
792 static int cpufreq_add_dev_interface(unsigned int cpu,
793                                      struct cpufreq_policy *policy,
794                                      struct device *dev)
795 {
796         struct cpufreq_policy new_policy;
797         struct freq_attr **drv_attr;
798         unsigned long flags;
799         int ret = 0;
800         unsigned int j;
801
802         /* prepare interface data */
803         ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
804                                    &dev->kobj, "cpufreq");
805         if (ret)
806                 return ret;
807
808         /* set up files for this cpu device */
809         drv_attr = cpufreq_driver->attr;
810         while ((drv_attr) && (*drv_attr)) {
811                 ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
812                 if (ret)
813                         goto err_out_kobj_put;
814                 drv_attr++;
815         }
816         if (cpufreq_driver->get) {
817                 ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
818                 if (ret)
819                         goto err_out_kobj_put;
820         }
821         if (cpufreq_driver->target) {
822                 ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
823                 if (ret)
824                         goto err_out_kobj_put;
825         }
826         if (cpufreq_driver->bios_limit) {
827                 ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
828                 if (ret)
829                         goto err_out_kobj_put;
830         }
831
832         spin_lock_irqsave(&cpufreq_driver_lock, flags);
833         for_each_cpu(j, policy->cpus) {
834                 if (!cpu_online(j))
835                         continue;
836                 per_cpu(cpufreq_cpu_data, j) = policy;
837                 per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
838         }
839         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
840
841         ret = cpufreq_add_dev_symlink(cpu, policy);
842         if (ret)
843                 goto err_out_kobj_put;
844
845         memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
846         /* assure that the starting sequence is run in __cpufreq_set_policy */
847         policy->governor = NULL;
848
849         /* set default policy */
850         ret = __cpufreq_set_policy(policy, &new_policy);
851         policy->user_policy.policy = policy->policy;
852         policy->user_policy.governor = policy->governor;
853
854         if (ret) {
855                 pr_debug("setting policy failed\n");
856                 if (cpufreq_driver->exit)
857                         cpufreq_driver->exit(policy);
858         }
859         return ret;
860
861 err_out_kobj_put:
862         kobject_put(&policy->kobj);
863         wait_for_completion(&policy->kobj_unregister);
864         return ret;
865 }
866
867
868 /**
869  * cpufreq_add_dev - add a CPU device
870  *
871  * Adds the cpufreq interface for a CPU device.
872  *
 * The Oracle says: try running cpufreq registration/unregistration
 * concurrently with cpu hotplugging and all hell will break loose. Tried to
 * clean this mess up, but more thorough testing is needed. - Mathieu
 */
static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
{
        unsigned int cpu = dev->id;
        int ret = 0, found = 0;
        struct cpufreq_policy *policy;
        unsigned long flags;
        unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
        int sibling;
#endif

        if (cpu_is_offline(cpu))
                return 0;

        pr_debug("adding CPU %u\n", cpu);

#ifdef CONFIG_SMP
        /* check whether a different CPU already registered this
         * CPU because it is in the same boat. */
        policy = cpufreq_cpu_get(cpu);
        if (unlikely(policy)) {
                cpufreq_cpu_put(policy);
                return 0;
        }
#endif

        if (!try_module_get(cpufreq_driver->owner)) {
                ret = -EINVAL;
                goto module_out;
        }

        ret = -ENOMEM;
        policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
        if (!policy)
                goto nomem_out;

        if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
                goto err_free_policy;

        if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
                goto err_free_cpumask;

        policy->cpu = cpu;
        cpumask_copy(policy->cpus, cpumask_of(cpu));

        /* Initially set CPU itself as the policy_cpu */
        per_cpu(cpufreq_policy_cpu, cpu) = cpu;
        ret = (lock_policy_rwsem_write(cpu) < 0);
        WARN_ON(ret);

        init_completion(&policy->kobj_unregister);
        INIT_WORK(&policy->update, handle_update);

        /* Set governor before ->init, so that driver could check it */
#ifdef CONFIG_HOTPLUG_CPU
        for_each_online_cpu(sibling) {
                struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
                if (cp && cp->governor &&
                    (cpumask_test_cpu(cpu, cp->related_cpus))) {
                        policy->governor = cp->governor;
                        found = 1;
                        break;
                }
        }
#endif
        if (!found)
                policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
        /* call driver. From then on the cpufreq must be able
         * to accept all calls to ->verify and ->setpolicy for this CPU
         */
        ret = cpufreq_driver->init(policy);
        if (ret) {
                pr_debug("initialization failed\n");
                goto err_unlock_policy;
        }
        policy->user_policy.min = policy->min;
        policy->user_policy.max = policy->max;

        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                                     CPUFREQ_START, policy);

        ret = cpufreq_add_dev_policy(cpu, policy, dev);
        if (ret) {
                if (ret > 0)
                        /* This is a managed cpu, symlink created,
                           exit with 0 */
                        ret = 0;
                goto err_unlock_policy;
        }

        ret = cpufreq_add_dev_interface(cpu, policy, dev);
        if (ret)
                goto err_out_unregister;

        unlock_policy_rwsem_write(cpu);

        kobject_uevent(&policy->kobj, KOBJ_ADD);
        module_put(cpufreq_driver->owner);
        pr_debug("initialization complete\n");

        return 0;


err_out_unregister:
        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        for_each_cpu(j, policy->cpus)
                per_cpu(cpufreq_cpu_data, j) = NULL;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        kobject_put(&policy->kobj);
        wait_for_completion(&policy->kobj_unregister);

err_unlock_policy:
        unlock_policy_rwsem_write(cpu);
        free_cpumask_var(policy->related_cpus);
err_free_cpumask:
        free_cpumask_var(policy->cpus);
err_free_policy:
        kfree(policy);
nomem_out:
        module_put(cpufreq_driver->owner);
module_out:
        return ret;
}


/**
 * __cpufreq_remove_dev - remove a CPU device
 *
 * Removes the cpufreq interface for a CPU device.
 * Caller should already have policy_rwsem in write mode for this CPU.
 * This routine frees the rwsem before returning.
 */
static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
        unsigned int cpu = dev->id;
        unsigned long flags;
        struct cpufreq_policy *data;
        struct kobject *kobj;
        struct completion *cmp;
#ifdef CONFIG_SMP
        struct device *cpu_dev;
        unsigned int j;
#endif

        pr_debug("unregistering CPU %u\n", cpu);

        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        data = per_cpu(cpufreq_cpu_data, cpu);

        if (!data) {
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                unlock_policy_rwsem_write(cpu);
                return -EINVAL;
        }
        per_cpu(cpufreq_cpu_data, cpu) = NULL;


#ifdef CONFIG_SMP
        /* if this isn't the CPU which is the parent of the kobj, we
         * only need to unlink, put and exit
         */
        if (unlikely(cpu != data->cpu)) {
                pr_debug("removing link\n");
                cpumask_clear_cpu(cpu, data->cpus);
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                kobj = &dev->kobj;
                cpufreq_cpu_put(data);
                unlock_policy_rwsem_write(cpu);
                sysfs_remove_link(kobj, "cpufreq");
                return 0;
        }
#endif

#ifdef CONFIG_SMP

#ifdef CONFIG_HOTPLUG_CPU
        strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
                        CPUFREQ_NAME_LEN);
#endif

        /* if we have other CPUs still registered, we need to unlink them,
         * or else wait_for_completion below will lock up. Clean the
         * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
         * the sysfs links afterwards.
         */
        if (unlikely(cpumask_weight(data->cpus) > 1)) {
                for_each_cpu(j, data->cpus) {
                        if (j == cpu)
                                continue;
                        per_cpu(cpufreq_cpu_data, j) = NULL;
                }
        }

        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        if (unlikely(cpumask_weight(data->cpus) > 1)) {
                for_each_cpu(j, data->cpus) {
                        if (j == cpu)
                                continue;
                        pr_debug("removing link for cpu %u\n", j);
#ifdef CONFIG_HOTPLUG_CPU
                        strncpy(per_cpu(cpufreq_cpu_governor, j),
                                data->governor->name, CPUFREQ_NAME_LEN);
#endif
                        cpu_dev = get_cpu_device(j);
                        kobj = &cpu_dev->kobj;
                        unlock_policy_rwsem_write(cpu);
                        sysfs_remove_link(kobj, "cpufreq");
                        lock_policy_rwsem_write(cpu);
                        cpufreq_cpu_put(data);
                }
        }
#else
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif

        if (cpufreq_driver->target)
                __cpufreq_governor(data, CPUFREQ_GOV_STOP);

        kobj = &data->kobj;
        cmp = &data->kobj_unregister;
        unlock_policy_rwsem_write(cpu);
        kobject_put(kobj);

        /* we need to make sure that the underlying kobj is actually
         * not referenced anymore by anybody before we proceed with
         * unloading.
         */
        pr_debug("waiting for dropping of refcount\n");
        wait_for_completion(cmp);
        pr_debug("wait complete\n");

        lock_policy_rwsem_write(cpu);
        if (cpufreq_driver->exit)
                cpufreq_driver->exit(data);
        unlock_policy_rwsem_write(cpu);

#ifdef CONFIG_HOTPLUG_CPU
        /* when the CPU which is the parent of the kobj is hotplugged
         * offline, check for siblings, and create cpufreq sysfs interface
         * and symlinks
         */
        if (unlikely(cpumask_weight(data->cpus) > 1)) {
                /* first sibling now owns the new sysfs dir */
                cpumask_clear_cpu(cpu, data->cpus);
                cpufreq_add_dev(get_cpu_device(cpumask_first(data->cpus)), NULL);

                /* finally remove our own symlink */
                lock_policy_rwsem_write(cpu);
                __cpufreq_remove_dev(dev, sif);
        }
#endif

        free_cpumask_var(data->related_cpus);
        free_cpumask_var(data->cpus);
        kfree(data);

        return 0;
}


static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
        unsigned int cpu = dev->id;
        int retval;

        if (cpu_is_offline(cpu))
                return 0;

        if (unlikely(lock_policy_rwsem_write(cpu)))
                BUG();

        retval = __cpufreq_remove_dev(dev, sif);
        return retval;
}


static void handle_update(struct work_struct *work)
{
        struct cpufreq_policy *policy =
                container_of(work, struct cpufreq_policy, update);
        unsigned int cpu = policy->cpu;
        pr_debug("handle_update for cpu %u called\n", cpu);
        cpufreq_update_policy(cpu);
}

/**
 *      cpufreq_out_of_sync - If actual and saved CPU frequency differ, we're in deep trouble.
 *      @cpu: cpu number
 *      @old_freq: CPU frequency the kernel thinks the CPU runs at
 *      @new_freq: CPU frequency the CPU actually runs at
 *
 *      We adjust to the current frequency first, and need to clean up later.
 *      So either call cpufreq_update_policy() or schedule handle_update().
 */
static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
                                unsigned int new_freq)
{
        struct cpufreq_freqs freqs;

        pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
               "core thinks of %u, is %u kHz.\n", old_freq, new_freq);

        freqs.cpu = cpu;
        freqs.old = old_freq;
        freqs.new = new_freq;
        cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
        cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}


/**
 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
 * @cpu: CPU number
 *
 * This is the last known freq, without actually getting it from the driver.
 * Return value will be same as what is shown in scaling_cur_freq in sysfs.
 */
unsigned int cpufreq_quick_get(unsigned int cpu)
{
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
        unsigned int ret_freq = 0;

        if (policy) {
                ret_freq = policy->cur;
                cpufreq_cpu_put(policy);
        }

        return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get);

/**
 * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
 * @cpu: CPU number
 *
 * Just return the max possible frequency for a given CPU.
 */
unsigned int cpufreq_quick_get_max(unsigned int cpu)
{
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
        unsigned int ret_freq = 0;

        if (policy) {
                ret_freq = policy->max;
                cpufreq_cpu_put(policy);
        }

        return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get_max);


static unsigned int __cpufreq_get(unsigned int cpu)
{
        struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
        unsigned int ret_freq = 0;

        if (!cpufreq_driver->get)
                return ret_freq;

        ret_freq = cpufreq_driver->get(cpu);

        if (ret_freq && policy->cur &&
                !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
                /* verify no discrepancy between actual and
                                        saved value exists */
                if (unlikely(ret_freq != policy->cur)) {
                        cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
                        schedule_work(&policy->update);
                }
        }

        return ret_freq;
}

/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the CPU current (static) CPU frequency
 */
unsigned int cpufreq_get(unsigned int cpu)
{
        unsigned int ret_freq = 0;
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

        if (!policy)
                goto out;

        if (unlikely(lock_policy_rwsem_read(cpu)))
                goto out_policy;

        ret_freq = __cpufreq_get(cpu);

        unlock_policy_rwsem_read(cpu);

out_policy:
        cpufreq_cpu_put(policy);
out:
        return ret_freq;
}
EXPORT_SYMBOL(cpufreq_get);

static struct subsys_interface cpufreq_interface = {
        .name           = "cpufreq",
        .subsys         = &cpu_subsys,
        .add_dev        = cpufreq_add_dev,
        .remove_dev     = cpufreq_remove_dev,
};


/**
 * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
 *
 * This function is only executed for the boot processor.  The other CPUs
 * have been put offline by means of CPU hotplug.
 */
static int cpufreq_bp_suspend(void)
{
        int ret = 0;

        int cpu = smp_processor_id();
        struct cpufreq_policy *cpu_policy;

        pr_debug("suspending cpu %u\n", cpu);

        /* If there's no policy for the boot CPU, we have nothing to do. */
        cpu_policy = cpufreq_cpu_get(cpu);
        if (!cpu_policy)
                return 0;

        if (cpufreq_driver->suspend) {
                ret = cpufreq_driver->suspend(cpu_policy);
                if (ret)
                        printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
                                        "step on CPU %u\n", cpu_policy->cpu);
        }

        cpufreq_cpu_put(cpu_policy);
        return ret;
}

/**
 * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
 *
 *      1.) resume CPUfreq hardware support (cpufreq_driver->resume())
 *      2.) schedule a call to cpufreq_update_policy() ASAP as interrupts are
 *          restored. It will verify that the current freq is in sync with
 *          what we believe it to be. This is a bit later than when it
 *          should be, but nonetheless it's better than calling
 *          cpufreq_driver->get() here which might re-enable interrupts...
 *
 * This function is only executed for the boot CPU.  The other CPUs have not
 * been turned on yet.
 */
static void cpufreq_bp_resume(void)
{
        int ret = 0;

        int cpu = smp_processor_id();
        struct cpufreq_policy *cpu_policy;

        pr_debug("resuming cpu %u\n", cpu);

        /* If there's no policy for the boot CPU, we have nothing to do. */
        cpu_policy = cpufreq_cpu_get(cpu);
        if (!cpu_policy)
                return;

        if (cpufreq_driver->resume) {
                ret = cpufreq_driver->resume(cpu_policy);
                if (ret) {
                        printk(KERN_ERR "cpufreq: resume failed in ->resume "
                                        "step on CPU %u\n", cpu_policy->cpu);
                        goto fail;
                }
        }

        schedule_work(&cpu_policy->update);

fail:
        cpufreq_cpu_put(cpu_policy);
}

static struct syscore_ops cpufreq_syscore_ops = {
        .suspend        = cpufreq_bp_suspend,
        .resume         = cpufreq_bp_resume,
};


/*********************************************************************
 *                     NOTIFIER LISTS INTERFACE                      *
 *********************************************************************/

/**
 *      cpufreq_register_notifier - register a driver with cpufreq
 *      @nb: notifier function to register
 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *      Add a driver to one of two lists: either a list of drivers that
 *      are notified about clock rate changes (once before and once after
 *      the transition), or a list of drivers that are notified about
 *      changes in cpufreq policy.
 *
 *      This function may sleep, and has the same return conditions as
 *      blocking_notifier_chain_register.
 */
int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
{
        int ret;

        WARN_ON(!init_cpufreq_transition_notifier_list_called);

        switch (list) {
        case CPUFREQ_TRANSITION_NOTIFIER:
                ret = srcu_notifier_chain_register(
                                &cpufreq_transition_notifier_list, nb);
                break;
        case CPUFREQ_POLICY_NOTIFIER:
                ret = blocking_notifier_chain_register(
                                &cpufreq_policy_notifier_list, nb);
                break;
        default:
                ret = -EINVAL;
        }

        return ret;
}
EXPORT_SYMBOL(cpufreq_register_notifier);
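
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * a minimal transition notifier as a client of the API above. The callback
 * and block names are hypothetical; the callback just logs completed
 * transitions. Registration would normally happen in a module's init path.
 */
#if 0
static int example_trans_cb(struct notifier_block *nb, unsigned long val,
                            void *data)
{
        struct cpufreq_freqs *freqs = data;

        if (val == CPUFREQ_POSTCHANGE)
                pr_debug("cpu %u now at %u kHz\n", freqs->cpu, freqs->new);
        return NOTIFY_OK;
}

static struct notifier_block example_nb = {
        .notifier_call = example_trans_cb,
};

/* cpufreq_register_notifier(&example_nb, CPUFREQ_TRANSITION_NOTIFIER); */
#endif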


/**
 *      cpufreq_unregister_notifier - unregister a driver with cpufreq
 *      @nb: notifier block to be unregistered
 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *      Remove a driver from the CPU frequency notifier list.
 *
 *      This function may sleep, and has the same return conditions as
 *      blocking_notifier_chain_unregister.
 */
int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
{
        int ret;

        switch (list) {
        case CPUFREQ_TRANSITION_NOTIFIER:
                ret = srcu_notifier_chain_unregister(
                                &cpufreq_transition_notifier_list, nb);
                break;
        case CPUFREQ_POLICY_NOTIFIER:
                ret = blocking_notifier_chain_unregister(
                                &cpufreq_policy_notifier_list, nb);
                break;
        default:
                ret = -EINVAL;
        }

        return ret;
}
EXPORT_SYMBOL(cpufreq_unregister_notifier);


/*********************************************************************
 *                              GOVERNORS                            *
 *********************************************************************/


int __cpufreq_driver_target(struct cpufreq_policy *policy,
                            unsigned int target_freq,
                            unsigned int relation)
{
        int retval = -EINVAL;

        if (cpufreq_disabled())
                return -ENODEV;

        pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
                target_freq, relation);
        if (cpu_online(policy->cpu) && cpufreq_driver->target)
                retval = cpufreq_driver->target(policy, target_freq, relation);

        return retval;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_target);

int cpufreq_driver_target(struct cpufreq_policy *policy,
                          unsigned int target_freq,
                          unsigned int relation)
{
        int ret = -EINVAL;

        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                goto no_policy;

        if (unlikely(lock_policy_rwsem_write(policy->cpu)))
                goto fail;

        ret = __cpufreq_driver_target(policy, target_freq, relation);

        unlock_policy_rwsem_write(policy->cpu);

fail:
        cpufreq_cpu_put(policy);
no_policy:
        return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);

int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
{
        int ret = 0;

        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                return -EINVAL;

        if (cpu_online(cpu) && cpufreq_driver->getavg)
                ret = cpufreq_driver->getavg(policy, cpu);

        cpufreq_cpu_put(policy);
        return ret;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);

/*
 * __cpufreq_governor - call the governor's callback with the given event
 * (e.g. CPUFREQ_GOV_START, CPUFREQ_GOV_STOP or CPUFREQ_GOV_LIMITS).
 */

static int __cpufreq_governor(struct cpufreq_policy *policy,
                                        unsigned int event)
{
        int ret;

        /* This must only be defined when the default governor is known to
           have latency restrictions, like e.g. conservative or ondemand.
           That this is the case is already ensured in Kconfig
        */
#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
        struct cpufreq_governor *gov = &cpufreq_gov_performance;
#else
        struct cpufreq_governor *gov = NULL;
#endif

        if (policy->governor->max_transition_latency &&
            policy->cpuinfo.transition_latency >
            policy->governor->max_transition_latency) {
                if (!gov)
                        return -EINVAL;
                else {
                        printk(KERN_WARNING "%s governor failed, too long"
                               " transition latency of HW, fallback"
                               " to %s governor\n",
                               policy->governor->name,
                               gov->name);
                        policy->governor = gov;
                }
        }

        if (!try_module_get(policy->governor->owner))
                return -EINVAL;

        pr_debug("__cpufreq_governor for CPU %u, event %u\n",
                                                policy->cpu, event);
        ret = policy->governor->governor(policy, event);

1546         /* we keep one module reference alive for
1547                         each CPU governed by this CPU */
1548         if ((event != CPUFREQ_GOV_START) || ret)
1549                 module_put(policy->governor->owner);
1550         if ((event == CPUFREQ_GOV_STOP) && !ret)
1551                 module_put(policy->governor->owner);
1552
1553         return ret;
1554 }
1555
1556
int cpufreq_register_governor(struct cpufreq_governor *governor)
{
        int err;

        if (!governor)
                return -EINVAL;

        if (cpufreq_disabled())
                return -ENODEV;

        mutex_lock(&cpufreq_governor_mutex);

        err = -EBUSY;
        if (__find_governor(governor->name) == NULL) {
                err = 0;
                list_add(&governor->governor_list, &cpufreq_governor_list);
        }

        mutex_unlock(&cpufreq_governor_mutex);
        return err;
}
EXPORT_SYMBOL_GPL(cpufreq_register_governor);


void cpufreq_unregister_governor(struct cpufreq_governor *governor)
{
#ifdef CONFIG_HOTPLUG_CPU
        int cpu;
#endif

        if (!governor)
                return;

        if (cpufreq_disabled())
                return;

#ifdef CONFIG_HOTPLUG_CPU
        for_each_present_cpu(cpu) {
                if (cpu_online(cpu))
                        continue;
                if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
                        strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
        }
#endif

        mutex_lock(&cpufreq_governor_mutex);
        list_del(&governor->governor_list);
        mutex_unlock(&cpufreq_governor_mutex);
        return;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
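
/*
 * Illustrative sketch only: the usual lifecycle of a governor module is
 * to register the governor on module init and unregister it on exit.
 * "my_governor" and my_governor_fn() are hypothetical.
 */
#if 0
static int my_governor_fn(struct cpufreq_policy *policy, unsigned int event)
{
        switch (event) {
        case CPUFREQ_GOV_START:         /* begin managing policy->cpu */
        case CPUFREQ_GOV_STOP:          /* stop managing it */
        case CPUFREQ_GOV_LIMITS:        /* policy->min/max changed */
                break;
        }
        return 0;
}

static struct cpufreq_governor my_governor = {
        .name           = "my_governor",
        .governor       = my_governor_fn,
        .owner          = THIS_MODULE,
};

static int __init my_governor_init(void)
{
        return cpufreq_register_governor(&my_governor);
}

static void __exit my_governor_exit(void)
{
        cpufreq_unregister_governor(&my_governor);
}
module_init(my_governor_init);
module_exit(my_governor_exit);
#endif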



/*********************************************************************
 *                          POLICY INTERFACE                         *
 *********************************************************************/

/**
 * cpufreq_get_policy - get the current cpufreq_policy
 * @policy: struct cpufreq_policy into which the current cpufreq_policy
 *      is written
 * @cpu: CPU whose policy shall be read
 *
 * Reads the current cpufreq policy of @cpu.
 */
int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
{
        struct cpufreq_policy *cpu_policy;

        if (!policy)
                return -EINVAL;

        cpu_policy = cpufreq_cpu_get(cpu);
        if (!cpu_policy)
                return -EINVAL;

        memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));

        cpufreq_cpu_put(cpu_policy);
        return 0;
}
EXPORT_SYMBOL(cpufreq_get_policy);
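
/*
 * Illustrative sketch only: reading a CPU's current limits through
 * cpufreq_get_policy().  The caller owns the local copy, so no
 * reference is held once the call returns.  my_show_limits() is
 * hypothetical.
 */
#if 0
static void my_show_limits(unsigned int cpu)
{
        struct cpufreq_policy pol;

        if (cpufreq_get_policy(&pol, cpu))
                return;         /* no policy for this CPU */

        pr_info("cpu%u: %u - %u kHz, governor %s\n", cpu,
                pol.min, pol.max, pol.governor ? pol.governor->name : "none");
}
#endif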


/*
 * data   : current policy.
 * policy : policy to be set.
 */
static int __cpufreq_set_policy(struct cpufreq_policy *data,
                                struct cpufreq_policy *policy)
{
        int ret = 0;

        pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
                policy->min, policy->max);

        memcpy(&policy->cpuinfo, &data->cpuinfo,
                                sizeof(struct cpufreq_cpuinfo));

        if (policy->min > data->max || policy->max < data->min) {
                ret = -EINVAL;
                goto error_out;
        }

        /* verify that the cpu speed can be set within these limits */
        ret = cpufreq_driver->verify(policy);
        if (ret)
                goto error_out;

        /* adjust if necessary - all reasons */
        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                        CPUFREQ_ADJUST, policy);

        /* adjust if necessary - hardware incompatibility */
        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                        CPUFREQ_INCOMPATIBLE, policy);

        /* verify again that the cpu speed can be set within these limits,
           which might be different from the initial ones */
        ret = cpufreq_driver->verify(policy);
        if (ret)
                goto error_out;

        /* notification of the new policy */
        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                        CPUFREQ_NOTIFY, policy);

        data->min = policy->min;
        data->max = policy->max;

        pr_debug("new min and max freqs are %u - %u kHz\n",
                                        data->min, data->max);

        if (cpufreq_driver->setpolicy) {
                data->policy = policy->policy;
                pr_debug("setting range\n");
                ret = cpufreq_driver->setpolicy(policy);
        } else {
                if (policy->governor != data->governor) {
                        /* save old, working values */
                        struct cpufreq_governor *old_gov = data->governor;

                        pr_debug("governor switch\n");

                        /* end old governor */
                        if (data->governor)
                                __cpufreq_governor(data, CPUFREQ_GOV_STOP);

                        /* start new governor */
                        data->governor = policy->governor;
                        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
                                /* new governor failed, so re-start old one */
                                pr_debug("starting governor %s failed\n",
                                                        data->governor->name);
                                if (old_gov) {
                                        data->governor = old_gov;
                                        __cpufreq_governor(data,
                                                           CPUFREQ_GOV_START);
                                }
                                ret = -EINVAL;
                                goto error_out;
                        }
                        /* might be a policy change, too, so fall through */
                }
                pr_debug("governor: change or update limits\n");
                __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
        }

error_out:
        return ret;
}

/**
 *      cpufreq_update_policy - re-evaluate an existing cpufreq policy
 *      @cpu: CPU which shall be re-evaluated
 *
 *      Useful for policy notifiers which have different requirements
 *      at different times.
 */
int cpufreq_update_policy(unsigned int cpu)
{
        struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
        struct cpufreq_policy policy;
        int ret;

        if (!data) {
                ret = -ENODEV;
                goto no_policy;
        }

        if (unlikely(lock_policy_rwsem_write(cpu))) {
                ret = -EINVAL;
                goto fail;
        }

        pr_debug("updating policy for CPU %u\n", cpu);
        memcpy(&policy, data, sizeof(struct cpufreq_policy));
        policy.min = data->user_policy.min;
        policy.max = data->user_policy.max;
        policy.policy = data->user_policy.policy;
        policy.governor = data->user_policy.governor;

        /* BIOS might change freq behind our back
           -> ask driver for current freq and notify governors about a change */
        if (cpufreq_driver->get) {
                policy.cur = cpufreq_driver->get(cpu);
                if (!data->cur) {
                        pr_debug("Driver did not initialize current freq\n");
                        data->cur = policy.cur;
                } else {
                        if (data->cur != policy.cur)
                                cpufreq_out_of_sync(cpu, data->cur,
                                                                policy.cur);
                }
        }

        ret = __cpufreq_set_policy(data, &policy);

        unlock_policy_rwsem_write(cpu);

fail:
        cpufreq_cpu_put(data);
no_policy:
        return ret;
}
EXPORT_SYMBOL(cpufreq_update_policy);
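
/*
 * Illustrative sketch only: a policy notifier that clamps the maximum
 * frequency (say, for thermal reasons) would adjust the policy during
 * CPUFREQ_ADJUST, and another part of the system would later call
 * cpufreq_update_policy(cpu) to have the clamp re-evaluated.  The
 * 800 MHz cap and the my_thermal_* names are hypothetical.
 */
#if 0
static int my_thermal_notify(struct notifier_block *nb,
                             unsigned long event, void *data)
{
        struct cpufreq_policy *policy = data;

        if (event == CPUFREQ_ADJUST && my_thermal_throttled())
                cpufreq_verify_within_limits(policy, policy->min, 800000);

        return NOTIFY_OK;
}

static struct notifier_block my_thermal_nb = {
        .notifier_call = my_thermal_notify,
};

/* registered elsewhere with:
 *      cpufreq_register_notifier(&my_thermal_nb, CPUFREQ_POLICY_NOTIFIER);
 * and re-triggered on a thermal event with:
 *      cpufreq_update_policy(cpu);
 */
#endif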

static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
                                        unsigned long action, void *hcpu)
{
        unsigned int cpu = (unsigned long)hcpu;
        struct device *dev;

        dev = get_cpu_device(cpu);
        if (dev) {
                switch (action) {
                case CPU_ONLINE:
                case CPU_ONLINE_FROZEN:
                        cpufreq_add_dev(dev, NULL);
                        break;
                case CPU_DOWN_PREPARE:
                case CPU_DOWN_PREPARE_FROZEN:
                        if (unlikely(lock_policy_rwsem_write(cpu)))
                                BUG();

                        __cpufreq_remove_dev(dev, NULL);
                        break;
                case CPU_DOWN_FAILED:
                case CPU_DOWN_FAILED_FROZEN:
                        cpufreq_add_dev(dev, NULL);
                        break;
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block __refdata cpufreq_cpu_notifier = {
        .notifier_call = cpufreq_cpu_callback,
};

/*********************************************************************
 *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
 *********************************************************************/

/**
 * cpufreq_register_driver - register a CPU Frequency driver
 * @driver_data: A struct cpufreq_driver containing the values
 * submitted by the CPU Frequency driver.
 *
 *   Registers a CPU Frequency driver with this core code. Returns zero
 * on success, -EBUSY when another driver got here first (and isn't
 * unregistered in the meantime).
 *
 */
int cpufreq_register_driver(struct cpufreq_driver *driver_data)
{
        unsigned long flags;
        int ret;

        if (cpufreq_disabled())
                return -ENODEV;

        if (!driver_data || !driver_data->verify || !driver_data->init ||
            ((!driver_data->setpolicy) && (!driver_data->target)))
                return -EINVAL;

        pr_debug("trying to register driver %s\n", driver_data->name);

        if (driver_data->setpolicy)
                driver_data->flags |= CPUFREQ_CONST_LOOPS;

        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        if (cpufreq_driver) {
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                return -EBUSY;
        }
        cpufreq_driver = driver_data;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        ret = subsys_interface_register(&cpufreq_interface);
        if (ret)
                goto err_null_driver;

        if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
                int i;
                ret = -ENODEV;

                /* check for at least one working CPU */
                for (i = 0; i < nr_cpu_ids; i++)
                        if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
                                ret = 0;
                                break;
                        }

                /* if all ->init() calls failed, unregister */
                if (ret) {
                        pr_debug("no CPU initialized for driver %s\n",
                                                        driver_data->name);
                        goto err_if_unreg;
                }
        }

        register_hotcpu_notifier(&cpufreq_cpu_notifier);
        pr_debug("driver %s up and running\n", driver_data->name);

        return 0;
err_if_unreg:
        subsys_interface_unregister(&cpufreq_interface);
err_null_driver:
        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        cpufreq_driver = NULL;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
        return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);
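
/*
 * Illustrative sketch only: the skeleton of a minimal ->target style
 * driver module.  The my_driver_* callbacks are hypothetical; a real
 * driver must provide at least ->verify, ->init and one of
 * ->target/->setpolicy, as enforced above.
 */
#if 0
static struct cpufreq_driver my_cpufreq_driver = {
        .name   = "my_cpufreq",
        .init   = my_driver_init,       /* fill in policy->cpuinfo and limits */
        .verify = my_driver_verify,     /* clamp policy to the valid hw range */
        .target = my_driver_target,     /* program the requested frequency */
        .owner  = THIS_MODULE,
};

static int __init my_cpufreq_module_init(void)
{
        return cpufreq_register_driver(&my_cpufreq_driver);
}

static void __exit my_cpufreq_module_exit(void)
{
        cpufreq_unregister_driver(&my_cpufreq_driver);
}
module_init(my_cpufreq_module_init);
module_exit(my_cpufreq_module_exit);
#endif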


/**
 * cpufreq_unregister_driver - unregister the current CPUFreq driver
 * @driver: the driver being unregistered
 *
 *    Unregister the current CPUFreq driver. Only call this if you have
 * the right to do so, i.e. if you have succeeded in registering before!
 * Returns zero if successful, and -EINVAL if the cpufreq_driver is
 * currently not initialised.
 */
int cpufreq_unregister_driver(struct cpufreq_driver *driver)
{
        unsigned long flags;

        if (!cpufreq_driver || (driver != cpufreq_driver))
                return -EINVAL;

        pr_debug("unregistering driver %s\n", driver->name);

        subsys_interface_unregister(&cpufreq_interface);
        unregister_hotcpu_notifier(&cpufreq_cpu_notifier);

        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        cpufreq_driver = NULL;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);

static int __init cpufreq_core_init(void)
{
        int cpu;

        if (cpufreq_disabled())
                return -ENODEV;

        for_each_possible_cpu(cpu) {
                per_cpu(cpufreq_policy_cpu, cpu) = -1;
                init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
        }

        cpufreq_global_kobject = kobject_create_and_add("cpufreq",
                                                &cpu_subsys.dev_root->kobj);
        BUG_ON(!cpufreq_global_kobject);
        register_syscore_ops(&cpufreq_syscore_ops);

        return 0;
}
core_initcall(cpufreq_core_init);