/*
 * Idle daemon for PowerPC.  The idle daemon handles any action
 * that needs to be taken when the system becomes idle.
 *
 * Originally written by Cort Dougan (cort@cs.nmt.edu)
 *
 * iSeries support added by Mike Corrigan <mikejc@us.ibm.com>
 *
 * Additional shared processor, SMT, and firmware support
 *    Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/sysctl.h>

#include <asm/system.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/cputable.h>
#include <asm/time.h>
#include <asm/iSeries/HvCall.h>
#include <asm/iSeries/ItLpQueue.h>
#include <asm/plpar_wrappers.h>
#include <asm/systemcfg.h>

extern void power4_idle(void);

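/*
 * Per-platform idle loop, chosen once at boot by idle_setup() and then
 * entered forever by every CPU via cpu_idle().
 */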
static int (*idle_loop)(void);

#ifdef CONFIG_PPC_ISERIES
static unsigned long maxYieldTime = 0;
static unsigned long minYieldTime = 0xffffffffffffffffUL;

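/*
 * On a shared-processor iSeries partition, give the virtual processor
 * back to the hypervisor until the next jiffy is due, so the cycles
 * can be dispatched to other partitions.
 */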
static void yield_shared_processor(void)
{
        unsigned long tb;
        unsigned long yieldTime;

        HvCall_setEnabledInterrupts(HvCall_MaskIPI |
                                    HvCall_MaskLpEvent |
                                    HvCall_MaskLpProd |
                                    HvCall_MaskTimeout);

        tb = get_tb();
        /* Compute future tb value when yield should expire */
        HvCall_yieldProcessor(HvCall_YieldTimed, tb + tb_ticks_per_jiffy);

        yieldTime = get_tb() - tb;
        if (yieldTime > maxYieldTime)
                maxYieldTime = yieldTime;

        if (yieldTime < minYieldTime)
                minYieldTime = yieldTime;

        /*
         * The decrementer stops during the yield.  Force a fake decrementer
         * here and let the timer_interrupt code sort out the actual time.
         */
        get_paca()->lppaca.int_dword.fields.decr_int = 1;
        process_iSeries_events();
}

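/*
 * iSeries idle loop.  On shared processors, yield to the hypervisor
 * whenever there is nothing to do; on dedicated processors, spin at
 * low SMT priority while polling for pending LP events.
 */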
static int iSeries_idle(void)
{
        struct paca_struct *lpaca;
        long oldval;
        unsigned long CTRL;

        /* ensure iSeries run light will be out when idle */
        clear_thread_flag(TIF_RUN_LIGHT);
        CTRL = mfspr(CTRLF);
        CTRL &= ~RUNLATCH;
        mtspr(CTRLT, CTRL);

        lpaca = get_paca();

        while (1) {
                if (lpaca->lppaca.shared_proc) {
                        if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr))
                                process_iSeries_events();
                        if (!need_resched())
                                yield_shared_processor();
                } else {
                        oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);

                        if (!oldval) {
                                set_thread_flag(TIF_POLLING_NRFLAG);

                                while (!need_resched()) {
                                        HMT_medium();
                                        if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr))
                                                process_iSeries_events();
                                        HMT_low();
                                }

                                HMT_medium();
                                clear_thread_flag(TIF_POLLING_NRFLAG);
                        } else {
                                set_need_resched();
                        }
                }

                schedule();
        }

        return 0;
}

#else

static int default_idle(void)
{
        long oldval;
        unsigned int cpu = smp_processor_id();

        while (1) {
                oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);

                if (!oldval) {
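                        /*
                         * TIF_POLLING_NRFLAG tells the scheduler that we
                         * poll need_resched() here, so a remote wakeup can
                         * skip the reschedule IPI.
                         */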
                        set_thread_flag(TIF_POLLING_NRFLAG);

                        while (!need_resched() && !cpu_is_offline(cpu)) {
                                barrier();
                                /*
                                 * Go into low thread priority and possibly
                                 * low power mode.
                                 */
                                HMT_low();
                                HMT_very_low();
                        }

                        HMT_medium();
                        clear_thread_flag(TIF_POLLING_NRFLAG);
                } else {
                        set_need_resched();
                }

                schedule();
                if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
                        cpu_die();
        }

        return 0;
}

#ifdef CONFIG_PPC_PSERIES

DECLARE_PER_CPU(unsigned long, smt_snooze_delay);

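/*
 * Dedicated-processor idle loop with SMT snooze: spin at low priority
 * for smt_snooze_delay microseconds.  After that, if the sibling thread
 * is still busy, cede this thread to the hypervisor so the sibling gets
 * the core's full resources; if both threads are idle, briefly give the
 * hypervisor a chance to dispatch other work.
 */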
int dedicated_idle(void)
{
        long oldval;
        struct paca_struct *lpaca = get_paca(), *ppaca;
        unsigned long start_snooze;
        unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
        unsigned int cpu = smp_processor_id();

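        /*
         * This assumes two SMT threads per core whose logical CPU numbers
         * differ only in the low bit, so cpu ^ 1 finds the sibling's paca.
         */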
        ppaca = &paca[cpu ^ 1];

        while (1) {
                /*
                 * Indicate to the HV that we are idle. Now would be
                 * a good time to find other work to dispatch.
                 */
                lpaca->lppaca.idle = 1;

                oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
                if (!oldval) {
                        set_thread_flag(TIF_POLLING_NRFLAG);
                        start_snooze = __get_tb() +
                                *smt_snooze_delay * tb_ticks_per_usec;
                        while (!need_resched() && !cpu_is_offline(cpu)) {
                                /*
                                 * Go into low thread priority and possibly
                                 * low power mode.
                                 */
                                HMT_low();
                                HMT_very_low();

                                if (*smt_snooze_delay == 0 ||
                                    __get_tb() < start_snooze)
                                        continue;

                                HMT_medium();

                                if (!(ppaca->lppaca.idle)) {
                                        local_irq_disable();

                                        /*
                                         * We are about to sleep the thread
                                         * and so won't be polling any more.
                                         */
                                        clear_thread_flag(TIF_POLLING_NRFLAG);

                                        /*
                                         * SMT dynamic mode. Cede will result
                                         * in this thread going dormant, if the
                                         * partner thread is still doing work.
                                         * Thread wakes up if partner goes idle,
                                         * an interrupt is presented, or a prod
                                         * occurs.  Returning from the cede
                                         * enables external interrupts.
                                         */
                                        if (!need_resched())
                                                cede_processor();
                                        else
                                                local_irq_enable();
                                } else {
                                        /*
                                         * Give the HV an opportunity at the
                                         * processor, since we are not doing
                                         * any work.
                                         */
                                        poll_pending();
                                }
                        }

                        clear_thread_flag(TIF_POLLING_NRFLAG);
                } else {
                        set_need_resched();
                }

                HMT_medium();
                lpaca->lppaca.idle = 0;
                schedule();
                if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
                        cpu_die();
        }
        return 0;
}

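/*
 * Shared-processor idle loop: cede the virtual processor to the
 * hypervisor whenever we are idle, so the cycles go back to the shared
 * pool for other partitions to use.
 */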
static int shared_idle(void)
{
        struct paca_struct *lpaca = get_paca();
        unsigned int cpu = smp_processor_id();

        while (1) {
                /*
                 * Indicate to the HV that we are idle. Now would be
                 * a good time to find other work to dispatch.
                 */
                lpaca->lppaca.idle = 1;

                while (!need_resched() && !cpu_is_offline(cpu)) {
                        local_irq_disable();

                        /*
                         * Yield the processor to the hypervisor.  We return
                         * if an external interrupt occurs (external
                         * interrupts are driven before we return here) or
                         * if a prod occurs from another processor.  When we
                         * return, external interrupts are enabled.
                         *
                         * Check need_resched() again with interrupts
                         * disabled to avoid a race.
                         */
                        if (!need_resched())
                                cede_processor();
                        else
                                local_irq_enable();
                }

                HMT_medium();
                lpaca->lppaca.idle = 0;
                schedule();
                if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
                        cpu_die();
        }

        return 0;
}

#endif /* CONFIG_PPC_PSERIES */

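/*
 * Idle loop for native (non-LPAR) machines.  power4_idle() is expected
 * to nap the CPU when the powersave_nap sysctl below is set and the CPU
 * supports nap, and to return immediately otherwise.
 */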
static int native_idle(void)
{
        while (1) {
                /* check CPU type here */
                if (!need_resched())
                        power4_idle();
                if (need_resched())
                        schedule();

                if (cpu_is_offline(_smp_processor_id()) &&
                    system_state == SYSTEM_RUNNING)
                        cpu_die();
        }
        return 0;
}

#endif /* CONFIG_PPC_ISERIES */

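/*
 * Arch entry point for the per-CPU idle threads; idle_loop() never
 * returns.
 */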
void cpu_idle(void)
{
        idle_loop();
}

int powersave_nap;

#ifdef CONFIG_SYSCTL
/*
 * Register the sysctl to set/clear powersave_nap.
 */
static ctl_table powersave_nap_ctl_table[] = {
        {
                .ctl_name       = KERN_PPC_POWERSAVE_NAP,
                .procname       = "powersave-nap",
                .data           = &powersave_nap,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
        { 0, },
};
static ctl_table powersave_nap_sysctl_root[] = {
        {
                .ctl_name       = CTL_KERN,
                .procname       = "kernel",
                .mode           = 0755,
                .child          = powersave_nap_ctl_table,
        },
        { 0, },
};

static int __init
register_powersave_nap_sysctl(void)
{
        register_sysctl_table(powersave_nap_sysctl_root, 0);

        return 0;
}
__initcall(register_powersave_nap_sysctl);
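
/*
 * The knob then shows up as /proc/sys/kernel/powersave-nap; e.g.
 * "echo 1 > /proc/sys/kernel/powersave-nap" enables nap from userspace.
 */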
#endif

int idle_setup(void)
{
        /*
         * Move that junk to each platform-specific file, eventually define
         * a pSeries_idle for shared processor stuff
         */
#ifdef CONFIG_PPC_ISERIES
        idle_loop = iSeries_idle;
        return 1;
#else
        idle_loop = default_idle;
#endif
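        /*
         * On pSeries, pick the loop based on whether firmware supports
         * shared-processor LPAR and whether this partition's processors
         * are shared, as recorded in the lppaca.
         */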
#ifdef CONFIG_PPC_PSERIES
        if (systemcfg->platform & PLATFORM_PSERIES) {
                if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
                        if (get_paca()->lppaca.shared_proc) {
                                printk(KERN_INFO "Using shared processor idle loop\n");
                                idle_loop = shared_idle;
                        } else {
                                printk(KERN_INFO "Using dedicated idle loop\n");
                                idle_loop = dedicated_idle;
                        }
                } else {
                        printk(KERN_INFO "Using default idle loop\n");
                        idle_loop = default_idle;
                }
        }
#endif /* CONFIG_PPC_PSERIES */
#ifndef CONFIG_PPC_ISERIES
        if (systemcfg->platform == PLATFORM_POWERMAC ||
            systemcfg->platform == PLATFORM_MAPLE) {
                printk(KERN_INFO "Using native/NAP idle loop\n");
                idle_loop = native_idle;
        }
#endif /* CONFIG_PPC_ISERIES */

        return 1;
}