2 * linux/kernel/suspend.c
4 * This file is to realize architecture-independent
5 * machine suspend feature using pretty near only high-level routines
7 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
8 * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
10 * I'd like to thank the following people for their work:
12 * Pavel Machek <pavel@ucw.cz>:
13 * Modifications, defectiveness pointing, being with me at the very beginning,
14 * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
16 * Steve Doddi <dirk@loth.demon.co.uk>:
17 * Support the possibility of hardware state restoring.
19 * Raph <grey.havens@earthling.net>:
20 * Support for preserving states of network devices and virtual console
21 * (including X and svgatextmode)
23 * Kurt Garloff <garloff@suse.de>:
24 * Straightened the critical function in order to prevent compilers from
25 * playing tricks with local variables.
27 * Andreas Mohr <a.mohr@mailto.de>
29 * Alex Badea <vampire@go.ro>:
32 * More state savers are welcome. Especially for the scsi layer...
34 * For TODOs,FIXMEs also look in Documentation/swsusp.txt
37 #include <linux/module.h>
39 #include <linux/suspend.h>
40 #include <linux/smp_lock.h>
41 #include <linux/file.h>
42 #include <linux/utsname.h>
43 #include <linux/version.h>
44 #include <linux/delay.h>
45 #include <linux/reboot.h>
46 #include <linux/vt_kern.h>
47 #include <linux/bitops.h>
48 #include <linux/interrupt.h>
49 #include <linux/kbd_kern.h>
50 #include <linux/keyboard.h>
51 #include <linux/spinlock.h>
52 #include <linux/genhd.h>
53 #include <linux/kernel.h>
54 #include <linux/major.h>
55 #include <linux/swap.h>
57 #include <linux/device.h>
58 #include <linux/buffer_head.h>
59 #include <linux/swapops.h>
60 #include <linux/bootmem.h>
62 #include <asm/uaccess.h>
63 #include <asm/mmu_context.h>
64 #include <asm/pgtable.h>
/*
 * File-scope declarations for software suspend: the enable gate that
 * software_suspend() tests, the console/timeout/address helper macros, the
 * nosave section boundaries, the values remembered across a suspend (log
 * level, console, resume= settings), the suspend page directory pointers,
 * and the debug/printing macros.
 */
67 extern long sys_sync(void);
69 unsigned char software_suspend_enabled = 0;
71 #define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
72 /* With SUSPEND_CONSOLE defined, it suspend looks *really* cool, but
73 we probably do not take enough locks for switching consoles, etc,
74 so bad things might happen.
76 #if !defined(CONFIG_VT) || !defined(CONFIG_VT_CONSOLE)
77 #undef SUSPEND_CONSOLE
80 #define TIMEOUT (6 * HZ) /* Timeout for stopping processes */
81 #define __ADDRESS(x) ((unsigned long) phys_to_virt(x))
82 #define ADDRESS(x) __ADDRESS((x) << PAGE_SHIFT)
83 #define ADDRESS2(x) __ADDRESS(__pa(x)) /* Needed for x86-64 where some pages are in memory twice */
85 /* References to section boundaries */
86 extern char __nosave_begin, __nosave_end;
88 extern int is_head_of_free_region(struct page *);
91 spinlock_t suspend_pagedir_lock __nosavedata = SPIN_LOCK_UNLOCKED;
93 /* Variables to be preserved over suspend */
94 static int new_loglevel = 7;
95 static int orig_loglevel = 0;
96 static int orig_fgconsole, orig_kmsg;
97 static int pagedir_order_check;
98 static int nr_copy_pages_check;
100 static int resume_status = 0;
101 static char resume_file[256] = ""; /* For resume= kernel option */
102 static dev_t resume_device;
103 /* Local variables that should not be affected by save */
104 unsigned int nr_copy_pages __nosavedata = 0;
106 static int pm_suspend_state = 0;
108 /* Suspend pagedir is allocated before final copy, therefore it
109 must be freed after resume
111 Warning: this is evil. There are actually two pagedirs at time of
112 resume. One is "pagedir_save", which is empty frame allocated at
113 time of suspend, that must be freed. Second is "pagedir_nosave",
114 allocated at time of resume, that travels through memory not to
115 collide with anything.
117 suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
118 static suspend_pagedir_t *pagedir_save;
119 static int pagedir_order __nosavedata = 0;
122 char dummy[PAGE_SIZE - sizeof(swp_entry_t)];
127 union swap_header swh;
129 struct suspend_header sh;
133 * XXX: We try to keep some more pages free so that I/O operations succeed
134 * without paging. Might this be more?
136 #define PAGES_FOR_IO 512
138 static const char name_suspend[] = "Suspend Machine: ";
139 static const char name_resume[] = "Resume Machine: ";
144 #define DEBUG_DEFAULT
147 #define TEST_SWSUSP 0 /* Set to 1 to reboot instead of halt machine after suspension */
150 # define PRINTK(f, a...) printk(f, ## a)
152 # define PRINTK(f, a...)
156 #define MDELAY(a) mdelay(a)
162 * Refrigerator and related stuff
/*
 * Task filter macro.  The tests visible here look at the PF_IOTHREAD flag
 * (kernel threads that must not be touched, per the in-line note) and at
 * the TASK_ZOMBIE state.
 * NOTE(review): presumably applied to each task in the freeze/thaw loops;
 * confirm what the macro does when one of the tests matches.
 */
165 #define INTERESTING(p) \
166 /* We don't want to touch kernel_threads..*/ \
167 if (p->flags & PF_IOTHREAD) \
171 if (p->state == TASK_ZOMBIE) \
174 /* Refrigerator is place where frozen processes are stored :-). */
/*
 * refrigerator - park the calling task until it is thawed
 * @flag: NOTE(review): confirm how @flag is consumed; the flush_signals()
 *        call below is the likely user.
 *
 * Remembers current->state, marks the task TASK_STOPPED, drops the
 * PF_FREEZE request bit and calls flush_signals(), then sets PF_FROZEN and
 * waits for as long as that bit stays set (thaw_processes() clears it).
 * The remembered state is put back before returning.
 */
175 void refrigerator(unsigned long flag)
177 /* You need correct to work with real-time processes.
178 OTOH, this way one process may see (via /proc/) some other
179 process in stopped state (and thereby discovered we were
180 suspended. We probably do not care.
183 save = current->state;
184 current->state = TASK_STOPPED;
185 PRINTK("%s entered refrigerator\n", current->comm);
187 current->flags &= ~PF_FREEZE;
189 flush_signals(current); /* We have signaled a kernel thread, which isn't normal behaviour
190 and that may lead to 100%CPU sucking because those threads
191 just don't manage signals. */
192 current->flags |= PF_FROZEN;
193 while (current->flags & PF_FROZEN)
195 PRINTK("%s left refrigerator\n", current->comm);
196 current->state = save;
199 /* 0 = success, else # of processes that we failed to stop */
/*
 * freeze_processes - ask every task to enter the refrigerator
 *
 * Prints "Stopping tasks: " and records the start time, then walks all
 * threads under tasklist_lock.  Tasks are tested for PF_FROZEN; PF_FREEZE
 * is set on a task and it is woken with signal_wake_up() while its siglock
 * is held.  yield() is called after the walk.  Once more than TIMEOUT
 * jiffies have elapsed since the start, the number of tasks still
 * outstanding (todo) is reported with KERN_ERR.
 *
 * Return: as stated above, 0 on success, otherwise the number of tasks that
 * could not be stopped.
 */
200 int freeze_processes(void)
203 unsigned long start_time;
204 struct task_struct *g, *p;
206 printk( "Stopping tasks: " );
207 start_time = jiffies;
210 read_lock(&tasklist_lock);
211 do_each_thread(g, p) {
214 if (p->flags & PF_FROZEN)
217 /* FIXME: smp problem here: we may not access other process' flags
219 p->flags |= PF_FREEZE;
220 spin_lock_irqsave(&p->sighand->siglock, flags);
221 signal_wake_up(p, 0);
222 spin_unlock_irqrestore(&p->sighand->siglock, flags);
224 } while_each_thread(g, p);
225 read_unlock(&tasklist_lock);
226 yield(); /* Yield is okay here */
227 if (time_after(jiffies, start_time + TIMEOUT)) {
229 printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo );
/*
 * thaw_processes - release tasks held in the refrigerator
 *
 * Prints "Restarting tasks...", then walks all threads under tasklist_lock
 * and clears PF_FROZEN on every task that has it set.  The
 * "Strange, %s not stopped" message is for tasks found without the flag.
 */
239 void thaw_processes(void)
241 struct task_struct *g, *p;
243 printk( "Restarting tasks..." );
244 read_lock(&tasklist_lock);
245 do_each_thread(g, p) {
248 if (p->flags & PF_FROZEN) p->flags &= ~PF_FROZEN;
250 printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
252 } while_each_thread(g, p);
254 read_unlock(&tasklist_lock);
/*
 * fill_suspend_header - populate the suspend header written with the image
 * @sh: header to fill; it is zeroed first
 *
 * Records LINUX_VERSION_CODE, num_physpages, at most 8 bytes of the utsname
 * machine string and at most 20 bytes of the version string, the number of
 * online CPUs, PAGE_SIZE, the pagedir pointer and nr_copy_pages.
 * sanity_check() compares most of these fields at resume time.
 * BUGs if pagedir_save and pagedir_nosave differ.
 */
263 static __inline__ int fill_suspend_header(struct suspend_header *sh)
265 memset((char *)sh, 0, sizeof(*sh));
267 sh->version_code = LINUX_VERSION_CODE;
268 sh->num_physpages = num_physpages;
269 strncpy(sh->machine, system_utsname.machine, 8);
270 strncpy(sh->version, system_utsname.version, 20);
271 /* FIXME: Is this bogus? --RR */
272 sh->num_cpus = num_online_cpus();
273 sh->page_size = PAGE_SIZE;
274 sh->suspend_pagedir = pagedir_nosave;
275 BUG_ON (pagedir_save != pagedir_nosave);
276 sh->num_pbes = nr_copy_pages;
277 /* TODO: needed? mounted fs' last mounted date comparison
278 * [so they haven't been mounted since last suspend.
279 * Maybe it isn't.] [we'd need to do this for _all_ fs-es]
/*
 * do_suspend_sync - sync helper for the suspend path.
 * NOTE(review): the #warning below records that waiting for data to reach
 * the disk may not be handled correctly; confirm before relying on it.
 */
285 * This is our sync function. With this solution we probably won't sleep
286 * but that should not be a problem since tasks are stopped..
289 static inline void do_suspend_sync(void)
292 #warning This might be broken. We need to somehow wait for data to reach the disk
295 /* We memorize in swapfile_used what swap devices are used for suspension */
296 #define SWAPFILE_UNUSED 0
297 #define SWAPFILE_SUSPEND 1 /* This is the suspending device */
298 #define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */
/* One state per swap slot; filled in by read_swapfiles(). */
300 static unsigned short swapfile_used[MAX_SWAPFILES];
/* Swap type handed to swp_entry() by mark_swapfiles(); 0xFFFF is treated there as "ignored". */
301 static unsigned short root_swap;
/* Modes accepted by mark_swapfiles(). */
302 #define MARK_SWAP_SUSPEND 0
303 #define MARK_SWAP_RESUME 2
/*
 * mark_swapfiles - rewrite the signature in the first page of the suspend swap
 * @prev: swap entry written into link.next of that page (callers pass {0}
 *        together with MARK_SWAP_RESUME)
 * @mode: MARK_SWAP_SUSPEND or MARK_SWAP_RESUME
 *
 * A root_swap of 0xFFFF is special-cased up front ("ignored").  Otherwise
 * page 0 of swap area root_swap is read into a freshly allocated page
 * (GFP_ATOMIC; "Out of memory in mark_swapfiles" is the panic message), the
 * magic is edited, and the page is written back with rw_swap_page_sync().
 *
 * MARK_SWAP_RESUME: a magic starting with "S1" becomes "SWAP-SPACE", one
 * starting with "S2" becomes "SWAPSPACE2"; anything else is only reported
 * with printk.
 * Other modes: "SWAP-SPACE" becomes "S1SUSP....", "SWAPSPACE2" becomes
 * "S2SUSP....", anything else panics.
 */
305 static void mark_swapfiles(swp_entry_t prev, int mode)
311 if (root_swap == 0xFFFF) /* ignored */
314 page = alloc_page(GFP_ATOMIC);
316 panic("Out of memory in mark_swapfiles");
317 cur = page_address(page);
318 /* XXX: this is dirty hack to get first page of swap file */
319 entry = swp_entry(root_swap, 0);
320 rw_swap_page_sync(READ, entry, page);
322 if (mode == MARK_SWAP_RESUME) {
323 if (!memcmp("S1",cur->swh.magic.magic,2))
324 memcpy(cur->swh.magic.magic,"SWAP-SPACE",10);
325 else if (!memcmp("S2",cur->swh.magic.magic,2))
326 memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
327 else printk("%sUnable to find suspended-data signature (%.10s - misspelled?\n",
328 name_resume, cur->swh.magic.magic);
330 if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)))
331 memcpy(cur->swh.magic.magic,"S1SUSP....",10);
332 else if ((!memcmp("SWAPSPACE2",cur->swh.magic.magic,10)))
333 memcpy(cur->swh.magic.magic,"S2SUSP....",10);
334 else panic("\nSwapspace is not swapspace (%.10s)\n", cur->swh.magic.magic);
335 cur->link.next = prev; /* prev is the first/last swap page of the resume area */
336 /* link.next lies *no more* in last 4/8 bytes of magic */
338 rw_swap_page_sync(WRITE, entry, page);
/*
 * read_swapfiles - classify every swap area before the image is saved
 *
 * Fills swapfile_used[] for all MAX_SWAPFILES slots: slots whose swap_info
 * flags are 0 become SWAPFILE_UNUSED, the others are marked
 * SWAPFILE_SUSPEND or SWAPFILE_IGNORED.  The length of resume_file is taken
 * first, and a warning asks for the resume= option to be used to select the
 * suspend device.
 * NOTE(review): the comparison against resume_device is commented out (see
 * the FIXME below); confirm how the suspend device is actually chosen.
 */
342 static void read_swapfiles(void) /* This is called before saving image */
346 len=strlen(resume_file);
350 for(i=0; i<MAX_SWAPFILES; i++) {
351 if (swap_info[i].flags == 0) {
352 swapfile_used[i]=SWAPFILE_UNUSED;
355 printk(KERN_WARNING "resume= option should be used to set suspend device" );
356 if(root_swap == 0xFFFF) {
357 swapfile_used[i] = SWAPFILE_SUSPEND;
360 swapfile_used[i] = SWAPFILE_IGNORED;
362 /* we ignore all swap devices that are not the resume_file */
364 // FIXME if(resume_device == swap_info[i].swap_device) {
365 swapfile_used[i] = SWAPFILE_SUSPEND;
369 printk( "Resume: device %s (%x != %x) ignored\n", swap_info[i].swap_file->d_name.name, swap_info[i].swap_device, resume_device );
371 swapfile_used[i] = SWAPFILE_IGNORED;
/*
 * lock_swapdevices - toggle usability of the ignored swap areas
 *
 * XORs the flags of every swap_info slot marked SWAPFILE_IGNORED with 0xFF.
 * Because it is an XOR, a second call restores the original flags.
 */
379 static void lock_swapdevices(void) /* This is called after saving image so modification
380 will be lost after resume... and that's what we want. */
385 for(i = 0; i< MAX_SWAPFILES; i++)
386 if(swapfile_used[i] == SWAPFILE_IGNORED) {
387 swap_info[i].flags ^= 0xFF; /* we make the device unusable. A new call to
388 lock_swapdevices can unlock the devices. */
/*
 * write_suspend_image - write the copied pages, the pagedir and the header
 * to swap
 *
 * Three stages, each obtaining swap with get_swap_page() and writing with
 * rw_swap_page_sync(WRITE, ...):
 *  1. every page listed in pagedir_nosave; the swap entry used is stored in
 *     the entry's swap_address;
 *  2. the nr_pgdir_pages pages of the pagedir itself;
 *  3. one header page, built in a zeroed scratch buffer by
 *     fill_suspend_header().
 * Pagedir and header pages get link.next set to 'prev' before they are
 * written, and 'prev' is finally handed to
 * mark_swapfiles(MARK_SWAP_SUSPEND).  Running out of swap, or being given a
 * swap page on a device not marked SWAPFILE_SUSPEND, ends in panic().
 * The scratch buffer is freed at the end.
 *
 * NOTE(review): the scratch buffer comes from get_zeroed_page(GFP_ATOMIC);
 * confirm it is checked for NULL before &cur->sh is used.
 */
393 static int write_suspend_image(void)
396 swp_entry_t entry, prev = { 0 };
397 int nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
398 union diskpage *cur, *buffer = (union diskpage *)get_zeroed_page(GFP_ATOMIC);
399 unsigned long address;
402 printk( "Writing data to swap (%d pages): ", nr_copy_pages );
403 for (i=0; i<nr_copy_pages; i++) {
406 if (!(entry = get_swap_page()).val)
407 panic("\nNot enough swapspace when writing data" );
409 if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
410 panic("\nPage %d: not enough swapspace on suspend device", i );
412 address = (pagedir_nosave+i)->address;
413 page = virt_to_page(address);
414 rw_swap_page_sync(WRITE, entry, page);
415 (pagedir_nosave+i)->swap_address = entry;
418 printk( "Writing pagedir (%d pages): ", nr_pgdir_pages);
419 for (i=0; i<nr_pgdir_pages; i++) {
420 cur = (union diskpage *)((char *) pagedir_nosave)+i;
421 BUG_ON ((char *) cur != (((char *) pagedir_nosave) + i*PAGE_SIZE));
423 if (!(entry = get_swap_page()).val) {
424 printk(KERN_CRIT "Not enough swapspace when writing pgdir\n" );
425 panic("Don't know how to recover");
426 free_page((unsigned long) buffer);
430 if(swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
431 panic("\nNot enough swapspace for pagedir on suspend device" );
433 BUG_ON (sizeof(swp_entry_t) != sizeof(long));
434 BUG_ON (PAGE_SIZE % sizeof(struct pbe));
436 cur->link.next = prev;
437 page = virt_to_page((unsigned long)cur);
438 rw_swap_page_sync(WRITE, entry, page);
442 BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t));
443 BUG_ON (sizeof(union diskpage) != PAGE_SIZE);
444 if (!(entry = get_swap_page()).val)
445 panic( "\nNot enough swapspace when writing header" );
446 if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
447 panic("\nNot enough swapspace for header on suspend device" );
449 cur = (void *) buffer;
450 if (fill_suspend_header(&cur->sh))
451 panic("\nOut of memory while writing header");
453 cur->link.next = prev;
455 page = virt_to_page((unsigned long)cur);
456 rw_swap_page_sync(WRITE, entry, page);
460 mark_swapfiles(prev, MARK_SWAP_SUSPEND);
464 free_page((unsigned long) buffer);
468 /* if pagedir_p != NULL it also copies the counted pages */
/*
 * count_and_copy_data_pages - count, and optionally copy, the pages to save
 * @pagedir_p: NULL to only count; otherwise a pbe array whose entries
 *             receive orig_address and a copy of the page (copy_page() into
 *             ->address)
 *
 * Walks pfn 0 .. max_pfn-1.  Panics under CONFIG_DISCONTIGMEM and on
 * highmem pages, and BUGs if max_pfn != num_physpages.  Free regions
 * reported by is_head_of_free_region() are stepped over in one go.
 * PageNosave and PageReserved pages get special handling, including the
 * range between __nosave_begin and __nosave_end.
 *
 * Note that the local nr_copy_pages shadows the file-scope variable of the
 * same name; the local counter is what gets returned.
 */
469 static int count_and_copy_data_pages(struct pbe *pagedir_p)
472 int nr_copy_pages = 0;
476 #ifdef CONFIG_DISCONTIGMEM
477 panic("Discontingmem not supported");
479 BUG_ON (max_pfn != num_physpages);
481 for (pfn = 0; pfn < max_pfn; pfn++) {
482 page = pfn_to_page(pfn);
483 if (PageHighMem(page))
484 panic("Swsusp not supported on highmem boxes. Send 1GB of RAM to <pavel@ucw.cz> and try again ;-).");
486 if (!PageReserved(page)) {
487 if (PageNosave(page))
490 if ((chunk_size=is_head_of_free_region(page))!=0) {
491 pfn += chunk_size - 1;
494 } else if (PageReserved(page)) {
495 BUG_ON (PageNosave(page));
498 * Just copy whole code segment. Hopefully it is not that big.
500 if ((ADDRESS(pfn) >= (unsigned long) ADDRESS2(&__nosave_begin)) &&
501 (ADDRESS(pfn) < (unsigned long) ADDRESS2(&__nosave_end))) {
502 PRINTK("[nosave %lx]", ADDRESS(pfn));
505 /* Hmm, perhaps copying all reserved pages is not too healthy as they may contain
506 critical bios data? */
511 pagedir_p->orig_address = ADDRESS(pfn);
512 copy_page((void *) pagedir_p->address, (void *) pagedir_p->orig_address);
516 return nr_copy_pages;
/*
 * free_suspend_pagedir - release a pagedir and the pages flagged Nosave
 * @this_pagedir: address of the pagedir; it spans PAGE_SIZE << pagedir_order
 *
 * Scans every pfn below num_physpages, test-and-clearing the Nosave flag.
 * Pages inside the pagedir's own range are skipped in the loop and released
 * by the final free_pages() call; free_page() is used for the other pages
 * that reach that point (presumably only those whose flag was set).
 */
519 static void free_suspend_pagedir(unsigned long this_pagedir)
523 unsigned long this_pagedir_end = this_pagedir +
524 (PAGE_SIZE << pagedir_order);
526 for(pfn = 0; pfn < num_physpages; pfn++) {
527 page = pfn_to_page(pfn);
528 if (!TestClearPageNosave(page))
531 if (ADDRESS(pfn) >= this_pagedir && ADDRESS(pfn) < this_pagedir_end)
532 continue; /* old pagedir gets freed in one */
534 free_page(ADDRESS(pfn));
536 free_pages(this_pagedir, pagedir_order);
/*
 * create_suspend_pagedir - allocate the pagedir and one page per entry
 * @nr_copy_pages: number of entries needed
 *
 * Derives pagedir_order from SUSPEND_PD_PAGES(@nr_copy_pages), allocates the
 * pagedir with GFP_ATOMIC | __GFP_COLD and marks each of its pages Nosave.
 * A zeroed page is then allocated for every entry's ->address and marked
 * Nosave as well.  free_suspend_pagedir() appears on the allocation-failure
 * path.
 */
539 static suspend_pagedir_t *create_suspend_pagedir(int nr_copy_pages)
542 suspend_pagedir_t *pagedir;
546 pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages));
548 p = pagedir = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, pagedir_order);
552 page = virt_to_page(pagedir);
553 for(i=0; i < 1<<pagedir_order; i++)
554 SetPageNosave(page++);
556 while(nr_copy_pages--) {
557 p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
559 free_suspend_pagedir((unsigned long) pagedir);
563 SetPageNosave(virt_to_page(p->address));
/*
 * prepare_suspend_console - raise the log level and switch to the suspend VC
 *
 * Saves console_loglevel in orig_loglevel, installs new_loglevel and
 * remembers fg_console.  With SUSPEND_CONSOLE defined it allocates that
 * console, switches to it with set_console(), waits for it to become active
 * and redirects kernel messages there, keeping the previous kmsg_redirect in
 * orig_kmsg.  Callers treat a non-zero return as "could not allocate a
 * console" and carry on.
 */
570 static int prepare_suspend_console(void)
572 orig_loglevel = console_loglevel;
573 console_loglevel = new_loglevel;
576 orig_fgconsole = fg_console;
577 #ifdef SUSPEND_CONSOLE
578 if(vc_allocate(SUSPEND_CONSOLE))
579 /* we can't have a free VC for now. Too bad,
580 * we don't want to mess the screen for now. */
583 set_console (SUSPEND_CONSOLE);
584 if(vt_waitactive(SUSPEND_CONSOLE)) {
585 PRINTK("Bummer. Can't switch VCs.");
588 orig_kmsg = kmsg_redirect;
589 kmsg_redirect = SUSPEND_CONSOLE;
/*
 * restore_console - undo prepare_suspend_console()
 *
 * Restores console_loglevel from orig_loglevel and, with SUSPEND_CONSOLE
 * defined, switches back to the console saved in orig_fgconsole.
 */
595 static void restore_console(void)
597 console_loglevel = orig_loglevel;
598 #ifdef SUSPEND_CONSOLE
599 set_console (orig_fgconsole);
/*
 * prepare_suspend_processes - sync filesystems, then freeze all tasks
 *
 * sys_sync() runs first because it needs pdflush (see the in-line note).  A
 * non-zero result from freeze_processes() is reported as a failed suspend.
 */
604 static int prepare_suspend_processes(void)
606 sys_sync(); /* Syncing needs pdflushd, so do it before stopping processes */
607 if (freeze_processes()) {
608 printk( KERN_ERR "Suspend failed: Not all processes stopped!\n" );
/*
 * free_some_memory - shrink memory before the image is built
 *
 * Prints "Freeing memory: " and keeps calling shrink_all_memory(10000) for
 * as long as it returns non-zero.
 */
616 * Try to free as much memory as possible, but do not OOM-kill anyone
618 * Notice: all userland should be stopped at this point, or livelock is possible.
620 static void free_some_memory(void)
622 printk("Freeing memory: ");
623 while (shrink_all_memory(10000))
628 /* Make disk drivers accept operations, again */
/*
 * drivers_unsuspend - bring devices back with device_resume():
 * RESUME_RESTORE_STATE first, then RESUME_ENABLE.
 */
629 static void drivers_unsuspend(void)
631 device_resume(RESUME_RESTORE_STATE);
632 device_resume(RESUME_ENABLE);
635 /* Called from process context */
/*
 * drivers_suspend - stop device drivers ahead of the atomic copy
 *
 * Issues device_suspend(4, ...) for SUSPEND_NOTIFY, SUSPEND_SAVE_STATE and
 * SUSPEND_DISABLE, in that order.  If pm_suspend_state is clear, PM_SUSPEND
 * is broadcast with pm_send_all(); a failure there, or a state that is
 * already raised, is logged with KERN_WARNING.
 * do_software_suspend() proceeds only when this returns 0.
 */
636 static int drivers_suspend(void)
638 device_suspend(4, SUSPEND_NOTIFY);
639 device_suspend(4, SUSPEND_SAVE_STATE);
640 device_suspend(4, SUSPEND_DISABLE);
641 if(!pm_suspend_state) {
642 if(pm_send_all(PM_SUSPEND,(void *)3)) {
643 printk(KERN_WARNING "Problem while sending suspend event\n");
648 printk(KERN_WARNING "PM suspend state already raised\n");
653 #define RESUME_PHASE1 1 /* Called from interrupts disabled */
654 #define RESUME_PHASE2 2 /* Called with interrupts enabled */
655 #define RESUME_ALL_PHASES (RESUME_PHASE1 | RESUME_PHASE2)
/*
 * drivers_resume - resume devices in one or both phases
 * @flags: RESUME_PHASE1, RESUME_PHASE2 or RESUME_ALL_PHASES
 *
 * Phase 1 runs device_resume(RESUME_RESTORE_STATE) followed by
 * device_resume(RESUME_ENABLE).  Phase 2 broadcasts PM_RESUME with
 * pm_send_all() when pm_suspend_state is set; a failed broadcast, or a state
 * that was never raised, is logged with KERN_WARNING.  With SUSPEND_CONSOLE
 * defined the foreground console is redrawn with update_screen().
 */
656 static void drivers_resume(int flags)
658 if (flags & RESUME_PHASE1) {
659 device_resume(RESUME_RESTORE_STATE);
660 device_resume(RESUME_ENABLE);
662 if (flags & RESUME_PHASE2) {
663 if(pm_suspend_state) {
664 if(pm_send_all(PM_RESUME,(void *)0))
665 printk(KERN_WARNING "Problem while sending resume event\n");
668 printk(KERN_WARNING "PM suspend state wasn't raised\n");
670 #ifdef SUSPEND_CONSOLE
671 update_screen(fg_console); /* Hmm, is this the problem? */
/*
 * suspend_prepare_image - size, allocate and fill the suspend image
 *
 *  1. Counts the pages to save with count_and_copy_data_pages(NULL) and
 *     adds PAGES_FOR_IO to get the number of pages needed.
 *  2. Checks that enough free pages and enough free swap exist; either
 *     shortfall is logged with KERN_CRIT.
 *  3. Allocates the pagedir (panics if that fails) and records
 *     nr_copy_pages / pagedir_order in the *_check variables that
 *     do_magic_resume_2() verifies.
 *  4. Calls drain_local_pages() and performs the actual copy, comparing the
 *     number copied against the earlier count.
 *
 * do_magic_suspend_2() stores the result in is_problem.
 */
676 static int suspend_prepare_image(void)
679 unsigned int nr_needed_pages = 0;
683 pagedir_nosave = NULL;
684 printk( "/critical section: Counting pages to copy" );
685 nr_copy_pages = count_and_copy_data_pages(NULL);
686 nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;
688 printk(" (pages needed: %d+%d=%d free: %d)\n",nr_copy_pages,PAGES_FOR_IO,nr_needed_pages,nr_free_pages());
689 if(nr_free_pages() < nr_needed_pages) {
690 printk(KERN_CRIT "%sCouldn't get enough free pages, on %d pages short\n",
691 name_suspend, nr_needed_pages-nr_free_pages());
695 si_swapinfo(&i); /* FIXME: si_swapinfo(&i) returns all swap devices information.
696 We should only consider resume_device. */
697 if (i.freeswap < nr_needed_pages) {
698 printk(KERN_CRIT "%sThere's not enough swap space available, on %ld pages short\n",
699 name_suspend, nr_needed_pages-i.freeswap);
703 PRINTK( "Alloc pagedir\n" );
704 pagedir_save = pagedir_nosave = create_suspend_pagedir(nr_copy_pages);
705 if(!pagedir_nosave) {
706 /* Shouldn't happen */
707 printk(KERN_CRIT "%sCouldn't allocate enough pages\n",name_suspend);
708 panic("Really should not happen");
711 nr_copy_pages_check = nr_copy_pages;
712 pagedir_order_check = pagedir_order;
714 drain_local_pages(); /* During allocating of suspend pagedir, new cold pages may appear. Kill them */
715 if (nr_copy_pages != count_and_copy_data_pages(pagedir_nosave)) /* copy */
719 * End of critical section. From now on, we can write to memory,
720 * but we should not touch disk. This specially means we must _not_
721 * touch swap space! Except we must write out our image of course.
724 printk( "critical section/: done (%d pages copied)\n", nr_copy_pages );
/*
 * suspend_save_image - write the prepared image out to swap
 *
 * Calls write_suspend_image() and then lock_swapdevices().
 * NOTE(review): the int returned by write_suspend_image() is not used by
 * this call.
 */
728 static void suspend_save_image(void)
733 write_suspend_image();
734 lock_swapdevices(); /* This will unlock ignored swap devices since writing is finished */
736 /* It is important _NOT_ to umount filesystems at this point. We want
737 * them synced (in case something goes wrong) but we DO not want to mark
738 * filesystem clean: it is not. (And it does not matter, if we resume
739 * correctly, we'll mark system clean, anyway.)
/*
 * suspend_power_down - final step after the image has been written
 *
 * Announces the power-down attempt.  machine_restart() is selected when
 * TEST_SWSUSP differs from the state of the Ctrl modifier in shift_state.
 * The "Probably not capable for powerdown. System halted." message is
 * printed if execution gets that far.
 */
743 static void suspend_power_down(void)
747 printk(KERN_EMERG "%s%s Trying to power down.\n", name_suspend, TEST_SWSUSP ? "Disable TEST_SWSUSP. NOT ": "");
749 PRINTK(KERN_EMERG "shift_state: %04x\n", shift_state);
751 if (TEST_SWSUSP ^ (!!(shift_state & (1 << KG_CTRL))))
752 machine_restart(NULL);
760 printk(KERN_EMERG "%sProbably not capable for powerdown. System halted.\n", name_suspend);
/*
 * do_magic_resume_1 - first C hook on the resume path
 *
 * Takes suspend_pagedir_lock with interrupts disabled and busy-waits one
 * second (mdelay(1000)) so that outstanding DMA has a chance to finish
 * before memory is overwritten.
 */
770 void do_magic_resume_1(void)
774 spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */
776 PRINTK( "Waiting for DMAs to settle down...\n");
777 mdelay(1000); /* We do not want some readahead with DMA to corrupt our memory, right?
778 Do it with disabled interrupts for best effect. That way, if some
779 driver scheduled DMA, we have good chance for DMA to finish ;-). */
/*
 * do_magic_resume_2 - second C hook on the resume path
 *
 * BUGs unless nr_copy_pages and pagedir_order match the values recorded by
 * suspend_prepare_image().  Flushes global TLB entries, frees the pagedir
 * saved at suspend time (pagedir_save), releases suspend_pagedir_lock,
 * resumes the drivers (all phases) and restores the swap signature with
 * mark_swapfiles(MARK_SWAP_RESUME).
 */
782 void do_magic_resume_2(void)
784 BUG_ON (nr_copy_pages_check != nr_copy_pages);
785 BUG_ON (pagedir_order_check != pagedir_order);
787 __flush_tlb_global(); /* Even mappings of "global" things (vmalloc) need to be fixed */
789 PRINTK( "Freeing prev allocated pagedir\n" );
790 free_suspend_pagedir((unsigned long) pagedir_save);
791 spin_unlock_irq(&suspend_pagedir_lock);
792 drivers_resume(RESUME_ALL_PHASES);
794 PRINTK( "Fixing swap signatures... " );
795 mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME);
798 #ifdef SUSPEND_CONSOLE
799 update_screen(fg_console); /* Hmm, is this the problem? */
803 /* do_magic() is implemented in arch/?/kernel/suspend_asm.S, and basically does:
806 do_magic_suspend_1();
807 save_processor_state();
809 do_magic_suspend_2();
812 GO_TO_SWAPPER_PAGE_TABLES
816 restore_processor_state();
/*
 * do_magic_suspend_1 - first C hook on the suspend path; takes
 * suspend_pagedir_lock with interrupts disabled.
 */
821 void do_magic_suspend_1(void)
826 spin_lock_irq(&suspend_pagedir_lock);
/*
 * do_magic_suspend_2 - second C hook on the suspend path
 *
 * Builds the image with suspend_prepare_image() and drops
 * suspend_pagedir_lock.  The normal path undoes kernel_fpu_begin(), writes
 * the image and powers down.  The recovery path logs the failure, waits one
 * second, frees the pagedir with free_pages() under the lock and restores
 * the swap signature with mark_swapfiles(MARK_SWAP_RESUME).
 */
829 void do_magic_suspend_2(void)
833 is_problem = suspend_prepare_image();
834 spin_unlock_irq(&suspend_pagedir_lock);
836 kernel_fpu_end(); /* save_processor_state() does kernel_fpu_begin, and we need to revert it in order to pass in_atomic() checks */
838 suspend_save_image();
839 suspend_power_down(); /* FIXME: if suspend_power_down is commented out, console is lost after few suspends ?! */
842 printk(KERN_EMERG "%sSuspend failed, trying to recover...\n", name_suspend);
843 MDELAY(1000); /* So user can wait and report us messages if armageddon comes :-) */
847 spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */
850 free_pages((unsigned long) pagedir_nosave, pagedir_order);
851 spin_unlock_irq(&suspend_pagedir_lock);
852 mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME);
853 PRINTK(KERN_WARNING "%sLeaving do_magic_suspend_2...\n", name_suspend);
/*
 * do_software_suspend - top-level suspend sequence
 *
 * Runs arch_prepare_suspend(), prepares the console (a failure there is
 * only reported) and freezes the tasks.  When freezing succeeds the drivers
 * are suspended and, if drivers_suspend() returns 0, control passes to the
 * do_magic() path described in the comments below.
 * software_suspend_enabled is set back to 1 at the end.
 */
856 static void do_software_suspend(void)
858 arch_prepare_suspend();
859 if (prepare_suspend_console())
860 printk( "%sCan't allocate a console... proceeding\n", name_suspend);
861 if (!prepare_suspend_processes()) {
863 /* At this point, all user processes and "dangerous"
864 kernel threads are stopped. Free some memory, as we
865 need half of memory free. */
869 /* No need to invalidate any vfsmnt list -- they will be valid after resume, anyway.
871 * We sync here -- so you have consistent filesystem state when things go wrong.
872 * -- so that noone writes to disk after we do atomic copy of data.
874 PRINTK("Syncing disks before copy\n");
877 /* Save state of all device drivers, and stop them. */
878 if(drivers_suspend()==0)
879 /* If stopping device drivers worked, we proceed basically into
880 * suspend_save_image.
882 * do_magic(0) returns after system is resumed.
884 * do_magic() copies all "used" memory to "free" memory, then
885 * unsuspends all device drivers, and writes memory to disk
886 * using normal kernel mechanism.
889 PRINTK("Restarting processes...\n");
892 software_suspend_enabled = 1;
/*
 * software_suspend - exported entry point for suspending the machine
 *
 * Checks software_suspend_enabled first, clears it for the duration of the
 * attempt, BUGs when called from interrupt context and then runs
 * do_software_suspend().
 */
898 * This is main interface to the outside world. It needs to be
899 * called from process context.
901 void software_suspend(void)
903 if(!software_suspend_enabled)
906 software_suspend_enabled = 0;
907 BUG_ON(in_interrupt());
908 do_software_suspend();
911 /* More restore stuff */
913 /* FIXME: Why not memcpy(to, from, 1<<pagedir_order*PAGE_SIZE)? */
/*
 * copy_pagedir - copy a pagedir one page at a time
 * @to:   destination pagedir
 * @from: source pagedir
 *
 * Copies 1 << pagedir_order pages with copy_page(), advancing both
 * pointers by PAGE_SIZE per step.
 */
914 static void copy_pagedir(suspend_pagedir_t *to, suspend_pagedir_t *from)
917 char *topointer=(char *)to, *frompointer=(char *)from;
919 for(i=0; i < 1 << pagedir_order; i++) {
920 copy_page(topointer, frompointer);
921 topointer += PAGE_SIZE;
922 frompointer += PAGE_SIZE;
/*
 * does_collide_order - test an address range against the pages to restore
 * @pagedir: pbe array to scan (nr_copy_pages entries)
 * @addr:    start of the range
 * @order:   the range covers PAGE_SIZE << order bytes
 *
 * Looks for any entry whose orig_address falls inside [addr, addre).
 * does_collide(addr) is the order-0 form applied to pagedir_nosave.
 */
926 #define does_collide(addr) does_collide_order(pagedir_nosave, addr, 0)
929 * Returns true if given address/order collides with any orig_address
931 static int does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr,
935 unsigned long addre = addr + (PAGE_SIZE<<order);
937 for(i=0; i < nr_copy_pages; i++)
938 if((pagedir+i)->orig_address >= addr &&
939 (pagedir+i)->orig_address < addre)
/*
 * check_pagedir - give every entry a target page that is safe to overwrite
 *
 * For each of the nr_copy_pages entries, zeroed pages are requested with
 * GFP_ATOMIC until one is found that does not collide with any
 * orig_address; that page becomes the entry's ->address.
 * NOTE(review): confirm whether pages rejected by does_collide() are
 * released afterwards.
 */
946 * We check here that pagedir & pages it points to won't collide with pages
947 * where we're going to restore from the loaded pages later
949 static int check_pagedir(void)
953 for(i=0; i < nr_copy_pages; i++) {
957 addr = get_zeroed_page(GFP_ATOMIC);
960 } while (does_collide(addr));
962 (pagedir_nosave+i)->address = addr;
/*
 * relocate_pagedir - move the pagedir if it overlaps pages to be restored
 *
 * Nothing is moved when the current pagedir does not collide with any
 * orig_address ("not necessary").  Otherwise blocks of pagedir_order pages
 * are allocated with GFP_ATOMIC until a non-colliding one turns up; the
 * pagedir is copied there with copy_pagedir() and pagedir_nosave is
 * repointed.  free_pages() is used to release blocks at the end.
 * NOTE(review): only the first PAGE_SIZE bytes of each candidate block are
 * cleared by the memset below; confirm that this is intended.
 */
967 static int relocate_pagedir(void)
970 * We have to avoid recursion (not to overflow kernel stack),
971 * and that's why code looks pretty cryptic
973 suspend_pagedir_t *new_pagedir, *old_pagedir = pagedir_nosave;
974 void **eaten_memory = NULL;
975 void **c = eaten_memory, *m, *f;
977 printk("Relocating pagedir");
979 if(!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) {
980 printk("not necessary\n");
984 while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order))) {
985 memset(m, 0, PAGE_SIZE);
986 if (!does_collide_order(old_pagedir, (unsigned long)m, pagedir_order))
997 pagedir_nosave = new_pagedir = m;
998 copy_pagedir(new_pagedir, old_pagedir);
1006 free_pages((unsigned long)f, pagedir_order);
/*
 * sanity_check_failed - log why the image was rejected
 * @reason: text printed after the name_resume prefix, at KERN_ERR
 *
 * sanity_check() returns this function's result directly.
 */
1013 * Sanity check if this image makes sense with this kernel/swap context
1014 * I really don't think that it's foolproof but more than nothing..
1017 static int sanity_check_failed(char *reason)
1019 printk(KERN_ERR "%s%s\n",name_resume,reason);
/*
 * sanity_check - compare a suspend header with the running kernel
 * @sh: header read from the image
 *
 * Checks, in order: version code, num_physpages, machine string (8 bytes),
 * version string (20 bytes), number of online CPUs and PAGE_SIZE.  The first
 * mismatch is reported through sanity_check_failed() and that function's
 * result is returned.
 */
1023 static int sanity_check(struct suspend_header *sh)
1025 if(sh->version_code != LINUX_VERSION_CODE)
1026 return sanity_check_failed("Incorrect kernel version");
1027 if(sh->num_physpages != num_physpages)
1028 return sanity_check_failed("Incorrect memory size");
1029 if(strncmp(sh->machine, system_utsname.machine, 8))
1030 return sanity_check_failed("Incorrect machine type");
1031 if(strncmp(sh->version, system_utsname.version, 20))
1032 return sanity_check_failed("Incorrect version");
1033 if(sh->num_cpus != num_online_cpus())
1034 return sanity_check_failed("Incorrect number of cpus");
1035 if(sh->page_size != PAGE_SIZE)
1036 return sanity_check_failed("Incorrect PAGE_SIZE");
/*
 * bdev_read_page - read one page from a block device
 * @bdev: device to read from
 * @pos:  byte offset; must be a multiple of PAGE_SIZE (BUG otherwise)
 * @buf:  destination, PAGE_SIZE bytes
 *
 * Reads block pos/PAGE_SIZE with __bread() using a PAGE_SIZE block size and
 * copies b_data into @buf.  Callers in this file treat a non-zero return as
 * an I/O error.
 */
1040 static int bdev_read_page(struct block_device *bdev, long pos, void *buf)
1042 struct buffer_head *bh;
1043 BUG_ON (pos%PAGE_SIZE);
1044 bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
1045 if (!bh || (!bh->b_data)) {
1048 memcpy(buf, bh->b_data, PAGE_SIZE); /* FIXME: may need kmap() */
1049 BUG_ON(!buffer_uptodate(bh));
/*
 * bdev_write_page - write one page to a block device
 * @bdev: device to write to
 * @pos:  byte offset; must be a multiple of PAGE_SIZE (BUG otherwise)
 * @buf:  source, PAGE_SIZE bytes
 *
 * NOTE(review): the body contains both a __bread()/memcpy()/
 * generic_make_request() write sequence and a "Fixing swap signatures
 * unimplemented" warning; confirm which of the two is actually compiled in.
 */
1054 static int bdev_write_page(struct block_device *bdev, long pos, void *buf)
1057 struct buffer_head *bh;
1058 BUG_ON (pos%PAGE_SIZE);
1059 bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
1060 if (!bh || (!bh->b_data)) {
1063 memcpy(bh->b_data, buf, PAGE_SIZE); /* FIXME: may need kmap() */
1064 BUG_ON(!buffer_uptodate(bh));
1065 generic_make_request(WRITE, bh);
1066 if (!buffer_uptodate(bh))
1067 printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unsuccessful...\n", name_resume, resume_file);
1072 printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unimplemented...\n", name_resume, resume_file);
/* Converts a device name string to a dev_t; read_suspend_image() uses it to resolve its specialfile argument. */
1076 extern dev_t __init name_to_dev_t(const char *line);
/*
 * __read_suspend_image - load a suspend image from an opened block device
 * @bdev:     device holding the image
 * @cur:      one-page scratch buffer
 * @noresume: NOTE(review): confirm where this is consulted;
 *            software_resume() passes 1 when resume_status is NORESUME
 *
 * Sequence:
 *  - read page 0; a plain "SWAP-SPACE"/"SWAPSPACE2" signature is reported
 *    as normal swap space;
 *  - remember link.next (PREPARENEXT), turn an "S1"/"S2" signature back into
 *    the normal swap signature and write page 0 back with bdev_write_page();
 *    any other signature ends in panic();
 *  - switch to the suspend console, read the header page and run
 *    sanity_check() on it;
 *  - take pagedir_save, nr_copy_pages and the pagedir size from the header,
 *    allocate pagedir_nosave and read the pagedir pages in reverse order;
 *  - run relocate_pagedir() and check_pagedir();
 *  - read every data page into the address chosen by check_pagedir().
 * Failed reads of page 0, the header and the pagedir return -EIO.
 */
1078 static int __read_suspend_image(struct block_device *bdev, union diskpage *cur, int noresume)
1081 int i, nr_pgdir_pages;
1083 #define PREPARENEXT \
1084 { next = cur->link.next; \
1085 next.val = swp_offset(next) * PAGE_SIZE; \
1088 if (bdev_read_page(bdev, 0, cur)) return -EIO;
1090 if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) ||
1091 (!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) {
1092 printk(KERN_ERR "%sThis is normal swap space\n", name_resume );
1096 PREPARENEXT; /* We have to read next position before we overwrite it */
1098 if (!memcmp("S1",cur->swh.magic.magic,2))
1099 memcpy(cur->swh.magic.magic,"SWAP-SPACE",10);
1100 else if (!memcmp("S2",cur->swh.magic.magic,2))
1101 memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
1105 panic("%sUnable to find suspended-data signature (%.10s - misspelled?\n",
1106 name_resume, cur->swh.magic.magic);
1109 /* We don't do a sanity check here: we want to restore the swap
1110 whatever version of kernel made the suspend image;
1111 We need to write swap, but swap is *not* enabled so
1112 we must write the device directly */
1113 printk("%s: Fixing swap signatures %s...\n", name_resume, resume_file);
1114 bdev_write_page(bdev, 0, cur);
1117 if (prepare_suspend_console())
1118 printk("%sCan't allocate a console... proceeding\n", name_resume);
1119 printk( "%sSignature found, resuming\n", name_resume );
1122 if (bdev_read_page(bdev, next.val, cur)) return -EIO;
1123 if (sanity_check(&cur->sh)) /* Is this same machine? */
1127 pagedir_save = cur->sh.suspend_pagedir;
1128 nr_copy_pages = cur->sh.num_pbes;
1129 nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
1130 pagedir_order = get_bitmask_order(nr_pgdir_pages);
1132 pagedir_nosave = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order);
1133 if (!pagedir_nosave)
1136 PRINTK( "%sReading pagedir, ", name_resume );
1138 /* We get pages in reverse order of saving! */
1139 for (i=nr_pgdir_pages-1; i>=0; i--) {
1141 cur = (union diskpage *)((char *) pagedir_nosave)+i;
1142 if (bdev_read_page(bdev, next.val, cur)) return -EIO;
1147 if (relocate_pagedir())
1149 if (check_pagedir())
1152 printk( "Reading image data (%d pages): ", nr_copy_pages );
1153 for(i=0; i < nr_copy_pages; i++) {
1154 swp_entry_t swap_address = (pagedir_nosave+i)->swap_address;
1157 /* You do not need to check for overlaps...
1158 ... check_pagedir already did this work */
1159 if (bdev_read_page(bdev, swp_offset(swap_address) * PAGE_SIZE, (char *)((pagedir_nosave+i)->address)))
/*
 * read_suspend_image - open the resume device and load the image from it
 * @specialfile: device name, resolved with name_to_dev_t()
 * @noresume:    forwarded to __read_suspend_image()
 *
 * Allocates a zeroed scratch page, opens the device with open_by_devnum(),
 * sets its block size to PAGE_SIZE, reads the image and releases the device
 * with blkdev_put().  -ENOMEM is used when the scratch page cannot be
 * allocated.  The scratch page is freed and the outcome is reported:
 * success, I/O error, no such file, out of memory, or the numeric error.
 */
1166 static int read_suspend_image(const char * specialfile, int noresume)
1168 union diskpage *cur;
1169 unsigned long scratch_page = 0;
1171 char b[BDEVNAME_SIZE];
1173 resume_device = name_to_dev_t(specialfile);
1174 scratch_page = get_zeroed_page(GFP_ATOMIC);
1175 cur = (void *) scratch_page;
1177 struct block_device *bdev;
1178 printk("Resuming from device %s\n",
1179 __bdevname(resume_device, b));
1180 bdev = open_by_devnum(resume_device, FMODE_READ, BDEV_RAW);
1182 error = PTR_ERR(bdev);
1184 set_blocksize(bdev, PAGE_SIZE);
1185 error = __read_suspend_image(bdev, cur, noresume);
1186 blkdev_put(bdev, BDEV_RAW);
1188 } else error = -ENOMEM;
1191 free_page(scratch_page);
1194 PRINTK("Reading resume file was successful\n");
1199 printk( "%sI/O error\n", name_resume);
1202 printk( "%s%s: No such file or directory\n", name_resume, specialfile);
1205 printk( "%sNot enough memory\n", name_resume);
1208 printk( "%sError %d resuming\n", name_resume, error );
/*
 * software_resume - look for a suspend image at boot and resume from it
 *
 * Warns that SMP support is malfunctioning when more than one CPU is
 * online.  Sets software_suspend_enabled.  With NORESUME,
 * read_suspend_image(resume_file, 1) is still called before "disabled" is
 * printed.  Otherwise the log level is switched to new_loglevel, an empty
 * resume_file combined with RESUME_SPECIFIED is reported as "suspension
 * device unspecified", and read_suspend_image(resume_file, 0) is attempted.
 * console_loglevel is put back from orig_loglevel at the end.
 */
1215 * Called from init kernel_thread.
1216 * We check if we have an image and if so we try to resume
1219 void software_resume(void)
1221 if (num_online_cpus() > 1) {
1222 printk(KERN_WARNING "Software Suspend has malfunctioning SMP support. Disabled :(\n");
1225 /* We enable the possibility of machine suspend */
1226 software_suspend_enabled = 1;
1230 printk( "%s", name_resume );
1231 if (resume_status == NORESUME) {
1233 read_suspend_image(resume_file, 1);
1234 printk( "disabled\n" );
1239 orig_loglevel = console_loglevel;
1240 console_loglevel = new_loglevel;
1242 if (!resume_file[0] && resume_status == RESUME_SPECIFIED) {
1243 printk( "suspension device unspecified\n" );
1247 printk( "resuming from %s\n", resume_file);
1248 if (read_suspend_image(resume_file, 0))
1251 panic("This never returns");
1254 console_loglevel = orig_loglevel;
/*
 * resume_setup - handler for the "resume=" boot option
 * @str: device name; at most 255 bytes are copied into the 256-byte
 *       resume_file buffer
 *
 * Tests for NORESUME first, then records the name and sets resume_status to
 * RESUME_SPECIFIED.
 */
1258 static int __init resume_setup(char *str)
1260 if (resume_status == NORESUME)
1263 strncpy( resume_file, str, 255 );
1264 resume_status = RESUME_SPECIFIED;
/*
 * noresume_setup - handler for the "noresume" boot option; sets
 * resume_status to NORESUME.
 */
1269 static int __init noresume_setup(char *str)
1271 resume_status = NORESUME;
/* Boot-parameter registration for the two handlers, followed by the symbols exported from this file. */
1275 __setup("noresume", noresume_setup);
1276 __setup("resume=", resume_setup);
1278 EXPORT_SYMBOL(software_suspend);
1279 EXPORT_SYMBOL(software_suspend_enabled);
1280 EXPORT_SYMBOL(refrigerator);