3 * Purpose: Migrate data from physical pages with excessive correctable
4 * errors to new physical pages. Keep the old pages on a discard
7 * Copyright (C) 2008 SGI - Silicon Graphics Inc.
8 * Copyright (C) 2008 Russ Anderson <rja@sgi.com>
11 #include <linux/sysdev.h>
12 #include <linux/types.h>
13 #include <linux/sched.h>
14 #include <linux/module.h>
15 #include <linux/kernel.h>
16 #include <linux/smp.h>
17 #include <linux/workqueue.h>
19 #include <linux/swap.h>
20 #include <linux/vmalloc.h>
21 #include <linux/migrate.h>
22 #include <linux/page-isolation.h>
23 #include <linux/memcontrol.h>
24 #include <linux/kobject.h>
25 #include <linux/kthread.h>
28 #include <asm/system.h>
29 #include <asm/sn/sn_cpuid.h>
32 #define BADRAM_BASENAME "badram"
/* Number of slots in the circular buffer of pending corrected errors. */
33 #define CE_HISTORY_LENGTH 30
/* Circular buffer of physical addresses (plus node id) awaiting migration. */
39 static struct cpe_info cpe[CE_HISTORY_LENGTH];
41 static int cpe_polling_enabled = 1;
/* Migration statistics, reported by the sysfs "badram" show handler. */
44 static int mstat_cannot_isolate;	/* page could not be isolated from LRU */
45 static int mstat_failed_to_discard;	/* migrate_pages() failed for the page */
46 static int mstat_already_marked;	/* page was already PageMemError */
47 static int mstat_already_on_list;	/* address already queued for migration */
49 /* IRQ handler notifies this wait queue on receipt of an IRQ */
50 DECLARE_WAIT_QUEUE_HEAD(cpe_activate_IRQ_wq)
/* Completed by kthread_cpe_migrate on exit; module unload waits on it. */
51 static DECLARE_COMPLETION(kthread_cpe_migrated_exited);
53 DEFINE_SPINLOCK(cpe_migrate_lock);
/*
 * get_physical_address
 *	Extract the faulting physical address (and, when reported, the node)
 *	from a SAL error record.  Only corrected-severity records carrying a
 *	memory-device-error section are used.
 *	NOTE(review): parts of this function are elided in this excerpt, so
 *	the return-value convention is not visible here.
 */
56 get_physical_address(void *buffer, u64 *paddr, u16 *node)
58 sal_log_record_header_t *rh;
59 sal_log_mem_dev_err_info_t *mdei;
60 ia64_err_rec_t *err_rec;
61 sal_log_platform_err_info_t *plat_err;
65 rh = &err_rec->sal_elog_header;
70 * Make sure it is a corrected error.
72 if (rh->severity != sal_log_severity_corrected)
75 plat_err = (sal_log_platform_err_info_t *)&err_rec->proc_err;
77 guid = plat_err->mem_dev_err.header.guid;
/* Only a memory-device error section carries a usable physical address. */
78 if (efi_guidcmp(guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID) == 0) {
82 mdei = &plat_err->mem_dev_err;
/* Honor the record's validity bits before trusting each field. */
83 if (mdei->valid.oem_data) {
84 if (mdei->valid.physical_addr)
85 *paddr = mdei->physical_addr;
87 if (mdei->valid.node) {
/* On SN2 the record reports a NASID; convert to a compact node id. */
88 if (ia64_platform_is("sn2"))
89 *node = nasid_to_cnodeid(mdei->node);
/*
 * alloc_migrate_page
 *	migrate_pages() allocation callback: allocate one replacement page
 *	on the requested node.  The first and third parameters exist only to
 *	match the callback signature and are unused.
 */
98 alloc_migrate_page(struct page *ignored, unsigned long node, int **x)
101 return alloc_pages_node(node, GFP_HIGHUSER_MOVABLE, 0);
/*
 * validate_paddr_page
 *	Sanity-check a physical address before migration: it must be a valid
 *	physical address with a valid pfn, and the page must not already be
 *	marked bad.  NOTE(review): the return statements are elided in this
 *	excerpt, so exact error codes are not visible here.
 */
105 validate_paddr_page(u64 paddr)
112 if (!ia64_phys_addr_valid(paddr))
115 if (!pfn_valid(paddr >> PAGE_SHIFT))
118 page = phys_to_page(paddr);
/* Count pages that were already flagged PageMemError by a prior CPE. */
119 if (PageMemError(page))
120 mstat_already_marked++;
124 extern int isolate_lru_page(struct page *);
/*
 * ia64_mca_cpe_move_page
 *	Migrate the data at @paddr to a newly allocated page on @node, and
 *	keep the old error-prone page out of circulation on the bad-page
 *	list.  NOTE(review): several lines (error paths, marking the old
 *	page bad) are elided in this excerpt.
 */
126 ia64_mca_cpe_move_page(u64 paddr, u32 node)
132 ret = validate_paddr_page(paddr);
137 * convert physical address to page number
139 page = phys_to_page(paddr);
/* Take the page off the LRU so the migration code owns it exclusively. */
142 ret = isolate_lru_page(page);
144 mstat_cannot_isolate++;
148 list_add(&page->lru, &pagelist);
/* alloc_migrate_page allocates the destination page on @node. */
149 ret = migrate_pages(&pagelist, alloc_migrate_page, node, 0);
152 list_add_tail(&page->lru, &badpagelist);
154 mstat_failed_to_discard++;
156 * The page failed to migrate and is not on the bad page list.
157 * Clearing the error bit will allow another attempt to migrate
158 * if it gets another correctable error.
160 ClearPageMemError(page);
168 * Pulls the physical address off the list and calls the migration code.
169 * Will process all the addresses on the list.
172 cpe_process_queue(void)
/* Consume entries from cpe_tail toward cpe_head around the ring buffer. */
179 paddr = cpe[cpe_tail].paddr;
182 * There is a valid entry that needs processing.
184 node = cpe[cpe_tail].node;
186 ret = ia64_mca_cpe_move_page(paddr, node);
189 * Even though the return status is negative,
190 * clear the entry. If the same address has
191 * another CPE it will be re-added to the list.
193 cpe[cpe_tail].paddr = 0;
/* Advance the tail and wrap at the end of the circular buffer. */
196 if (++cpe_tail >= CE_HISTORY_LENGTH)
199 } while (cpe_tail != cpe_head);
/* cpe_list_empty(): empty when head meets tail and that slot holds no
 * address (paddr == 0 marks a free slot throughout this driver). */
206 return (cpe_head == cpe_tail) && (!cpe[cpe_head].paddr);
210 * kthread_cpe_migrate
211 * kthread_cpe_migrate is created at module load time and lives
212 * until the module is removed. When not active, it will sleep.
215 kthread_cpe_migrate(void *ignore)
/* Sleep until the interrupt path queues work; the second half of the wake
 * condition is elided here — presumably a module-exit flag (see the
 * cpe_active store in the exit handler). */
221 (void)wait_event_interruptible(cpe_activate_IRQ_wq,
222 (!cpe_list_empty() ||
224 cpe_process_queue(); /* process work */
/* Handshake: tell cpe_migrate_external_handler_exit() we are gone. */
226 complete(&kthread_cpe_migrated_exited);
/* Protects cpe_head and the cpe[] slots against concurrent queuing. */
230 DEFINE_SPINLOCK(cpe_list_lock);
234 * Get the physical address out of the CPE record, add it
235 * to the list of addresses to migrate (if not already on),
236 * and schedule the back end worker task. This is called
237 * in interrupt context so cannot directly call the migration
243 * 1 on Success, -1 on failure
246 cpe_setup_migrate(void *rec)
250 /* int head, tail; */
256 get_physical_address(rec, &paddr, &node);
257 ret = validate_paddr_page(paddr);
/* Skip duplicates: the same page can fault again before it is migrated. */
261 if (!cpe_list_empty())
262 for (i = 0; i < CE_HISTORY_LENGTH; i++) {
/* NOTE(review): PAGE_ALIGN rounds UP to the next page boundary, so two
 * addresses within the same page can compare unequal (and an address at
 * an exact boundary can match the page below it).  Masking with
 * PAGE_MASK looks like what was intended — confirm before changing. */
263 if (PAGE_ALIGN(cpe[i].paddr) == PAGE_ALIGN(paddr)) {
264 mstat_already_on_list++;
265 return 1; /* already on the list */
269 if (!spin_trylock(&cpe_list_lock)) {
271 * Someone else has the lock. To avoid spinning in interrupt
272 * handler context, bail.
/* Only claim the head slot if it is free (paddr == 0 marks free). */
277 if (cpe[cpe_head].paddr == 0) {
278 cpe[cpe_head].node = node;
279 cpe[cpe_head].paddr = paddr;
/* Advance the head and wrap at the end of the circular buffer. */
281 if (++cpe_head >= CE_HISTORY_LENGTH)
284 spin_unlock(&cpe_list_lock);
/* Kick the migration kthread to drain the queue. */
286 wake_up_interruptible(&cpe_activate_IRQ_wq);
292 * =============================================================================
297 * Free one page from the list of bad pages.
300 free_one_bad_page(unsigned long paddr)
303 struct page *page, *page2, *target;
306 * Verify page address
308 target = phys_to_page(paddr);
/* Walk the bad-page list looking for the requested page. */
309 list_for_each_entry_safe(page, page2, &badpagelist, lru) {
313 ClearPageMemError(page); /* Mark the page as good */
/* Move it to a private list, then hand it back to the LRU. */
315 list_move_tail(&page->lru, &pagelist);
316 putback_lru_pages(&pagelist);
324 * Free all of the pages on the bad pages list.
327 free_all_bad_pages(void)
329 struct page *page, *page2;
/* Clear the error mark on every queued page, then return them all
 * to the LRU in one putback call. */
331 list_for_each_entry_safe(page, page2, &badpagelist, lru) {
332 ClearPageMemError(page); /* Mark the page as good */
335 putback_lru_pages(&badpagelist);
/*
 * badpage_store
 *	sysfs store handler for the "badram" attribute.  Parses the written
 *	value as hexadecimal; based on the two calls below it either frees
 *	the whole bad-page list or frees the single page whose physical
 *	address was written — the selecting condition is elided in this
 *	excerpt, so confirm against the full source.
 */
342 badpage_store(struct kobject *kobj,
343 struct kobj_attribute *attr, const char *buf, size_t count)
345 char optstr[OPT_LEN];
/* Bounded copy of the user-supplied string; strlcpy NUL-terminates. */
353 strlcpy(optstr, buf, len);
355 err = strict_strtoul(optstr, 16, &opt);
360 free_all_bad_pages();
362 free_one_bad_page(opt);
369 * Display the number, size, and addresses of all the pages on the
372 * Note that sysfs provides buf of PAGE_SIZE length. bufend tracks
373 * the remaining space in buf to avoid overflowing.
376 badpage_show(struct kobject *kobj,
377 struct kobj_attribute *attr, char *buf)
380 struct page *page, *page2;
382 char *bufend = buf + PAGE_SIZE;
/* Summary header first; cnt is 0 here, so the bound is the full buffer. */
384 cnt = snprintf(buf, bufend - (buf + cnt),
385 "Memory marked bad: %d kB\n"
386 "Pages marked bad: %d\n"
387 "Unable to isolate on LRU: %d\n"
388 "Unable to migrate: %d\n"
389 "Already marked bad: %d\n"
390 "Already on list: %d\n"
391 "List of bad physical pages\n",
392 total_badpages << (PAGE_SHIFT - 10), total_badpages,
393 mstat_cannot_isolate, mstat_failed_to_discard,
394 mstat_already_marked, mstat_already_on_list
/* Append one " 0x%011lx" entry per bad page; each entry is ~13 bytes,
 * so requiring 20 bytes of headroom keeps every snprintf in bounds. */
397 list_for_each_entry_safe(page, page2, &badpagelist, lru) {
398 if (bufend - (buf + cnt) < 20)
399 break; /* Avoid overflowing the buffer */
400 cnt += snprintf(buf + cnt, bufend - (buf + cnt),
401 " 0x%011lx", page_to_phys(page));
403 cnt += snprintf(buf + cnt, bufend - (buf + cnt), "\n");
405 cnt += snprintf(buf + cnt, bufend - (buf + cnt), "\n");
/* sysfs attribute (user read/write, world read) wiring the "badram" file
 * to the show/store handlers above. */
410 static struct kobj_attribute badram_attr = {
413 .mode = S_IWUSR | S_IRUGO,
415 .show = badpage_show,
416 .store = badpage_store,
/*
 * cpe_migrate_external_handler_init
 *	Module init: create the sysfs "badram" file, start the migration
 *	kthread, and register this driver as the external corrected-error
 *	handler.  Error-path returns are elided in this excerpt.
 */
420 cpe_migrate_external_handler_init(void)
423 struct task_struct *kthread;
/* Creates /sys/kernel/<BADRAM_BASENAME>. */
425 error = sysfs_create_file(kernel_kobj, &badram_attr.attr);
433 kthread = kthread_run(kthread_cpe_migrate, NULL, "cpe_migrate");
434 if (IS_ERR(kthread)) {
/* Thread never ran: complete the exit handshake ourselves so a later
 * wait_for_completion() in the exit path cannot block forever. */
435 complete(&kthread_cpe_migrated_exited);
440 * register external ce handler
442 if (ia64_reg_CE_extension(cpe_setup_migrate)) {
443 printk(KERN_ERR "ia64_reg_CE_extension failed.\n");
446 cpe_poll_enabled = cpe_polling_enabled;
448 printk(KERN_INFO "Registered badram Driver\n");
/*
 * cpe_migrate_external_handler_exit
 *	Module exit: unregister the CE handler, stop the migration kthread,
 *	wait for it to finish, and remove the sysfs file — the reverse of
 *	init, so no new work can arrive while tearing down.
 */
453 cpe_migrate_external_handler_exit(void)
455 /* unregister external mca handlers */
456 ia64_unreg_CE_extension();
459 cpe_active = 0; /* tell kthread_cpe_migrate to exit */
/* Wake the thread so it notices cpe_active == 0, then wait for it. */
460 wake_up_interruptible(&cpe_activate_IRQ_wq);
461 wait_for_completion(&kthread_cpe_migrated_exited);
463 sysfs_remove_file(kernel_kobj, &badram_attr.attr);
466 module_init(cpe_migrate_external_handler_init);
467 module_exit(cpe_migrate_external_handler_exit);
/* Runtime-tunable via /sys/module/.../parameters (mode 0644). */
469 module_param(cpe_polling_enabled, int, 0644);
470 MODULE_PARM_DESC(cpe_polling_enabled,
471 "Enable polling with migration");
473 MODULE_AUTHOR("Russ Anderson <rja@sgi.com>");
474 MODULE_DESCRIPTION("ia64 Corrected Error page migration driver");