arch/ia64/kernel/cpe_migrate.c
/*
 * File:        cpe_migrate.c
 * Purpose:     Migrate data from physical pages with excessive correctable
 *              errors to new physical pages.  Keep the old pages on a discard
 *              list.
 *
 * Copyright (C) 2008 SGI - Silicon Graphics Inc.
 * Copyright (C) 2008 Russ Anderson <rja@sgi.com>
 */

#include <linux/sysdev.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/workqueue.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/vmalloc.h>
#include <linux/migrate.h>
#include <linux/page-isolation.h>
#include <linux/memcontrol.h>
#include <linux/kobject.h>
#include <linux/kthread.h>

#include <asm/page.h>
#include <asm/system.h>
#include <asm/sn/sn_cpuid.h>
#include <asm/mca.h>

#define BADRAM_BASENAME		"badram"
#define CE_HISTORY_LENGTH	30

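/*
 * Ring of recently reported CPE addresses awaiting migration.  Entries are
 * added at cpe_head from the CPE interrupt path and consumed at cpe_tail
 * by the migration kthread.
 */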
struct cpe_info {
	u64	paddr;
	u16	node;
};
static struct cpe_info cpe[CE_HISTORY_LENGTH];

static int cpe_polling_enabled = 1;
static int cpe_head;
static int cpe_tail;
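/* Counters reported through the badram sysfs file */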
static int mstat_cannot_isolate;
static int mstat_failed_to_discard;
static int mstat_already_marked;
static int mstat_already_on_list;

/* IRQ handler notifies this wait queue on receipt of an IRQ */
DECLARE_WAIT_QUEUE_HEAD(cpe_activate_IRQ_wq);
static DECLARE_COMPLETION(kthread_cpe_migrated_exited);
int cpe_active;
DEFINE_SPINLOCK(cpe_migrate_lock);

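/*
 * get_physical_address
 *	Extract the physical address and node of the failing memory from a
 *	corrected-error SAL record.  *paddr is left at 0 if the record is
 *	not a corrected memory device error.
 */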
static void
get_physical_address(void *buffer, u64 *paddr, u16 *node)
{
	sal_log_record_header_t *rh;
	sal_log_mem_dev_err_info_t *mdei;
	ia64_err_rec_t *err_rec;
	sal_log_platform_err_info_t *plat_err;
	efi_guid_t guid;

	err_rec = buffer;
	rh = &err_rec->sal_elog_header;
	*paddr = 0;
	*node = 0;

	/*
	 * Make sure it is a corrected error.
	 */
	if (rh->severity != sal_log_severity_corrected)
		return;

	plat_err = (sal_log_platform_err_info_t *)&err_rec->proc_err;

	guid = plat_err->mem_dev_err.header.guid;
	if (efi_guidcmp(guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID) == 0) {
		/*
		 * Memory cpe
		 */
		mdei = &plat_err->mem_dev_err;
		if (mdei->valid.oem_data) {
			if (mdei->valid.physical_addr)
				*paddr = mdei->physical_addr;

			if (mdei->valid.node) {
				if (ia64_platform_is("sn2"))
					*node = nasid_to_cnodeid(mdei->node);
				else
					*node = mdei->node;
			}
		}
	}
}

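/*
 * alloc_migrate_page
 *	Allocation callback handed to migrate_pages(); allocates the target
 *	page on the node passed in as the private argument.
 */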
static struct page *
alloc_migrate_page(struct page *ignored, unsigned long node, int **x)
{
	return alloc_pages_node(node, GFP_HIGHUSER_MOVABLE, 0);
}

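/*
 * validate_paddr_page
 *	Sanity check the physical address: it must be non-zero, valid for
 *	this machine, and backed by a valid struct page.  Pages that are
 *	already marked bad are counted but not rejected.
 */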
static int
validate_paddr_page(u64 paddr)
{
	struct page *page;

	if (!paddr)
		return -EINVAL;

	if (!ia64_phys_addr_valid(paddr))
		return -EINVAL;

	if (!pfn_valid(paddr >> PAGE_SHIFT))
		return -EINVAL;

	page = phys_to_page(paddr);
	if (PageMemError(page))
		mstat_already_marked++;
	return 0;
}

extern int isolate_lru_page(struct page *);
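/*
 * ia64_mca_cpe_move_page
 *	Isolate the page at paddr from the LRU and migrate its contents to
 *	a newly allocated page on the target node.  On success the old page
 *	stays on the bad page list; on failure its error bit is cleared so
 *	a later CPE can trigger another migration attempt.
 */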
static int
ia64_mca_cpe_move_page(u64 paddr, u32 node)
{
	LIST_HEAD(pagelist);
	struct page *page;
	int ret;

	ret = validate_paddr_page(paddr);
	if (ret < 0)
		return ret;

	/*
	 * convert the physical address to a struct page pointer
	 */
	page = phys_to_page(paddr);

	migrate_prep();
	ret = isolate_lru_page(page);
	if (ret) {
		mstat_cannot_isolate++;
		return ret;
	}

	list_add(&page->lru, &pagelist);
	ret = migrate_pages(&pagelist, alloc_migrate_page, node, 0);
	if (ret == 0) {
		total_badpages++;
		list_add_tail(&page->lru, &badpagelist);
	} else {
		mstat_failed_to_discard++;
		/*
		 * The page failed to migrate and is not on the bad page list.
		 * Clearing the error bit will allow another attempt to migrate
		 * if it gets another correctable error.
		 */
		ClearPageMemError(page);
	}

	return 0;
}

/*
 * cpe_process_queue
 *	Pulls the physical address off the list and calls the migration code.
 *	Will process all the addresses on the list.
 */
void
cpe_process_queue(void)
{
	int ret;
	u64 paddr;
	u16 node;

	do {
		paddr = cpe[cpe_tail].paddr;
		if (paddr) {
			/*
			 * There is a valid entry that needs processing.
			 */
			node = cpe[cpe_tail].node;

			ret = ia64_mca_cpe_move_page(paddr, node);
			if (ret <= 0)
				/*
				 * Clear the entry whether the migration
				 * succeeded or failed outright.  If the same
				 * address takes another CPE it will be
				 * re-added to the list.
				 */
				cpe[cpe_tail].paddr = 0;

		}
		if (++cpe_tail >= CE_HISTORY_LENGTH)
			cpe_tail = 0;

	} while (cpe_tail != cpe_head);
	return;
}

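/*
 * cpe_list_empty
 *	The list is empty when the head and tail indices match and the slot
 *	at the head holds no pending address.
 */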
static inline int
cpe_list_empty(void)
{
	return (cpe_head == cpe_tail) && (!cpe[cpe_head].paddr);
}

/*
 * kthread_cpe_migrate
 *	kthread_cpe_migrate is created at module load time and lives
 *	until the module is removed.  When not active, it will sleep.
 */
static int
kthread_cpe_migrate(void *ignore)
{
	while (cpe_active) {
		/*
		 * wait for work
		 */
		(void)wait_event_interruptible(cpe_activate_IRQ_wq,
						(!cpe_list_empty() ||
						!cpe_active));
		cpe_process_queue();		/* process work */
	}
	complete(&kthread_cpe_migrated_exited);
	return 0;
}

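/*
 * cpe_list_lock serializes additions to the cpe[] ring from the CPE
 * interrupt path.  It is taken with spin_trylock() so a contending
 * handler backs off rather than spinning in interrupt context.
 */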
DEFINE_SPINLOCK(cpe_list_lock);

/*
 * cpe_setup_migrate
 *	Get the physical address out of the CPE record, add it
 *	to the list of addresses to migrate (if not already on),
 *	and wake the back end worker thread.  This is called
 *	in interrupt context so cannot directly call the migration
 *	code.
 *
 *  Inputs
 *	rec	The CPE record
 *  Outputs
 *	1 on success, -EINVAL on failure
 */
static int
cpe_setup_migrate(void *rec)
{
	u64 paddr;
	u16 node;
	int i, ret;

	if (!rec)
		return -EINVAL;

	get_physical_address(rec, &paddr, &node);
	ret = validate_paddr_page(paddr);
	if (ret < 0)
		return -EINVAL;

	/* Check whether the containing page is already queued. */
	if (!cpe_list_empty())
		for (i = 0; i < CE_HISTORY_LENGTH; i++) {
			if ((cpe[i].paddr & PAGE_MASK) == (paddr & PAGE_MASK)) {
				mstat_already_on_list++;
				return 1;	/* already on the list */
			}
		}

	if (!spin_trylock(&cpe_list_lock)) {
		/*
		 * Someone else has the lock.  To avoid spinning in interrupt
		 * handler context, bail.
		 */
		return 1;
	}

	if (cpe[cpe_head].paddr == 0) {
		cpe[cpe_head].node = node;
		cpe[cpe_head].paddr = paddr;

		if (++cpe_head >= CE_HISTORY_LENGTH)
			cpe_head = 0;
	}
	spin_unlock(&cpe_list_lock);

	wake_up_interruptible(&cpe_activate_IRQ_wq);

	return 1;
}

/*
 * =============================================================================
 */

/*
 * free_one_bad_page
 *	Free one page from the list of bad pages.
 */
static int
free_one_bad_page(unsigned long paddr)
{
	LIST_HEAD(pagelist);
	struct page *page, *page2, *target;

	/*
	 * Verify page address
	 */
	target = phys_to_page(paddr);
	list_for_each_entry_safe(page, page2, &badpagelist, lru) {
		if (page != target)
			continue;

		ClearPageMemError(page);	/* Mark the page as good */
		total_badpages--;
		list_move_tail(&page->lru, &pagelist);
		putback_lru_pages(&pagelist);
		break;
	}
	return 0;
}

/*
 * free_all_bad_pages
 *	Free all of the pages on the bad pages list.
 */
static int
free_all_bad_pages(void)
{
	struct page *page, *page2;

	list_for_each_entry_safe(page, page2, &badpagelist, lru) {
		ClearPageMemError(page);	/* Mark the page as good */
		total_badpages--;
	}
	putback_lru_pages(&badpagelist);
	return 0;
}

#define OPT_LEN 16

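/*
 * badpage_store
 *	sysfs write handler for /sys/kernel/badram.  The value written is
 *	parsed as a hex physical address; a non-zero address releases that
 *	page from the bad page list, writing 0 releases every page on the
 *	list.  For example (address shown is hypothetical):
 *
 *		echo 2f8d74000 > /sys/kernel/badram	# free one page
 *		echo 0 > /sys/kernel/badram		# free all bad pages
 */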
static ssize_t
badpage_store(struct kobject *kobj,
	      struct kobj_attribute *attr, const char *buf, size_t count)
{
	char optstr[OPT_LEN];
	unsigned long opt;
	int len = OPT_LEN;
	int err;

	if (count < len)
		len = count;

	strlcpy(optstr, buf, len);

	err = strict_strtoul(optstr, 16, &opt);
	if (err)
		return err;

	if (opt == 0)
		free_all_bad_pages();
	else
		free_one_bad_page(opt);

	return count;
}

/*
 * badpage_show
 *	Display the number, size, and addresses of all the pages on the
 *	bad page list.
 *
 *	Note that sysfs provides buf of PAGE_SIZE length.  bufend tracks
 *	the remaining space in buf to avoid overflowing.
 */
static ssize_t
badpage_show(struct kobject *kobj,
	     struct kobj_attribute *attr, char *buf)
{
	struct page *page, *page2;
	int i = 0, cnt = 0;
	char *bufend = buf + PAGE_SIZE;

	cnt = snprintf(buf, bufend - (buf + cnt),
			"Memory marked bad:        %d kB\n"
			"Pages marked bad:         %d\n"
			"Unable to isolate on LRU: %d\n"
			"Unable to migrate:        %d\n"
			"Already marked bad:       %d\n"
			"Already on list:          %d\n"
			"List of bad physical pages\n",
			total_badpages << (PAGE_SHIFT - 10), total_badpages,
			mstat_cannot_isolate, mstat_failed_to_discard,
			mstat_already_marked, mstat_already_on_list
			);

	list_for_each_entry_safe(page, page2, &badpagelist, lru) {
		if (bufend - (buf + cnt) < 20)
			break;		/* Avoid overflowing the buffer */
		cnt += snprintf(buf + cnt, bufend - (buf + cnt),
				" 0x%011lx", page_to_phys(page));
		if (!(++i % 5))
			cnt += snprintf(buf + cnt, bufend - (buf + cnt), "\n");
	}
	cnt += snprintf(buf + cnt, bufend - (buf + cnt), "\n");

	return cnt;
}

static struct kobj_attribute badram_attr = {
	.attr	 = {
		.name = "badram",
		.mode = S_IWUSR | S_IRUGO,
	},
	.show = badpage_show,
	.store = badpage_store,
};

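/*
 * cpe_migrate_external_handler_init
 *	Module init: create the /sys/kernel/badram interface, start the
 *	migration kthread, and register cpe_setup_migrate() as the external
 *	corrected-error handler.
 */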
static int __init
cpe_migrate_external_handler_init(void)
{
	int error;
	struct task_struct *kthread;

	error = sysfs_create_file(kernel_kobj, &badram_attr.attr);
	if (error)
		return -EINVAL;

	/*
	 * set up the kthread
	 */
	cpe_active = 1;
	kthread = kthread_run(kthread_cpe_migrate, NULL, "cpe_migrate");
	if (IS_ERR(kthread)) {
		complete(&kthread_cpe_migrated_exited);
		sysfs_remove_file(kernel_kobj, &badram_attr.attr);
		return -EFAULT;
	}

	/*
	 * register external ce handler
	 */
	if (ia64_reg_CE_extension(cpe_setup_migrate)) {
		printk(KERN_ERR "ia64_reg_CE_extension failed.\n");
		cpe_active = 0;		/* stop the kthread before bailing out */
		wake_up_interruptible(&cpe_activate_IRQ_wq);
		wait_for_completion(&kthread_cpe_migrated_exited);
		sysfs_remove_file(kernel_kobj, &badram_attr.attr);
		return -EFAULT;
	}
	cpe_poll_enabled = cpe_polling_enabled;

	printk(KERN_INFO "Registered badram Driver\n");
	return 0;
}

static void __exit
cpe_migrate_external_handler_exit(void)
{
	/* unregister external mca handlers */
	ia64_unreg_CE_extension();

	/* Stop kthread */
	cpe_active = 0;			/* tell kthread_cpe_migrate to exit */
	wake_up_interruptible(&cpe_activate_IRQ_wq);
	wait_for_completion(&kthread_cpe_migrated_exited);

	sysfs_remove_file(kernel_kobj, &badram_attr.attr);
}

module_init(cpe_migrate_external_handler_init);
module_exit(cpe_migrate_external_handler_exit);

module_param(cpe_polling_enabled, int, 0644);
MODULE_PARM_DESC(cpe_polling_enabled,
		"Enable polling with migration");

MODULE_AUTHOR("Russ Anderson <rja@sgi.com>");
MODULE_DESCRIPTION("ia64 Corrected Error page migration driver");