arch/ia64/kernel/unaligned.c

   1 /*
   2  * Architecture-specific unaligned trap handling.
   3  *
   4  * Copyright (C) 1999-2002 Hewlett-Packard Co
   5  *      Stephane Eranian <eranian@hpl.hp.com>
   6  *      David Mosberger-Tang <davidm@hpl.hp.com>
   7  *
   8  * 2002/12/09   Fix rotating register handling (off-by-1 error, missing fr-rotation).  Fix
   9  *              get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
  10  *              stacked register returns an undefined value; it does NOT trigger a
  11  *              "rsvd register fault").
  12  * 2001/10/11   Fix unaligned access to rotating registers in s/w pipelined loops.
  13  * 2001/08/13   Correct size of extended floats (float_fsz) from 16 to 10 bytes.
  14  * 2001/01/17   Add support emulation of unaligned kernel accesses.
  15  */
  16 #include <linux/kernel.h>
  17 #include <linux/sched.h>
  18 #include <linux/smp_lock.h>
  19 #include <linux/tty.h>
  20
  21 #include <asm/uaccess.h>
  22 #include <asm/rse.h>
  23 #include <asm/processor.h>
  24 #include <asm/unaligned.h>
  25
  26 extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
  27
  28 #undef DEBUG_UNALIGNED_TRAP
  29
  30 #ifdef DEBUG_UNALIGNED_TRAP
  31 # define DPRINT(a...)   do { printk("%s %u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
  32 # define DDUMP(str,vp,len)      dump(str, vp, len)
  33
  34 static void
  35 dump (const char *str, void *vp, size_t len)
  36 {
  37         unsigned char *cp = vp;
  38         int i;
  39
  40         printk("%s", str);
  41         for (i = 0; i < len; ++i)
  42                 printk (" %02x", *cp++);
  43         printk("\n");
  44 }
  45 #else
  46 # define DPRINT(a...)
  47 # define DDUMP(str,vp,len)
  48 #endif
  49
   50 #define IA64_FIRST_STACKED_GR   32      /* r32 and up are accessed through the RSE helpers */
   51 #define IA64_FIRST_ROTATING_FR  32      /* f32 and up are accessed through thread.fph */
   52 #define SIGN_EXT9               0xffffffffffffff00ul    /* OR-ed into imm9 when its sign bit (m) is set */
   53
   54 /*
   55  * For M-unit:
   56  *
   57  *  opcode |   m  |   x6    |
   58  * --------|------|---------|
   59  * [40-37] | [36] | [35:30] |
   60  * --------|------|---------|
   61  *     4   |   1  |    6    | = 11 bits
   62  * --------------------------
   63  * However, bits [31:30] are not directly useful to distinguish between
   64  * loads and stores, so we can use [35:32] instead, which gives the following
   65  * mask ([40:32]) using 9 bits. The 'e' nibble (bit 4 of the mask, i.e. the m-bit
   66  * [36], is clear) comes from the fact that we defer checking the m-bit until
   67  * later in the load/store emulation.
   68  */
   68 #define IA64_OPCODE_MASK        0x1ef
   69 #define IA64_OPCODE_SHIFT       32
  70
   71 /*
   72  * Table C-28 Integer Load/Store
   73  *
   74  * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
   75  *
   76  * ld8.fill, st8.fill MUST be aligned because the RNATs are based on
   77  * the address (bits [8:3]), so we must fail.
   78  */
   79 #define LD_OP            0x080
   80 #define LDS_OP           0x081
   81 #define LDA_OP           0x082
   82 #define LDSA_OP          0x083
   83 #define LDBIAS_OP        0x084
   84 #define LDACQ_OP         0x085
   85 /* 0x086, 0x087 are not relevant */
   86 #define LDCCLR_OP        0x088
   87 #define LDCNC_OP         0x089
   88 #define LDCCLRACQ_OP     0x08a
   89 #define ST_OP            0x08c
   90 #define STREL_OP         0x08d
   91 /* 0x08e, 0x08f are not relevant */
  92
   93 /*
   94  * Table C-29 Integer Load +Reg
   95  *
   96  * We use the ld->m (bit [36:36]) field to determine whether or not we have
   97  * a load/store of this form.
   98  */
   99
  100 /*
  101  * Table C-30 Integer Load/Store +Imm
  102  *
  103  * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
  104  *
  105  * ld8.fill, st8.fill must be aligned because the NaT bits are based on
  106  * the address, so we must fail and the program must be fixed.
  107  */
  108 #define LD_IMM_OP            0x0a0
  109 #define LDS_IMM_OP           0x0a1
  110 #define LDA_IMM_OP           0x0a2
  111 #define LDSA_IMM_OP          0x0a3
  112 #define LDBIAS_IMM_OP        0x0a4
  113 #define LDACQ_IMM_OP         0x0a5
  114 /* 0x0a6, 0x0a7 are not relevant */
  115 #define LDCCLR_IMM_OP        0x0a8
  116 #define LDCNC_IMM_OP         0x0a9
  117 #define LDCCLRACQ_IMM_OP     0x0aa
  118 #define ST_IMM_OP            0x0ac
  119 #define STREL_IMM_OP         0x0ad
  120 /* 0x0ae, 0x0af are not relevant */
 121
  122 /*
  123  * Table C-32 Floating-point Load/Store
  124  *
  125  * As with the integer opcodes, each value is laid out like instruction bits [40:32]
  126  * with the m-bit clear: here the 4-bit opcode field is 6, the low nibble is x6_op.
  124  */
  125 #define LDF_OP           0x0c0
  126 #define LDFS_OP          0x0c1
  127 #define LDFA_OP          0x0c2
  128 #define LDFSA_OP         0x0c3
  129 /* 0x0c6 is irrelevant */
  130 #define LDFCCLR_OP       0x0c8
  131 #define LDFCNC_OP        0x0c9
  132 /* 0x0cb is irrelevant  */
  133 #define STF_OP           0x0cc
  134
  135 /*
  136  * Table C-33 Floating-point Load +Reg
  137  *
  138  * We use the ld->m (bit [36:36]) field to determine whether or not we have
  139  * a load/store of this form.
  140  */
  141
  142 /*
  143  * Table C-34 Floating-point Load/Store +Imm
  144  *
  145  * Same layout as above with the 4-bit opcode field equal to 7.
  144  */
  145 #define LDF_IMM_OP       0x0e0
  146 #define LDFS_IMM_OP      0x0e1
  147 #define LDFA_IMM_OP      0x0e2
  148 #define LDFSA_IMM_OP     0x0e3
  149 /* 0x0e6 is irrelevant */
  150 #define LDFCCLR_IMM_OP   0x0e8
  151 #define LDFCNC_IMM_OP    0x0e9
  152 #define STF_IMM_OP       0x0ec
 153
  154 typedef struct {        /* bit-field view of one M-unit load/store instruction */
  155         unsigned long    qp:6;  /* [0:5]   */
  156         unsigned long    r1:7;  /* [6:12]  target register of a load */
  157         unsigned long   imm:7;  /* [13:19] low 7 bits of imm9, or r2 for the +Reg update form */
  158         unsigned long    r3:7;  /* [20:26] base register */
  159         unsigned long     x:1;  /* [27:27] bit 7 of imm9 in the +Imm update form */
  160         unsigned long  hint:2;  /* [28:29] */
  161         unsigned long x6_sz:2;  /* [30:31] access size is (1 << x6_sz) bytes */
  162         unsigned long x6_op:4;  /* [32:35], x6 = x6_sz|x6_op */
  163         unsigned long     m:1;  /* [36:36] sign of imm9 (+Imm form) or "+Reg update" flag */
  164         unsigned long    op:4;  /* [37:40] */
  165         unsigned long   pad:23; /* [41:63] */
  166 } load_store_t;
 167
 168
  169 typedef enum {          /* which base-register update form emulate_load_updates() must apply */
  170         UPD_IMMEDIATE,  /* ldXZ r1=[r3],imm(9): r3 += sign-extended imm9 */
  171         UPD_REG         /* ldXZ r1=[r3],r2:     r3 += r2 (NaT of r2 propagates to r3) */
  172 } update_t;
 173
  174 /*
  175  * We use tables to keep track of the offsets of registers in the saved state.
  176  * This way we avoid big switch/case statements.
  177  *
  178  * Bit 0 of an entry is set when the register lives in switch_stack and clear when
  179  * it lives in pt_regs.  The byte offset within that structure is stored shifted
  180  * left by 1 bit.  A 2-byte value should be enough to hold any such offset.
  181  *
  182  * In case the calling convention changes (and thus pt_regs/switch_stack)
  183  * simply use RSW instead of RPT or vice-versa.
  184  */
  185
  186 #define RPO(x)  ((size_t) &((struct pt_regs *)0)->x)          /* byte offset of x in pt_regs */
  187 #define RSO(x)  ((size_t) &((struct switch_stack *)0)->x)     /* byte offset of x in switch_stack */
  188
  189 #define RPT(x)          (RPO(x) << 1)           /* table entry: pt_regs member (bit 0 clear) */
  190 #define RSW(x)          (1| RSO(x)<<1)          /* table entry: switch_stack member (bit 0 set) */
  191
  192 #define GR_OFFS(x)      (gr_info[x]>>1)         /* byte offset of general register x */
  193 #define GR_IN_SW(x)     (gr_info[x] & 0x1)      /* non-zero if general register x is in switch_stack */
  194
  195 #define FR_OFFS(x)      (fr_info[x]>>1)         /* byte offset of floating-point register x */
  196 #define FR_IN_SW(x)     (fr_info[x] & 0x1)      /* non-zero if floating-point register x is in switch_stack */
 197
 198 static u16 gr_info[32]={
 199         0,                      /* r0 is read-only : WE SHOULD NEVER GET THIS */
 200
 201         RPT(r1), RPT(r2), RPT(r3),
 202
 203         RSW(r4), RSW(r5), RSW(r6), RSW(r7),
 204
 205         RPT(r8), RPT(r9), RPT(r10), RPT(r11),
 206         RPT(r12), RPT(r13), RPT(r14), RPT(r15),
 207
 208         RPT(r16), RPT(r17), RPT(r18), RPT(r19),
 209         RPT(r20), RPT(r21), RPT(r22), RPT(r23),
 210         RPT(r24), RPT(r25), RPT(r26), RPT(r27),
 211         RPT(r28), RPT(r29), RPT(r30), RPT(r31)
 212 };
 213
 214 static u16 fr_info[32]={
 215         0,                      /* constant : WE SHOULD NEVER GET THIS */
 216         0,                      /* constant : WE SHOULD NEVER GET THIS */
 217
 218         RSW(f2), RSW(f3), RSW(f4), RSW(f5),
 219
 220         RPT(f6), RPT(f7), RPT(f8), RPT(f9),
 221
 222         RSW(f10), RSW(f11), RSW(f12), RSW(f13), RSW(f14),
 223         RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
 224         RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
 225         RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
 226         RSW(f30), RSW(f31)
 227 };
 228
 229 /* Invalidate ALAT entry for integer register REGNO.  */
 230 static void
 231 invala_gr (int regno)
 232 {
 233 #       define F(reg)   case reg: __asm__ __volatile__ ("invala.e r%0" :: "i"(reg)); break
 234
 235         switch (regno) {
 236                 F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
 237                 F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
 238                 F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
 239                 F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
 240                 F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
 241                 F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
 242                 F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
 243                 F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
 244                 F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
 245                 F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
 246                 F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
 247                 F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
 248                 F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
 249                 F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
 250                 F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
 251                 F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
 252         }
 253 #       undef F
 254 }
 255
 256 /* Invalidate ALAT entry for floating-point register REGNO.  */
 257 static void
 258 invala_fr (int regno)
 259 {
 260 #       define F(reg)   case reg: __asm__ __volatile__ ("invala.e f%0" :: "i"(reg)); break
 261
 262         switch (regno) {
 263                 F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
 264                 F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
 265                 F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
 266                 F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
 267                 F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
 268                 F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
 269                 F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
 270                 F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
 271                 F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
 272                 F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
 273                 F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
 274                 F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
 275                 F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
 276                 F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
 277                 F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
 278                 F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
 279         }
 280 #       undef F
 281 }
 282
 283 static inline unsigned long
 284 rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
 285 {
 286         reg += rrb;
 287         if (reg >= sor)
 288                 reg -= sor;
 289         return reg;
 290 }
 291
 292 static void
 293 set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
 294 {
 295         struct switch_stack *sw = (struct switch_stack *) regs - 1;
 296         unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
 297         unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
 298         unsigned long rnats, nat_mask;
 299         unsigned long on_kbs;
 300         long sof = (regs->cr_ifs) & 0x7f;
 301         long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
 302         long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
 303         long ridx = r1 - 32;
 304
 305         if (ridx >= sof) {
 306                 /* this should never happen, as the "rsvd register fault" has higher priority */
 307                 DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
 308                 return;
 309         }
 310
 311         if (ridx < sor)
 312                 ridx = rotate_reg(sor, rrb_gr, ridx);
 313
 314         DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
 315                r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
 316
 317         on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
 318         addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
 319         if (addr >= kbs) {
 320                 /* the register is on the kernel backing store: easy... */
 321                 rnat_addr = ia64_rse_rnat_addr(addr);
 322                 if ((unsigned long) rnat_addr >= sw->ar_bspstore)
 323                         rnat_addr = &sw->ar_rnat;
 324                 nat_mask = 1UL << ia64_rse_slot_num(addr);
 325
 326                 *addr = val;
 327                 if (nat)
 328                         *rnat_addr |=  nat_mask;
 329                 else
 330                         *rnat_addr &= ~nat_mask;
 331                 return;
 332         }
 333
 334         if (!user_stack(current, regs)) {
 335                 DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
 336                 return;
 337         }
 338
 339         bspstore = (unsigned long *)regs->ar_bspstore;
 340         ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
 341         bsp     = ia64_rse_skip_regs(ubs_end, -sof);
 342         addr    = ia64_rse_skip_regs(bsp, ridx);
 343
 344         DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
 345
 346         ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
 347
 348         rnat_addr = ia64_rse_rnat_addr(addr);
 349
 350         ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
 351         DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
 352                (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
 353
 354         nat_mask = 1UL << ia64_rse_slot_num(addr);
 355         if (nat)
 356                 rnats |=  nat_mask;
 357         else
 358                 rnats &= ~nat_mask;
 359         ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
 360
 361         DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
 362 }
 363
 364
 365 static void
 366 get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
 367 {
 368         struct switch_stack *sw = (struct switch_stack *) regs - 1;
 369         unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
 370         unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
 371         unsigned long rnats, nat_mask;
 372         unsigned long on_kbs;
 373         long sof = (regs->cr_ifs) & 0x7f;
 374         long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
 375         long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
 376         long ridx = r1 - 32;
 377
 378         if (ridx >= sof) {
 379                 /* read of out-of-frame register returns an undefined value; 0 in our case.  */
 380                 DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
 381                 goto fail;
 382         }
 383
 384         if (ridx < sor)
 385                 ridx = rotate_reg(sor, rrb_gr, ridx);
 386
 387         DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
 388                r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
 389
 390         on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
 391         addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
 392         if (addr >= kbs) {
 393                 /* the register is on the kernel backing store: easy... */
 394                 *val = *addr;
 395                 if (nat) {
 396                         rnat_addr = ia64_rse_rnat_addr(addr);
 397                         if ((unsigned long) rnat_addr >= sw->ar_bspstore)
 398                                 rnat_addr = &sw->ar_rnat;
 399                         nat_mask = 1UL << ia64_rse_slot_num(addr);
 400                         *nat = (*rnat_addr & nat_mask) != 0;
 401                 }
 402                 return;
 403         }
 404
 405         if (!user_stack(current, regs)) {
 406                 DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
 407                 goto fail;
 408         }
 409
 410         bspstore = (unsigned long *)regs->ar_bspstore;
 411         ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
 412         bsp     = ia64_rse_skip_regs(ubs_end, -sof);
 413         addr    = ia64_rse_skip_regs(bsp, ridx);
 414
 415         DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
 416
 417         ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
 418
 419         if (nat) {
 420                 rnat_addr = ia64_rse_rnat_addr(addr);
 421                 nat_mask = 1UL << ia64_rse_slot_num(addr);
 422
 423                 DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
 424
 425                 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
 426                 *nat = (rnats & nat_mask) != 0;
 427         }
 428         return;
 429
 430   fail:
 431         *val = 0;
 432         if (nat)
 433                 *nat = 0;
 434         return;
 435 }
 436
 437
 438 static void
 439 setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
 440 {
 441         struct switch_stack *sw = (struct switch_stack *) regs - 1;
 442         unsigned long addr;
 443         unsigned long bitmask;
 444         unsigned long *unat;
 445
 446         /*
 447          * First takes care of stacked registers
 448          */
 449         if (regnum >= IA64_FIRST_STACKED_GR) {
 450                 set_rse_reg(regs, regnum, val, nat);
 451                 return;
 452         }
 453
 454         /*
 455          * Using r0 as a target raises a General Exception fault which has higher priority
 456          * than the Unaligned Reference fault.
 457          */
 458
 459         /*
 460          * Now look at registers in [0-31] range and init correct UNAT
 461          */
 462         if (GR_IN_SW(regnum)) {
 463                 addr = (unsigned long)sw;
 464                 unat = &sw->ar_unat;
 465         } else {
 466                 addr = (unsigned long)regs;
 467                 unat = &sw->caller_unat;
 468         }
 469         DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
 470                addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
 471         /*
 472          * add offset from base of struct
 473          * and do it !
 474          */
 475         addr += GR_OFFS(regnum);
 476
 477         *(unsigned long *)addr = val;
 478
 479         /*
 480          * We need to clear the corresponding UNAT bit to fully emulate the load
 481          * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
 482          */
 483         bitmask   = 1UL << (addr >> 3 & 0x3f);
 484         DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
 485         if (nat) {
 486                 *unat |= bitmask;
 487         } else {
 488                 *unat &= ~bitmask;
 489         }
 490         DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
 491 }
 492
 493 /*
 494  * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
 495  * range from 32-127, result is in the range from 0-95.
 496  */
 497 static inline unsigned long
 498 fph_index (struct pt_regs *regs, long regnum)
 499 {
 500         unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
 501         return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
 502 }
 503
 504 static void
 505 setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
 506 {
 507         struct switch_stack *sw = (struct switch_stack *)regs - 1;
 508         unsigned long addr;
 509
 510         /*
 511          * From EAS-2.5: FPDisableFault has higher priority than Unaligned
 512          * Fault. Thus, when we get here, we know the partition is enabled.
 513          * To update f32-f127, there are three choices:
 514          *
 515          *      (1) save f32-f127 to thread.fph and update the values there
 516          *      (2) use a gigantic switch statement to directly access the registers
 517          *      (3) generate code on the fly to update the desired register
 518          *
 519          * For now, we are using approach (1).
 520          */
 521         if (regnum >= IA64_FIRST_ROTATING_FR) {
 522                 ia64_sync_fph(current);
 523                 current->thread.fph[fph_index(regs, regnum)] = *fpval;
 524         } else {
 525                 /*
 526                  * pt_regs or switch_stack ?
 527                  */
 528                 if (FR_IN_SW(regnum)) {
 529                         addr = (unsigned long)sw;
 530                 } else {
 531                         addr = (unsigned long)regs;
 532                 }
 533
 534                 DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
 535
 536                 addr += FR_OFFS(regnum);
 537                 *(struct ia64_fpreg *)addr = *fpval;
 538
 539                 /*
 540                  * mark the low partition as being used now
 541                  *
 542                  * It is highly unlikely that this bit is not already set, but
 543                  * let's do it for safety.
 544                  */
 545                 regs->cr_ipsr |= IA64_PSR_MFL;
 546         }
 547 }
 548
  549 /*
  550  * These two inline helpers store the spilled (stf.spill) image of the constant
  551  * floating-point registers f0 and f1 at *FINAL, so that it can be used with stfX.
  552  * getfpreg() relies on them because f0 and f1 are never saved.
  552  */
  553 static inline void
  554 float_spill_f0 (struct ia64_fpreg *final)
  555 {
  556         __asm__ __volatile__ ("stf.spill [%0]=f0" :: "r"(final) : "memory");
  557 }
  558
  559 static inline void
  560 float_spill_f1 (struct ia64_fpreg *final)
  561 {
  562         __asm__ __volatile__ ("stf.spill [%0]=f1" :: "r"(final) : "memory");
  563 }
 564
 565 static void
 566 getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
 567 {
 568         struct switch_stack *sw = (struct switch_stack *) regs - 1;
 569         unsigned long addr;
 570
 571         /*
 572          * From EAS-2.5: FPDisableFault has higher priority than
 573          * Unaligned Fault. Thus, when we get here, we know the partition is
 574          * enabled.
 575          *
 576          * When regnum > 31, the register is still live and we need to force a save
 577          * to current->thread.fph to get access to it.  See discussion in setfpreg()
 578          * for reasons and other ways of doing this.
 579          */
 580         if (regnum >= IA64_FIRST_ROTATING_FR) {
 581                 ia64_flush_fph(current);
 582                 *fpval = current->thread.fph[fph_index(regs, regnum)];
 583         } else {
 584                 /*
 585                  * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
 586                  * not saved, we must generate their spilled form on the fly
 587                  */
 588                 switch(regnum) {
 589                 case 0:
 590                         float_spill_f0(fpval);
 591                         break;
 592                 case 1:
 593                         float_spill_f1(fpval);
 594                         break;
 595                 default:
 596                         /*
 597                          * pt_regs or switch_stack ?
 598                          */
 599                         addr =  FR_IN_SW(regnum) ? (unsigned long)sw
 600                                                  : (unsigned long)regs;
 601
 602                         DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
 603                                FR_IN_SW(regnum), addr, FR_OFFS(regnum));
 604
 605                         addr  += FR_OFFS(regnum);
 606                         *fpval = *(struct ia64_fpreg *)addr;
 607                 }
 608         }
 609 }
 610
 611
 612 static void
 613 getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
 614 {
 615         struct switch_stack *sw = (struct switch_stack *) regs - 1;
 616         unsigned long addr, *unat;
 617
 618         if (regnum >= IA64_FIRST_STACKED_GR) {
 619                 get_rse_reg(regs, regnum, val, nat);
 620                 return;
 621         }
 622
 623         /*
 624          * take care of r0 (read-only always evaluate to 0)
 625          */
 626         if (regnum == 0) {
 627                 *val = 0;
 628                 if (nat)
 629                         *nat = 0;
 630                 return;
 631         }
 632
 633         /*
 634          * Now look at registers in [0-31] range and init correct UNAT
 635          */
 636         if (GR_IN_SW(regnum)) {
 637                 addr = (unsigned long)sw;
 638                 unat = &sw->ar_unat;
 639         } else {
 640                 addr = (unsigned long)regs;
 641                 unat = &sw->caller_unat;
 642         }
 643
 644         DPRINT("addr_base=%lx offset=0x%x\n", addr,  GR_OFFS(regnum));
 645
 646         addr += GR_OFFS(regnum);
 647
 648         *val  = *(unsigned long *)addr;
 649
 650         /*
 651          * do it only when requested
 652          */
 653         if (nat)
 654                 *nat  = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
 655 }
 656
 657 static void
 658 emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
 659 {
 660         /*
 661          * IMPORTANT:
 662          * Given the way we handle unaligned speculative loads, we should
 663          * not get to this point in the code but we keep this sanity check,
 664          * just in case.
 665          */
 666         if (ld.x6_op == 1 || ld.x6_op == 3) {
 667                 printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
 668                 die_if_kernel("unaligned reference on speculative load with register update\n",
 669                               regs, 30);
 670         }
 671
 672
 673         /*
 674          * at this point, we know that the base register to update is valid i.e.,
 675          * it's not r0
 676          */
 677         if (type == UPD_IMMEDIATE) {
 678                 unsigned long imm;
 679
 680                 /*
 681                  * Load +Imm: ldXZ r1=[r3],imm(9)
 682                  *
 683                  *
 684                  * form imm9: [13:19] contain the first 7 bits
 685                  */
 686                 imm = ld.x << 7 | ld.imm;
 687
 688                 /*
 689                  * sign extend (1+8bits) if m set
 690                  */
 691                 if (ld.m) imm |= SIGN_EXT9;
 692
 693                 /*
 694                  * ifa == r3 and we know that the NaT bit on r3 was clear so
 695                  * we can directly use ifa.
 696                  */
 697                 ifa += imm;
 698
 699                 setreg(ld.r3, ifa, 0, regs);
 700
 701                 DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
 702
 703         } else if (ld.m) {
 704                 unsigned long r2;
 705                 int nat_r2;
 706
 707                 /*
 708                  * Load +Reg Opcode: ldXZ r1=[r3],r2
 709                  *
 710                  * Note: that we update r3 even in the case of ldfX.a
 711                  * (where the load does not happen)
 712                  *
 713                  * The way the load algorithm works, we know that r3 does not
 714                  * have its NaT bit set (would have gotten NaT consumption
 715                  * before getting the unaligned fault). So we can use ifa
 716                  * which equals r3 at this point.
 717                  *
 718                  * IMPORTANT:
 719                  * The above statement holds ONLY because we know that we
 720                  * never reach this code when trying to do a ldX.s.
 721                  * If we ever make it to here on an ldfX.s then
 722                  */
 723                 getreg(ld.imm, &r2, &nat_r2, regs);
 724
 725                 ifa += r2;
 726
 727                 /*
 728                  * propagate Nat r2 -> r3
 729                  */
 730                 setreg(ld.r3, ifa, nat_r2, regs);
 731
 732                 DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
 733         }
 734 }
 735
 736
 737 static int
 738 emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
 739 {
 740         unsigned int len = 1 << ld.x6_sz;
 741
 742         /*
 743          * r0, as target, doesn't need to be checked because Illegal Instruction
 744          * faults have higher priority than unaligned faults.
 745          *
 746          * r0 cannot be found as the base as it would never generate an
 747          * unaligned reference.
 748          */
 749
 750         /*
 751          * ldX.a we don't try to emulate anything but we must invalidate the ALAT entry.
 752          * See comment below for explanation on how we handle ldX.a
 753          */
 754         if (ld.x6_op != 0x2) {
 755                 unsigned long val = 0;
 756
 757                 if (len != 2 && len != 4 && len != 8) {
 758                         DPRINT("unknown size: x6=%d\n", ld.x6_sz);
 759                         return -1;
 760                 }
 761                 /* this assumes little-endian byte-order: */
 762                 if (copy_from_user(&val, (void *) ifa, len))
 763                     return -1;
 764                 setreg(ld.r1, val, 0, regs);
 765         }
 766
 767         /*
 768          * check for updates on any kind of loads
 769          */
 770         if (ld.op == 0x5 || ld.m)
 771                 emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
 772
 773         /*
 774          * handling of various loads (based on EAS2.4):
 775          *
 776          * ldX.acq (ordered load):
 777          *      - acquire semantics would have been used, so force fence instead.
 778          *
 779          * ldX.c.clr (check load and clear):
 780          *      - if we get to this handler, it's because the entry was not in the ALAT.
 781          *        Therefore the operation reverts to a normal load
 782          *
 783          * ldX.c.nc (check load no clear):
 784          *      - same as previous one
 785          *
 786          * ldX.c.clr.acq (ordered check load and clear):
 787          *      - same as above for c.clr part. The load needs to have acquire semantics. So
 788          *        we use the fence semantics which is stronger and thus ensures correctness.
 789          *
 790          * ldX.a (advanced load):
 791          *      - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
 792          *        address doesn't match requested size alignement. This means that we would
 793          *        possibly need more than one load to get the result.
 794          *
 795          *        The load part can be handled just like a normal load, however the difficult
 796          *        part is to get the right thing into the ALAT. The critical piece of information
 797          *        is the base address of the load & size. To do that, a ld.a must be executed,
 798          *        clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
 799          *        if we use the same target register, we will be okay for the check.a instruction.
 800          *        If we look at the store, basically a stX [r3]=r1 checks the ALAT  for any entry
 801          *        which would overlap within [r3,r3+X] (the size of the load was stored in the
 802          *        ALAT). If such an entry is found the entry is invalidated. But this is not good
 803          *        enough, take the following example:
 804          *              r3=3
 805          *              ld4.a r1=[r3]
 806          *
 807          *        Could be emulated by doing:
 808          *              ld1.a r1=[r3],1
 809          *              store to temporary;
 810          *              ld1.a r1=[r3],1
 811          *              store & shift to temporary;
 812          *              ld1.a r1=[r3],1
 813          *              store & shift to temporary;
 814          *              ld1.a r1=[r3]
 815          *              store & shift to temporary;
 816          *              r1=temporary
 817          *
 818          *        So in this case, you would get the right value in r1 but the wrong info in
 819          *        the ALAT.  Notice that you could do it in reverse to finish with address 3
 820          *        but you would still get the size wrong.  To get the size right, one needs to
 821          *        execute exactly the same kind of load. You could do it from a aligned
 822          *        temporary location, but you would get the address wrong.
 823          *
 824          *        So no matter what, it is not possible to emulate an advanced load
 825          *        correctly. But is that really critical ?
 826          *
 827          *
 828          *        Now one has to look at how ld.a is used, one must either do a ld.c.* or
 829          *        chck.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
 830          *        entry found in ALAT), and that's perfectly ok because:
 831          *
 832          *              - ld.c.*, if the entry is not present a  normal load is executed
 833          *              - chk.a.*, if the entry is not present, execution jumps to recovery code
 834          *
 835          *        In either case, the load can be potentially retried in another form.
 836          *
 837          *        So it's okay NOT to do any actual load on an unaligned ld.a. However the ALAT
 838          *        must be invalidated for the register (so that chk.a.*,ld.c.* don't pick up
 839          *        a stale entry later) The register base update MUST also be performed.
 840          *
 841          *        Now what is the content of the register and its NaT bit in the case we don't
 842          *        do the load ?  EAS2.4, says (in case an actual load is needed)
 843          *
 844          *              - r1 = [r3], Nat = 0 if succeeds
 845          *              - r1 = 0 Nat = 0 if trying to access non-speculative memory
 846          *
 847          *        For us, there is nothing to do, because both ld.c.* and chk.a.* are going to
 848          *        retry and thus eventually reload the register thereby changing Nat and
 849          *        register content.
 850          */
 851
 852         /*
 853          * when the load has the .acq completer then
 854          * use ordering fence.
 855          */
 856         if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
 857                 mb();
 858
 859         /*
 860          * invalidate ALAT entry in case of advanced load
 861          */
 862         if (ld.x6_op == 0x2)
 863                 invala_gr(ld.r1);
 864
 865         return 0;
 866 }
 867
 868 static int
 869 emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
 870 {
 871         unsigned long r2;
 872         unsigned int len = 1 << ld.x6_sz;
 873
 874         /*
 875          * if we get to this handler, Nat bits on both r3 and r2 have already
 876          * been checked. so we don't need to do it
 877          *
 878          * extract the value to be stored
 879          */
 880         getreg(ld.imm, &r2, 0, regs);
 881
 882         /*
 883          * we rely on the macros in unaligned.h for now i.e.,
 884          * we let the compiler figure out how to read memory gracefully.
 885          *
 886          * We need this switch/case because the way the inline function
 887          * works. The code is optimized by the compiler and looks like
 888          * a single switch/case.
 889          */
 890         DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
 891
 892         if (len != 2 && len != 4 && len != 8) {
 893                 DPRINT("unknown size: x6=%d\n", ld.x6_sz);
 894                 return -1;
 895         }
 896
 897         /* this assumes little-endian byte-order: */
 898         if (copy_to_user((void *) ifa, &r2, len))
 899                 return -1;
 900
 901         /*
 902          * stX [r3]=r2,imm(9)
 903          *
 904          * NOTE:
 905          * ld.r3 can never be r0, because r0 would not generate an
 906          * unaligned access.
 907          */
 908         if (ld.op == 0x5) {
 909                 unsigned long imm;
 910
 911                 /*
 912                  * form imm9: [12:6] contain first 7bits
 913                  */
 914                 imm = ld.x << 7 | ld.r1;
 915                 /*
 916                  * sign extend (8bits) if m set
 917                  */
 918                 if (ld.m) imm |= SIGN_EXT9;
 919                 /*
 920                  * ifa == r3 (NaT is necessarily cleared)
 921                  */
 922                 ifa += imm;
 923
 924                 DPRINT("imm=%lx r3=%lx\n", imm, ifa);
 925
 926                 setreg(ld.r3, ifa, 0, regs);
 927         }
 928         /*
 929          * we don't have alat_invalidate_multiple() so we need
 930          * to do the complete flush :-<<
 931          */
 932         ia64_invala();
 933
 934         /*
 935          * stX.rel: use fence instead of release
 936          */
 937         if (ld.x6_op == 0xd)
 938                 mb();
 939
 940         return 0;
 941 }
 942
 943 /*
 944  * floating point operations sizes in bytes
 945  */
 946 static const unsigned char float_fsz[4]={
 947         10, /* extended precision (e) */
 948         8,  /* integer (8)            */
 949         4,  /* single precision (s)   */
 950         8   /* double precision (d)   */
 951 };
 952
 953 static inline void
 954 mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
 955 {
 956         __asm__ __volatile__ ("ldfe f6=[%0];; stf.spill [%1]=f6"
 957                               :: "r"(init), "r"(final) : "f6","memory");
 958 }
 959
 960 static inline void
 961 mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
 962 {
 963         __asm__ __volatile__ ("ldf8 f6=[%0];; stf.spill [%1]=f6"
 964                               :: "r"(init), "r"(final) : "f6","memory");
 965 }
 966
 967 static inline void
 968 mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
 969 {
 970         __asm__ __volatile__ ("ldfs f6=[%0];; stf.spill [%1]=f6"
 971                               :: "r"(init), "r"(final) : "f6","memory");
 972 }
 973
 974 static inline void
 975 mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
 976 {
 977         __asm__ __volatile__ ("ldfd f6=[%0];; stf.spill [%1]=f6"
 978                               :: "r"(init), "r"(final) : "f6","memory");
 979 }
 980
 981 static inline void
 982 float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
 983 {
 984         __asm__ __volatile__ ("ldf.fill f6=[%0];; stfe [%1]=f6"
 985                               :: "r"(init), "r"(final) : "f6","memory");
 986 }
 987
 988 static inline void
 989 float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
 990 {
 991         __asm__ __volatile__ ("ldf.fill f6=[%0];; stf8 [%1]=f6"
 992                               :: "r"(init), "r"(final) : "f6","memory");
 993 }
 994
 995 static inline void
 996 float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
 997 {
 998         __asm__ __volatile__ ("ldf.fill f6=[%0];; stfs [%1]=f6"
 999                               :: "r"(init), "r"(final) : "f6","memory");
1000 }
1001
/*
 * Refill f6 from the spill-format image at "init" (ldf.fill) and store it to
 * "final" as a double-precision memory image (stfd).  f6 is clobbered.
 */
static inline void
float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ (
		"ldf.fill f6=[%0];; stfd [%1]=f6"
		: /* no outputs */
		: "r"(init), "r"(final)
		: "f6", "memory");
}
1008
1009 static int
1010 emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1011 {
1012         struct ia64_fpreg fpr_init[2];
1013         struct ia64_fpreg fpr_final[2];
1014         unsigned long len = float_fsz[ld.x6_sz];
1015
1016         /*
1017          * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
1018          * higher priority than unaligned faults.
1019          *
1020          * r0 cannot be found as the base as it would never generate an unaligned
1021          * reference.
1022          */
1023
1024         /*
1025          * make sure we get clean buffers
1026          */
1027         memset(&fpr_init, 0, sizeof(fpr_init));
1028         memset(&fpr_final, 0, sizeof(fpr_final));
1029
1030         /*
1031          * ldfpX.a: we don't try to emulate anything but we must
1032          * invalidate the ALAT entry and execute updates, if any.
1033          */
1034         if (ld.x6_op != 0x2) {
1035                 /*
1036                  * This assumes little-endian byte-order.  Note that there is no "ldfpe"
1037                  * instruction:
1038                  */
1039                 if (copy_from_user(&fpr_init[0], (void *) ifa, len)
1040                     || copy_from_user(&fpr_init[1], (void *) (ifa + len), len))
1041                         return -1;
1042
1043                 DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
1044                 DDUMP("frp_init =", &fpr_init, 2*len);
1045                 /*
1046                  * XXX fixme
1047                  * Could optimize inlines by using ldfpX & 2 spills
1048                  */
1049                 switch( ld.x6_sz ) {
1050                         case 0:
1051                                 mem2float_extended(&fpr_init[0], &fpr_final[0]);
1052                                 mem2float_extended(&fpr_init[1], &fpr_final[1]);
1053                                 break;
1054                         case 1:
1055                                 mem2float_integer(&fpr_init[0], &fpr_final[0]);
1056                                 mem2float_integer(&fpr_init[1], &fpr_final[1]);
1057                                 break;
1058                         case 2:
1059                                 mem2float_single(&fpr_init[0], &fpr_final[0]);
1060                                 mem2float_single(&fpr_init[1], &fpr_final[1]);
1061                                 break;
1062                         case 3:
1063                                 mem2float_double(&fpr_init[0], &fpr_final[0]);
1064                                 mem2float_double(&fpr_init[1], &fpr_final[1]);
1065                                 break;
1066                 }
1067                 DDUMP("fpr_final =", &fpr_final, 2*len);
1068                 /*
1069                  * XXX fixme
1070                  *
1071                  * A possible optimization would be to drop fpr_final and directly
1072                  * use the storage from the saved context i.e., the actual final
1073                  * destination (pt_regs, switch_stack or thread structure).
1074                  */
1075                 setfpreg(ld.r1, &fpr_final[0], regs);
1076                 setfpreg(ld.imm, &fpr_final[1], regs);
1077         }
1078
1079         /*
1080          * Check for updates: only immediate updates are available for this
1081          * instruction.
1082          */
1083         if (ld.m) {
1084                 /*
1085                  * the immediate is implicit given the ldsz of the operation:
1086                  * single: 8 (2x4) and for  all others it's 16 (2x8)
1087                  */
1088                 ifa += len<<1;
1089
1090                 /*
1091                  * IMPORTANT:
1092                  * the fact that we force the NaT of r3 to zero is ONLY valid
1093                  * as long as we don't come here with a ldfpX.s.
1094                  * For this reason we keep this sanity check
1095                  */
1096                 if (ld.x6_op == 1 || ld.x6_op == 3)
1097                         printk(KERN_ERR "%s: register update on speculative load pair, error\n",
1098                                __FUNCTION__);
1099
1100                 setreg(ld.r3, ifa, 0, regs);
1101         }
1102
1103         /*
1104          * Invalidate ALAT entries, if any, for both registers.
1105          */
1106         if (ld.x6_op == 0x2) {
1107                 invala_fr(ld.r1);
1108                 invala_fr(ld.imm);
1109         }
1110         return 0;
1111 }
1112
1113
1114 static int
1115 emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1116 {
1117         struct ia64_fpreg fpr_init;
1118         struct ia64_fpreg fpr_final;
1119         unsigned long len = float_fsz[ld.x6_sz];
1120
1121         /*
1122          * fr0 & fr1 don't need to be checked because Illegal Instruction
1123          * faults have higher priority than unaligned faults.
1124          *
1125          * r0 cannot be found as the base as it would never generate an
1126          * unaligned reference.
1127          */
1128
1129         /*
1130          * make sure we get clean buffers
1131          */
1132         memset(&fpr_init,0, sizeof(fpr_init));
1133         memset(&fpr_final,0, sizeof(fpr_final));
1134
1135         /*
1136          * ldfX.a we don't try to emulate anything but we must
1137          * invalidate the ALAT entry.
1138          * See comments in ldX for descriptions on how the various loads are handled.
1139          */
1140         if (ld.x6_op != 0x2) {
1141                 if (copy_from_user(&fpr_init, (void *) ifa, len))
1142                         return -1;
1143
1144                 DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1145                 DDUMP("fpr_init =", &fpr_init, len);
1146                 /*
1147                  * we only do something for x6_op={0,8,9}
1148                  */
1149                 switch( ld.x6_sz ) {
1150                         case 0:
1151                                 mem2float_extended(&fpr_init, &fpr_final);
1152                                 break;
1153                         case 1:
1154                                 mem2float_integer(&fpr_init, &fpr_final);
1155                                 break;
1156                         case 2:
1157                                 mem2float_single(&fpr_init, &fpr_final);
1158                                 break;
1159                         case 3:
1160                                 mem2float_double(&fpr_init, &fpr_final);
1161                                 break;
1162                 }
1163                 DDUMP("fpr_final =", &fpr_final, len);
1164                 /*
1165                  * XXX fixme
1166                  *
1167                  * A possible optimization would be to drop fpr_final and directly
1168                  * use the storage from the saved context i.e., the actual final
1169                  * destination (pt_regs, switch_stack or thread structure).
1170                  */
1171                 setfpreg(ld.r1, &fpr_final, regs);
1172         }
1173
1174         /*
1175          * check for updates on any loads
1176          */
1177         if (ld.op == 0x7 || ld.m)
1178                 emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
1179
1180         /*
1181          * invalidate ALAT entry in case of advanced floating point loads
1182          */
1183         if (ld.x6_op == 0x2)
1184                 invala_fr(ld.r1);
1185
1186         return 0;
1187 }
1188
1189
1190 static int
1191 emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1192 {
1193         struct ia64_fpreg fpr_init;
1194         struct ia64_fpreg fpr_final;
1195         unsigned long len = float_fsz[ld.x6_sz];
1196
1197         /*
1198          * make sure we get clean buffers
1199          */
1200         memset(&fpr_init,0, sizeof(fpr_init));
1201         memset(&fpr_final,0, sizeof(fpr_final));
1202
1203         /*
1204          * if we get to this handler, Nat bits on both r3 and r2 have already
1205          * been checked. so we don't need to do it
1206          *
1207          * extract the value to be stored
1208          */
1209         getfpreg(ld.imm, &fpr_init, regs);
1210         /*
1211          * during this step, we extract the spilled registers from the saved
1212          * context i.e., we refill. Then we store (no spill) to temporary
1213          * aligned location
1214          */
1215         switch( ld.x6_sz ) {
1216                 case 0:
1217                         float2mem_extended(&fpr_init, &fpr_final);
1218                         break;
1219                 case 1:
1220                         float2mem_integer(&fpr_init, &fpr_final);
1221                         break;
1222                 case 2:
1223                         float2mem_single(&fpr_init, &fpr_final);
1224                         break;
1225                 case 3:
1226                         float2mem_double(&fpr_init, &fpr_final);
1227                         break;
1228         }
1229         DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1230         DDUMP("fpr_init =", &fpr_init, len);
1231         DDUMP("fpr_final =", &fpr_final, len);
1232
1233         if (copy_to_user((void *) ifa, &fpr_final, len))
1234                 return -1;
1235
1236         /*
1237          * stfX [r3]=r2,imm(9)
1238          *
1239          * NOTE:
1240          * ld.r3 can never be r0, because r0 would not generate an
1241          * unaligned access.
1242          */
1243         if (ld.op == 0x7) {
1244                 unsigned long imm;
1245
1246                 /*
1247                  * form imm9: [12:6] contain first 7bits
1248                  */
1249                 imm = ld.x << 7 | ld.r1;
1250                 /*
1251                  * sign extend (8bits) if m set
1252                  */
1253                 if (ld.m)
1254                         imm |= SIGN_EXT9;
1255                 /*
1256                  * ifa == r3 (NaT is necessarily cleared)
1257                  */
1258                 ifa += imm;
1259
1260                 DPRINT("imm=%lx r3=%lx\n", imm, ifa);
1261
1262                 setreg(ld.r3, ifa, 0, regs);
1263         }
1264         /*
1265          * we don't have alat_invalidate_multiple() so we need
1266          * to do the complete flush :-<<
1267          */
1268         ia64_invala();
1269
1270         return 0;
1271 }
1272
1273 /*
1274  * Make sure we log the unaligned access, so that user/sysadmin can notice it and
1275  * eventually fix the program.  However, we don't want to do that for every access so we
1276  * pace it with jiffies.  This isn't really MP-safe, but it doesn't really have to be
1277  * either...
1278  */
1279 static int
1280 within_logging_rate_limit (void)
1281 {
1282         static unsigned long count, last_time;
1283
1284         if (jiffies - last_time > 5*HZ)
1285                 count = 0;
1286         if (++count < 5) {
1287                 last_time = jiffies;
1288                 return 1;
1289         }
1290         return 0;
1291
1292 }
1293
1294 void
1295 ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
1296 {
1297         struct ia64_psr *ipsr = ia64_psr(regs);
1298         mm_segment_t old_fs = get_fs();
1299         unsigned long bundle[2];
1300         unsigned long opcode;
1301         struct siginfo si;
1302         const struct exception_table_entry *eh = NULL;
1303         union {
1304                 unsigned long l;
1305                 load_store_t insn;
1306         } u;
1307         int ret = -1;
1308
1309         if (ia64_psr(regs)->be) {
1310                 /* we don't support big-endian accesses */
1311                 die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
1312                 goto force_sigbus;
1313         }
1314
1315         /*
1316          * Treat kernel accesses for which there is an exception handler entry the same as
1317          * user-level unaligned accesses.  Otherwise, a clever program could trick this
1318          * handler into reading an arbitrary kernel addresses...
1319          */
1320         if (!user_mode(regs))
1321                 eh = SEARCH_EXCEPTION_TABLE(regs);
1322         if (user_mode(regs) || eh) {
1323                 if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
1324                         goto force_sigbus;
1325
1326                 if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT)
1327                     && within_logging_rate_limit())
1328                 {
1329                         char buf[200];  /* comm[] is at most 16 bytes... */
1330                         size_t len;
1331
1332                         len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
1333                                       "ip=0x%016lx\n\r", current->comm, current->pid,
1334                                       ifa, regs->cr_iip + ipsr->ri);
1335                         /*
1336                          * Don't call tty_write_message() if we're in the kernel; we might
1337                          * be holding locks...
1338                          */
1339                         if (user_mode(regs))
1340                                 tty_write_message(current->tty, buf);
1341                         buf[len-1] = '\0';      /* drop '\r' */
1342                         printk(KERN_WARNING "%s", buf); /* watch for command names containing %s */
1343                 }
1344         } else {
1345                 if (within_logging_rate_limit())
1346                         printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
1347                                ifa, regs->cr_iip + ipsr->ri);
1348                 set_fs(KERNEL_DS);
1349         }
1350
1351         DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
1352                regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
1353
1354         if (__copy_from_user(bundle, (void *) regs->cr_iip, 16))
1355                 goto failure;
1356
1357         /*
1358          * extract the instruction from the bundle given the slot number
1359          */
1360         switch (ipsr->ri) {
1361               case 0: u.l = (bundle[0] >>  5); break;
1362               case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
1363               case 2: u.l = (bundle[1] >> 23); break;
1364         }
1365         opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
1366
1367         DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
1368                "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
1369                u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
1370
1371         /*
1372          * IMPORTANT:
1373          * Notice that the switch statement DOES not cover all possible instructions
1374          * that DO generate unaligned references. This is made on purpose because for some
1375          * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
1376          * is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e.,
1377          * the program will get a signal and die:
1378          *
1379          *      load/store:
1380          *              - ldX.spill
1381          *              - stX.spill
1382          *      Reason: RNATs are based on addresses
1383          *
1384          *      synchronization:
1385          *              - cmpxchg
1386          *              - fetchadd
1387          *              - xchg
1388          *      Reason: ATOMIC operations cannot be emulated properly using multiple
1389          *              instructions.
1390          *
1391          *      speculative loads:
1392          *              - ldX.sZ
1393          *      Reason: side effects, code must be ready to deal with failure so simpler
1394          *              to let the load fail.
1395          * ---------------------------------------------------------------------------------
1396          * XXX fixme
1397          *
1398          * I would like to get rid of this switch case and do something
1399          * more elegant.
1400          */
1401         switch (opcode) {
1402               case LDS_OP:
1403               case LDSA_OP:
1404               case LDS_IMM_OP:
1405               case LDSA_IMM_OP:
1406               case LDFS_OP:
1407               case LDFSA_OP:
1408               case LDFS_IMM_OP:
1409                 /*
1410                  * The instruction will be retried with deferred exceptions turned on, and
1411                  * we should get Nat bit installed
1412                  *
1413                  * IMPORTANT: When PSR_ED is set, the register & immediate update forms
1414                  * are actually executed even though the operation failed. So we don't
1415                  * need to take care of this.
1416                  */
1417                 DPRINT("forcing PSR_ED\n");
1418                 regs->cr_ipsr |= IA64_PSR_ED;
1419                 goto done;
1420
1421               case LD_OP:
1422               case LDA_OP:
1423               case LDBIAS_OP:
1424               case LDACQ_OP:
1425               case LDCCLR_OP:
1426               case LDCNC_OP:
1427               case LDCCLRACQ_OP:
1428               case LD_IMM_OP:
1429               case LDA_IMM_OP:
1430               case LDBIAS_IMM_OP:
1431               case LDACQ_IMM_OP:
1432               case LDCCLR_IMM_OP:
1433               case LDCNC_IMM_OP:
1434               case LDCCLRACQ_IMM_OP:
1435                 ret = emulate_load_int(ifa, u.insn, regs);
1436                 break;
1437
1438               case ST_OP:
1439               case STREL_OP:
1440               case ST_IMM_OP:
1441               case STREL_IMM_OP:
1442                 ret = emulate_store_int(ifa, u.insn, regs);
1443                 break;
1444
1445               case LDF_OP:
1446               case LDFA_OP:
1447               case LDFCCLR_OP:
1448               case LDFCNC_OP:
1449               case LDF_IMM_OP:
1450               case LDFA_IMM_OP:
1451               case LDFCCLR_IMM_OP:
1452               case LDFCNC_IMM_OP:
1453                 if (u.insn.x)
1454                         ret = emulate_load_floatpair(ifa, u.insn, regs);
1455                 else
1456                         ret = emulate_load_float(ifa, u.insn, regs);
1457                 break;
1458
1459               case STF_OP:
1460               case STF_IMM_OP:
1461                 ret = emulate_store_float(ifa, u.insn, regs);
1462                 break;
1463
1464               default:
1465                 goto failure;
1466         }
1467         DPRINT("ret=%d\n", ret);
1468         if (ret)
1469                 goto failure;
1470
1471         if (ipsr->ri == 2)
1472                 /*
1473                  * given today's architecture this case is not likely to happen because a
1474                  * memory access instruction (M) can never be in the last slot of a
1475                  * bundle. But let's keep it for now.
1476                  */
1477                 regs->cr_iip += 16;
1478         ipsr->ri = (ipsr->ri + 1) & 0x3;
1479
1480         DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
1481   done:
1482         set_fs(old_fs);         /* restore original address limit */
1483         return;
1484
1485   failure:
1486         /* something went wrong... */
1487         if (!user_mode(regs)) {
1488                 if (eh) {
1489                         handle_exception(regs, eh);
1490                         goto done;
1491                 }
1492                 die_if_kernel("error during unaligned kernel access\n", regs, ret);
1493                 /* NOT_REACHED */
1494         }
1495   force_sigbus:
1496         si.si_signo = SIGBUS;
1497         si.si_errno = 0;
1498         si.si_code = BUS_ADRALN;
1499         si.si_addr = (void *) ifa;
1500         si.si_flags = 0;
1501         si.si_isr = 0;
1502         si.si_imm = 0;
1503         force_sig_info(SIGBUS, &si, current);
1504         goto done;
1505 }