extern void xor_ia64_5(unsigned long, unsigned long *, unsigned long *,
unsigned long *, unsigned long *, unsigned long *);
-asm ("
- .text
-
- // Assume L2 memory latency of 6 cycles.
-
- .proc xor_ia64_2
-xor_ia64_2:
- .prologue
- .fframe 0
- { .mii
- .save ar.pfs, r31
- alloc r31 = ar.pfs, 3, 0, 13, 16
- .save ar.lc, r30
- mov r30 = ar.lc
- .save pr, r29
- mov r29 = pr
- ;;
- }
- .body
- { .mii
- mov r8 = in1
- mov ar.ec = 6 + 2
- shr in0 = in0, 3
- ;;
- }
- { .mmi
- adds in0 = -1, in0
- mov r16 = in1
- mov r17 = in2
- ;;
- }
- { .mii
- mov ar.lc = in0
- mov pr.rot = 1 << 16
- ;;
- }
- .rotr s1[6+1], s2[6+1], d[2]
- .rotp p[6+2]
-0: { .mmi
-(p[0]) ld8.nta s1[0] = [r16], 8
-(p[0]) ld8.nta s2[0] = [r17], 8
-(p[6]) xor d[0] = s1[6], s2[6]
- }
- { .mfb
-(p[6+1]) st8.nta [r8] = d[1], 8
- nop.f 0
- br.ctop.dptk.few 0b
- ;;
- }
- { .mii
- mov ar.lc = r30
- mov pr = r29, -1
- }
- { .bbb
- br.ret.sptk.few rp
- }
- .endp xor_ia64_2
-
- .proc xor_ia64_3
-xor_ia64_3:
- .prologue
- .fframe 0
- { .mii
- .save ar.pfs, r31
- alloc r31 = ar.pfs, 4, 0, 20, 24
- .save ar.lc, r30
- mov r30 = ar.lc
- .save pr, r29
- mov r29 = pr
- ;;
- }
- .body
- { .mii
- mov r8 = in1
- mov ar.ec = 6 + 2
- shr in0 = in0, 3
- ;;
- }
- { .mmi
- adds in0 = -1, in0
- mov r16 = in1
- mov r17 = in2
- ;;
- }
- { .mii
- mov r18 = in3
- mov ar.lc = in0
- mov pr.rot = 1 << 16
- ;;
- }
- .rotr s1[6+1], s2[6+1], s3[6+1], d[2]
- .rotp p[6+2]
-0: { .mmi
-(p[0]) ld8.nta s1[0] = [r16], 8
-(p[0]) ld8.nta s2[0] = [r17], 8
-(p[6]) xor d[0] = s1[6], s2[6]
- ;;
- }
- { .mmi
-(p[0]) ld8.nta s3[0] = [r18], 8
-(p[6+1]) st8.nta [r8] = d[1], 8
-(p[6]) xor d[0] = d[0], s3[6]
- }
- { .bbb
- br.ctop.dptk.few 0b
- ;;
- }
- { .mii
- mov ar.lc = r30
- mov pr = r29, -1
- }
- { .bbb
- br.ret.sptk.few rp
- }
- .endp xor_ia64_3
-
- .proc xor_ia64_4
-xor_ia64_4:
- .prologue
- .fframe 0
- { .mii
- .save ar.pfs, r31
- alloc r31 = ar.pfs, 5, 0, 27, 32
- .save ar.lc, r30
- mov r30 = ar.lc
- .save pr, r29
- mov r29 = pr
- ;;
- }
- .body
- { .mii
- mov r8 = in1
- mov ar.ec = 6 + 2
- shr in0 = in0, 3
- ;;
- }
- { .mmi
- adds in0 = -1, in0
- mov r16 = in1
- mov r17 = in2
- ;;
- }
- { .mii
- mov r18 = in3
- mov ar.lc = in0
- mov pr.rot = 1 << 16
- }
- { .mfb
- mov r19 = in4
- ;;
- }
- .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
- .rotp p[6+2]
-0: { .mmi
-(p[0]) ld8.nta s1[0] = [r16], 8
-(p[0]) ld8.nta s2[0] = [r17], 8
-(p[6]) xor d[0] = s1[6], s2[6]
- }
- { .mmi
-(p[0]) ld8.nta s3[0] = [r18], 8
-(p[0]) ld8.nta s4[0] = [r19], 8
-(p[6]) xor r20 = s3[6], s4[6]
- ;;
- }
- { .mib
-(p[6+1]) st8.nta [r8] = d[1], 8
-(p[6]) xor d[0] = d[0], r20
- br.ctop.dptk.few 0b
- ;;
- }
- { .mii
- mov ar.lc = r30
- mov pr = r29, -1
- }
- { .bbb
- br.ret.sptk.few rp
- }
- .endp xor_ia64_4
-
- .proc xor_ia64_5
-xor_ia64_5:
- .prologue
- .fframe 0
- { .mii
- .save ar.pfs, r31
- alloc r31 = ar.pfs, 6, 0, 34, 40
- .save ar.lc, r30
- mov r30 = ar.lc
- .save pr, r29
- mov r29 = pr
- ;;
- }
- .body
- { .mii
- mov r8 = in1
- mov ar.ec = 6 + 2
- shr in0 = in0, 3
- ;;
- }
- { .mmi
- adds in0 = -1, in0
- mov r16 = in1
- mov r17 = in2
- ;;
- }
- { .mii
- mov r18 = in3
- mov ar.lc = in0
- mov pr.rot = 1 << 16
- }
- { .mib
- mov r19 = in4
- mov r20 = in5
- ;;
- }
- .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
- .rotp p[6+2]
-0: { .mmi
-(p[0]) ld8.nta s1[0] = [r16], 8
-(p[0]) ld8.nta s2[0] = [r17], 8
-(p[6]) xor d[0] = s1[6], s2[6]
- }
- { .mmi
-(p[0]) ld8.nta s3[0] = [r18], 8
-(p[0]) ld8.nta s4[0] = [r19], 8
-(p[6]) xor r21 = s3[6], s4[6]
- ;;
- }
- { .mmi
-(p[0]) ld8.nta s5[0] = [r20], 8
-(p[6+1]) st8.nta [r8] = d[1], 8
-(p[6]) xor d[0] = d[0], r21
- ;;
- }
- { .mfb
-(p[6]) xor d[0] = d[0], s5[6]
- nop.f 0
- br.ctop.dptk.few 0b
- ;;
- }
- { .mii
- mov ar.lc = r30
- mov pr = r29, -1
- }
- { .bbb
- br.ret.sptk.few rp
- }
- .endp xor_ia64_5
+/*
+ * Hand-scheduled IA-64 bodies for xor_ia64_2 .. xor_ia64_5, emitted into
+ * .text by a single file-scope asm statement.
+ *
+ * Register use shared by all four procedures:
+ *   in0          treated as a byte count: it is shifted right by 3 and then
+ *                decremented to form the loop count placed in ar.lc, and
+ *                every pointer advances by 8 per iteration (ld8/st8 with
+ *                post-increment 8), i.e. one 8-byte word per iteration.
+ *   in1          first source and also the destination: it is copied both
+ *                to r16 (load pointer) and to r8 (store pointer).
+ *   in2 .. in5   remaining sources, walked through r17 .. r20 as far as the
+ *                particular variant needs.
+ *   r29/r30/r31  receive the incoming pr, ar.lc and ar.pfs in the prologue;
+ *                ar.lc and pr are written back just before br.ret.
+ *
+ * Per word, the XOR of all sources is stored through r8, so the result
+ * overwrites the first buffer.
+ *
+ * Loop shape: a software-pipelined loop built on rotating registers
+ * (.rotr/.rotp) and closed by br.ctop, with ar.ec = 6 + 2.  Loads are
+ * predicated on p[0], the xor on p[6] and the store on p[6+1]; the
+ * six-stage distance between load and use corresponds to the latency
+ * assumption stated in the assembly comment at the top of the string.
+ *
+ * xor_ia64_4 and xor_ia64_5 fold the third and fourth sources in a
+ * non-rotating scratch register first (r20 and r21 respectively; r20 is
+ * already the fifth source pointer in xor_ia64_5).
+ *
+ * NOTE(review): there is no guard for in0 < 8.  In that case the shift
+ * yields 0 and the decrement loads ar.lc with -1.  Presumably callers only
+ * pass a non-zero multiple of 8 - confirm against the callers.
+ *
+ * Every line of the string ends in an explicit newline escape followed by
+ * a line continuation, so the C compiler sees one logical source line while
+ * the assembler still receives one statement per line.
+ */ asm ("\n\
+ .text\n\
+\n\
+ // Assume L2 memory latency of 6 cycles.\n\
+\n\
+ .proc xor_ia64_2\n\
+xor_ia64_2:\n\
+ .prologue\n\
+ .fframe 0\n\
+ { .mii\n\
+ .save ar.pfs, r31\n\
+ alloc r31 = ar.pfs, 3, 0, 13, 16\n\
+ .save ar.lc, r30\n\
+ mov r30 = ar.lc\n\
+ .save pr, r29\n\
+ mov r29 = pr\n\
+ ;;\n\
+ }\n\
+ .body\n\
+ { .mii\n\
+ mov r8 = in1\n\
+ mov ar.ec = 6 + 2\n\
+ shr in0 = in0, 3\n\
+ ;;\n\
+ }\n\
+ { .mmi\n\
+ adds in0 = -1, in0\n\
+ mov r16 = in1\n\
+ mov r17 = in2\n\
+ ;;\n\
+ }\n\
+ { .mii\n\
+ mov ar.lc = in0\n\
+ mov pr.rot = 1 << 16\n\
+ ;;\n\
+ }\n\
+ .rotr s1[6+1], s2[6+1], d[2]\n\
+ .rotp p[6+2]\n\
+0: { .mmi\n\
+(p[0]) ld8.nta s1[0] = [r16], 8\n\
+(p[0]) ld8.nta s2[0] = [r17], 8\n\
+(p[6]) xor d[0] = s1[6], s2[6]\n\
+ }\n\
+ { .mfb\n\
+(p[6+1]) st8.nta [r8] = d[1], 8\n\
+ nop.f 0\n\
+ br.ctop.dptk.few 0b\n\
+ ;;\n\
+ }\n\
+ { .mii\n\
+ mov ar.lc = r30\n\
+ mov pr = r29, -1\n\
+ }\n\
+ { .bbb\n\
+ br.ret.sptk.few rp\n\
+ }\n\
+ .endp xor_ia64_2\n\
+\n\
+ .proc xor_ia64_3\n\
+xor_ia64_3:\n\
+ .prologue\n\
+ .fframe 0\n\
+ { .mii\n\
+ .save ar.pfs, r31\n\
+ alloc r31 = ar.pfs, 4, 0, 20, 24\n\
+ .save ar.lc, r30\n\
+ mov r30 = ar.lc\n\
+ .save pr, r29\n\
+ mov r29 = pr\n\
+ ;;\n\
+ }\n\
+ .body\n\
+ { .mii\n\
+ mov r8 = in1\n\
+ mov ar.ec = 6 + 2\n\
+ shr in0 = in0, 3\n\
+ ;;\n\
+ }\n\
+ { .mmi\n\
+ adds in0 = -1, in0\n\
+ mov r16 = in1\n\
+ mov r17 = in2\n\
+ ;;\n\
+ }\n\
+ { .mii\n\
+ mov r18 = in3\n\
+ mov ar.lc = in0\n\
+ mov pr.rot = 1 << 16\n\
+ ;;\n\
+ }\n\
+ .rotr s1[6+1], s2[6+1], s3[6+1], d[2]\n\
+ .rotp p[6+2]\n\
+0: { .mmi\n\
+(p[0]) ld8.nta s1[0] = [r16], 8\n\
+(p[0]) ld8.nta s2[0] = [r17], 8\n\
+(p[6]) xor d[0] = s1[6], s2[6]\n\
+ ;;\n\
+ }\n\
+ { .mmi\n\
+(p[0]) ld8.nta s3[0] = [r18], 8\n\
+(p[6+1]) st8.nta [r8] = d[1], 8\n\
+(p[6]) xor d[0] = d[0], s3[6]\n\
+ }\n\
+ { .bbb\n\
+ br.ctop.dptk.few 0b\n\
+ ;;\n\
+ }\n\
+ { .mii\n\
+ mov ar.lc = r30\n\
+ mov pr = r29, -1\n\
+ }\n\
+ { .bbb\n\
+ br.ret.sptk.few rp\n\
+ }\n\
+ .endp xor_ia64_3\n\
+\n\
+ .proc xor_ia64_4\n\
+xor_ia64_4:\n\
+ .prologue\n\
+ .fframe 0\n\
+ { .mii\n\
+ .save ar.pfs, r31\n\
+ alloc r31 = ar.pfs, 5, 0, 27, 32\n\
+ .save ar.lc, r30\n\
+ mov r30 = ar.lc\n\
+ .save pr, r29\n\
+ mov r29 = pr\n\
+ ;;\n\
+ }\n\
+ .body\n\
+ { .mii\n\
+ mov r8 = in1\n\
+ mov ar.ec = 6 + 2\n\
+ shr in0 = in0, 3\n\
+ ;;\n\
+ }\n\
+ { .mmi\n\
+ adds in0 = -1, in0\n\
+ mov r16 = in1\n\
+ mov r17 = in2\n\
+ ;;\n\
+ }\n\
+ { .mii\n\
+ mov r18 = in3\n\
+ mov ar.lc = in0\n\
+ mov pr.rot = 1 << 16\n\
+ }\n\
+ { .mfb\n\
+ mov r19 = in4\n\
+ ;;\n\
+ }\n\
+ .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]\n\
+ .rotp p[6+2]\n\
+0: { .mmi\n\
+(p[0]) ld8.nta s1[0] = [r16], 8\n\
+(p[0]) ld8.nta s2[0] = [r17], 8\n\
+(p[6]) xor d[0] = s1[6], s2[6]\n\
+ }\n\
+ { .mmi\n\
+(p[0]) ld8.nta s3[0] = [r18], 8\n\
+(p[0]) ld8.nta s4[0] = [r19], 8\n\
+(p[6]) xor r20 = s3[6], s4[6]\n\
+ ;;\n\
+ }\n\
+ { .mib\n\
+(p[6+1]) st8.nta [r8] = d[1], 8\n\
+(p[6]) xor d[0] = d[0], r20\n\
+ br.ctop.dptk.few 0b\n\
+ ;;\n\
+ }\n\
+ { .mii\n\
+ mov ar.lc = r30\n\
+ mov pr = r29, -1\n\
+ }\n\
+ { .bbb\n\
+ br.ret.sptk.few rp\n\
+ }\n\
+ .endp xor_ia64_4\n\
+\n\
+ .proc xor_ia64_5\n\
+xor_ia64_5:\n\
+ .prologue\n\
+ .fframe 0\n\
+ { .mii\n\
+ .save ar.pfs, r31\n\
+ alloc r31 = ar.pfs, 6, 0, 34, 40\n\
+ .save ar.lc, r30\n\
+ mov r30 = ar.lc\n\
+ .save pr, r29\n\
+ mov r29 = pr\n\
+ ;;\n\
+ }\n\
+ .body\n\
+ { .mii\n\
+ mov r8 = in1\n\
+ mov ar.ec = 6 + 2\n\
+ shr in0 = in0, 3\n\
+ ;;\n\
+ }\n\
+ { .mmi\n\
+ adds in0 = -1, in0\n\
+ mov r16 = in1\n\
+ mov r17 = in2\n\
+ ;;\n\
+ }\n\
+ { .mii\n\
+ mov r18 = in3\n\
+ mov ar.lc = in0\n\
+ mov pr.rot = 1 << 16\n\
+ }\n\
+ { .mib\n\
+ mov r19 = in4\n\
+ mov r20 = in5\n\
+ ;;\n\
+ }\n\
+ .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]\n\
+ .rotp p[6+2]\n\
+0: { .mmi\n\
+(p[0]) ld8.nta s1[0] = [r16], 8\n\
+(p[0]) ld8.nta s2[0] = [r17], 8\n\
+(p[6]) xor d[0] = s1[6], s2[6]\n\
+ }\n\
+ { .mmi\n\
+(p[0]) ld8.nta s3[0] = [r18], 8\n\
+(p[0]) ld8.nta s4[0] = [r19], 8\n\
+(p[6]) xor r21 = s3[6], s4[6]\n\
+ ;;\n\
+ }\n\
+ { .mmi\n\
+(p[0]) ld8.nta s5[0] = [r20], 8\n\
+(p[6+1]) st8.nta [r8] = d[1], 8\n\
+(p[6]) xor d[0] = d[0], r21\n\
+ ;;\n\
+ }\n\
+ { .mfb\n\
+(p[6]) xor d[0] = d[0], s5[6]\n\
+ nop.f 0\n\
+ br.ctop.dptk.few 0b\n\
+ ;;\n\
+ }\n\
+ { .mii\n\
+ mov ar.lc = r30\n\
+ mov pr = r29, -1\n\
+ }\n\
+ { .bbb\n\
+ br.ret.sptk.few rp\n\
+ }\n\
+ .endp xor_ia64_5\n\
");
static struct xor_block_template xor_block_ia64 = {