ARM: 7379/1: DT: fix atags_to_fdt() second call site
[linux-flexiantxendom0.git] / arch / arm / boot / compressed / head.S
index c363458..9c18ebd 100644 (file)
@@ -216,6 +216,104 @@ restart:  adr     r0, LC0
                mov     r10, r6
 #endif
 
+               mov     r5, #0                  @ init dtb size to 0
+#ifdef CONFIG_ARM_APPENDED_DTB
+/*
+ *   r0  = delta
+ *   r2  = BSS start
+ *   r3  = BSS end
+ *   r4  = final kernel address
+ *   r5  = appended dtb size (still unknown)
+ *   r6  = _edata
+ *   r7  = architecture ID
+ *   r8  = atags/device tree pointer
+ *   r9  = size of decompressed image
+ *   r10 = end of this image, including bss/stack/malloc space if non XIP
+ *   r11 = GOT start
+ *   r12 = GOT end
+ *   sp  = stack pointer
+ *
+ * if there are device trees (dtb) appended to zImage, advance r10 so that the
+ * dtb data will get relocated along with the kernel if necessary.
+ */
+
+               ldr     lr, [r6, #0]            @ lr = first word at _edata
+#ifndef __ARMEB__
+               ldr     r1, =0xedfe0dd0         @ sig is 0xd00dfeed big endian
+#else
+               ldr     r1, =0xd00dfeed         @ big endian build: compare as-is
+#endif
+               cmp     lr, r1                  @ does _edata start with the sig?
+               bne     dtb_check_done          @ not found
+
+#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
+               /*
+                * OK... Let's do some funky business here.
+                * If we do have a DTB appended to zImage, and we do have
+                * an ATAG list around, we want the latter to be translated
+                * and folded into the former here.  To be on the safe side,
+                * let's temporarily move the stack away into the malloc
+                * area.  No GOT fixup has occurred yet, but none of the
+                * code we're about to call uses any global variable.
+                */
+               add     sp, sp, #0x10000        @ undone by the sub after ldmfd
+               stmfd   sp!, {r0-r3, ip, lr}    @ save regs across the calls
+               mov     r0, r8                  @ r0 = atags/device tree pointer
+               mov     r1, r6                  @ r1 = appended dtb (at _edata)
+               sub     r2, sp, r6              @ r2 = sp - dtb start
+               bl      atags_to_fdt
+
+               /*
+                * If returned value is 1, there is no ATAG at the location
+                * pointed by r8.  Try the typical 0x100 offset from start
+                * of RAM and hope for the best.
+                */
+               cmp     r0, #1                  @ flags are consumed by bleq
+               sub     r0, r4, #TEXT_OFFSET    @ no s suffix: flags preserved
+               add     r0, r0, #0x100          @ r0 = r4 - TEXT_OFFSET + 0x100
+               mov     r1, r6                  @ r1 = appended dtb (at _edata)
+               sub     r2, sp, r6              @ r2 = sp - dtb start
+               bleq    atags_to_fdt            @ retry only if first call gave 1
+
+               ldmfd   sp!, {r0-r3, ip, lr}    @ restore the saved registers
+               sub     sp, sp, #0x10000        @ put the stack back
+#endif
+
+               mov     r8, r6                  @ use the appended device tree
+
+               /*
+                * Make sure that the DTB doesn't end up in the final
+                * kernel's .bss area. To do so, we adjust the decompressed
+                * kernel size to compensate if that .bss size is larger
+                * than the relocated code.
+                */
+               ldr     r5, =_kernel_bss_size   @ r5 is scratch, reloaded below
+               adr     r1, wont_overwrite      @ r1 = address of wont_overwrite
+               sub     r1, r6, r1              @ r1 = _edata - wont_overwrite
+               subs    r1, r5, r1              @ r1 = bss size - that distance
+               addhi   r9, r9, r1              @ bss is larger: grow r9 by excess
+
+               /* Get the dtb's size */
+               ldr     r5, [r6, #4]            @ r5 = word at dtb + 4
+#ifndef __ARMEB__
+               /* convert r5 (dtb size) to little endian */
+               eor     r1, r5, r5, ror #16     @ r5 = A,B,C,D: r1 = A^C,B^D,C^A,D^B
+               bic     r1, r1, #0x00ff0000     @ r1 = A^C, 0, C^A, D^B
+               mov     r5, r5, ror #8          @ r5 = D, A, B, C
+               eor     r5, r5, r1, lsr #8      @ r5 = D, C, B, A (bytes reversed)
+#endif
+
+               /* preserve 64-bit alignment */
+               add     r5, r5, #7              @ round r5 up to the next
+               bic     r5, r5, #7              @ multiple of 8 bytes
+
+               /* relocate some pointers past the appended dtb */
+               add     r6, r6, r5              @ _edata
+               add     r10, r10, r5            @ end of this image
+               add     sp, sp, r5              @ stack pointer
+dtb_check_done:
+#endif
+
 /*
  * Check to see if we will overwrite ourselves.
  *   r4  = final kernel address
@@ -223,15 +321,14 @@ restart:  adr     r0, LC0
  *   r10 = end of this image, including  bss/stack/malloc space if non XIP
  * We basically want:
  *   r4 - 16k page directory >= r10 -> OK
- *   r4 + image length <= current position (pc) -> OK
+ *   r4 + image length <= address of wont_overwrite -> OK
  */
                add     r10, r10, #16384
                cmp     r4, r10
                bhs     wont_overwrite
                add     r10, r4, r9
-   ARM(                cmp     r10, pc         )
- THUMB(                mov     lr, pc          )
- THUMB(                cmp     r10, lr         )
+               adr     r9, wont_overwrite
+               cmp     r10, r9
                bls     wont_overwrite
 
 /*
@@ -285,14 +382,16 @@ wont_overwrite:
  *   r2  = BSS start
  *   r3  = BSS end
  *   r4  = kernel execution address
+ *   r5  = appended dtb size (0 if not present)
  *   r7  = architecture ID
  *   r8  = atags pointer
  *   r11 = GOT start
  *   r12 = GOT end
  *   sp  = stack pointer
  */
-               teq     r0, #0
+               orrs    r1, r0, r5
                beq     not_relocated
+
                add     r11, r11, r0
                add     r12, r12, r0
 
@@ -307,12 +406,21 @@ wont_overwrite:
 
                /*
                 * Relocate all entries in the GOT table.
+                * Bump bss entries to _edata + dtb size
                 */
 1:             ldr     r1, [r11, #0]           @ relocate entries in the GOT
-               add     r1, r1, r0              @ table.  This fixes up the
-               str     r1, [r11], #4           @ C references.
+               add     r1, r1, r0              @ This fixes up C references
+               cmp     r1, r2                  @ if entry >= bss_start &&
+               cmphs   r3, r1                  @       bss_end > entry
+               addhi   r1, r1, r5              @    entry += dtb size
+               str     r1, [r11], #4           @ next entry
                cmp     r11, r12
                blo     1b
+
+               /* bump our bss pointers too */
+               add     r2, r2, r5
+               add     r3, r3, r5
+
 #else
 
                /*
@@ -353,7 +461,8 @@ not_relocated:      mov     r0, #0
                mov     r0, #0                  @ must be zero
                mov     r1, r7                  @ restore architecture number
                mov     r2, r8                  @ restore atags pointer
-               mov     pc, r4                  @ call kernel
+ ARM(          mov     pc, r4  )               @ call kernel
+ THUMB(                bx      r4      )               @ entry point is always ARM
 
                .align  2
                .type   LC0, #object
@@ -473,7 +582,11 @@ __setup_mmu:       sub     r3, r4, #16384          @ Page directory size
                orr     r1, r1, #3 << 10
                add     r2, r3, #16384
 1:             cmp     r1, r9                  @ if virt > start of RAM
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+               orrhs   r1, r1, #0x08           @ set cacheable
+#else
                orrhs   r1, r1, #0x0c           @ set cacheable, bufferable
+#endif
                cmp     r1, r10                 @ if virt > end of RAM
                bichs   r1, r1, #0x0c           @ clear cacheable, bufferable
                str     r1, [r0], #4            @ 1:1 mapping
@@ -498,6 +611,12 @@ __setup_mmu:       sub     r3, r4, #16384          @ Page directory size
                mov     pc, lr
 ENDPROC(__setup_mmu)
 
+__arm926ejs_mmu_cache_on:                      @ falls into __armv4_mmu_cache_on
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+               mov     r0, #4                  @ put dcache in WT mode
+               mcr     p15, 7, r0, c15, c0, 0  @ CP15 c15 write; NOTE(review): confirm encoding against the ARM926EJ-S TRM
+#endif
+
 __armv4_mmu_cache_on:
                mov     r12, lr
 #ifdef CONFIG_MMU
@@ -587,6 +706,8 @@ __common_mmu_cache_on:
                sub     pc, lr, r0, lsr #32     @ properly flush pipeline
 #endif
 
+#define PROC_ENTRY_SIZE (4*5)  /* bytes per proc_types entry: 5 words of 4 bytes */
+
 /*
  * Here follow the relocatable cache support functions for the
  * various processors.  This is a generic hook for locating an
@@ -614,7 +735,7 @@ call_cache_fn:      adr     r12, proc_types
  ARM(          addeq   pc, r12, r3             ) @ call cache function
  THUMB(                addeq   r12, r3                 )
  THUMB(                moveq   pc, r12                 ) @ call cache function
-               add     r12, r12, #4*5
+               add     r12, r12, #PROC_ENTRY_SIZE
                b       1b
 
 /*
@@ -679,6 +800,12 @@ proc_types:
                W(b)    __armv4_mpu_cache_off
                W(b)    __armv4_mpu_cache_flush
 
+               .word   0x41069260              @ ARM926EJ-S (v5TEJ)
+               .word   0xff0ffff0
+               W(b)    __arm926ejs_mmu_cache_on
+               W(b)    __armv4_mmu_cache_off
+               W(b)    __armv5tej_mmu_cache_flush
+
                .word   0x00007000              @ ARM7 IDs
                .word   0x0000f000
                mov     pc, lr
@@ -778,6 +905,16 @@ proc_types:
 
                .size   proc_types, . - proc_types
 
+               /*
+                * If you get a "non-constant expression in ".if" statement"
+                * error from the assembler on this line, check that you have
+                * not accidentally written a "b" instruction where you should
+                * have written W(b).
+                */
+               .if (. - proc_types) % PROC_ENTRY_SIZE != 0
+               .error "The size of one or more proc_types entries is wrong."
+               .endif
+
 /*
  * Turn off the Cache and MMU.  ARMv3 does not support
  * reading the control register, but ARMv4 does.