AVR32: Relocate u-boot to SDRAM

Relocate the u-boot image into SDRAM like everyone else does. This
means that we can handle much larger .data and .bss than we used to.

Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
diff --git a/cpu/at32ap/entry.S b/cpu/at32ap/entry.S
index b52d798..a6fc688 100644
--- a/cpu/at32ap/entry.S
+++ b/cpu/at32ap/entry.S
@@ -42,8 +42,7 @@
 	 * We're running at interrupt level 3, so we don't need to save
 	 * r8-r12 or lr to the stack.
 	 */
-	mov	r8, lo(timer_overflow)
-	orh	r8, hi(timer_overflow)
+	lda.w	r8, timer_overflow
 	ld.w	r9, r8[0]
 	mov	r10, -1
 	mtsr	SYSREG_COMPARE, r10
diff --git a/cpu/at32ap/exception.c b/cpu/at32ap/exception.c
index 4123c44..0672685 100644
--- a/cpu/at32ap/exception.c
+++ b/cpu/at32ap/exception.c
@@ -24,6 +24,8 @@
 #include <asm/sysreg.h>
 #include <asm/ptrace.h>
 
+DECLARE_GLOBAL_DATA_PTR;
+
 static const char * const cpu_modes[8] = {
 	"Application", "Supervisor", "Interrupt level 0", "Interrupt level 1",
 	"Interrupt level 2", "Interrupt level 3", "Exception", "NMI"
@@ -109,11 +111,10 @@
 	printf("CPU Mode: %s\n", cpu_modes[mode]);
 
 	/* Avoid exception loops */
-	if (regs->sp >= CFG_INIT_SP_ADDR
-	    || regs->sp < (CFG_INIT_SP_ADDR - CONFIG_STACKSIZE))
+	if (regs->sp < CFG_SDRAM_BASE || regs->sp >= gd->stack_end)
 		printf("\nStack pointer seems bogus, won't do stack dump\n");
 	else
-		dump_mem("\nStack: ", regs->sp, CFG_INIT_SP_ADDR);
+		dump_mem("\nStack: ", regs->sp, gd->stack_end);
 
 	panic("Unhandled exception\n");
 }
diff --git a/cpu/at32ap/interrupts.c b/cpu/at32ap/interrupts.c
index 85420a4..c9e0499 100644
--- a/cpu/at32ap/interrupts.c
+++ b/cpu/at32ap/interrupts.c
@@ -115,9 +115,12 @@
 static int set_interrupt_handler(unsigned int nr, void (*handler)(void),
 				 unsigned int priority)
 {
+	extern void _evba(void);
 	unsigned long intpr;
 	unsigned long handler_addr = (unsigned long)handler;
 
+	handler_addr -= (unsigned long)&_evba;
+
 	if ((handler_addr & HANDLER_MASK) != handler_addr
 	    || (priority & INTLEV_MASK) != priority)
 		return -EINVAL;
diff --git a/cpu/at32ap/start.S b/cpu/at32ap/start.S
index 4ae0b54..ab8c2b7 100644
--- a/cpu/at32ap/start.S
+++ b/cpu/at32ap/start.S
@@ -70,32 +70,12 @@
 
 2:	lddpc	sp, sp_init
 
-	/*
-	 * Relocate the data section and initialize .bss.  Everything
-	 * is guaranteed to be at least doubleword aligned by the
-	 * linker script.
-	 */
-	lddpc	r12, .Ldata_vma
-	lddpc	r11, .Ldata_lma
-	lddpc	r10, .Ldata_end
-	sub	r10, r12
-4:	ld.d	r8, r11++
-	sub	r10, 8
-	st.d	r12++, r8
-	brne	4b
-
-	mov	r8, 0
-	mov	r9, 0
-	lddpc	r10, .Lbss_end
-	sub	r10, r12
-4:	sub	r10, 8
-	st.d	r12++, r8
-	brne	4b
-
 	/* Initialize the GOT pointer */
 	lddpc	r6, got_init
 3:	rsub	r6, pc
-	ld.w	pc, r6[board_init_f@got]
+
+	/* Let's go */
+	rjmp	board_init_f
 
 	.align	2
 	.type	sp_init,@object
@@ -103,11 +83,82 @@
 	.long	CFG_INIT_SP_ADDR
 got_init:
 	.long	3b - _GLOBAL_OFFSET_TABLE_
-.Ldata_lma:
-	.long	__data_lma
-.Ldata_vma:
-	.long	_data
-.Ldata_end:
-	.long	_edata
-.Lbss_end:
-	.long	_end
+
+	/*
+	 * void	relocate_code(new_sp, new_gd, monitor_addr)
+	 *
+	 * Relocate the u-boot image into RAM and continue from there.
+	 * Does not return.
+	 */
+	.global	relocate_code
+	.type	relocate_code,@function
+relocate_code:
+	mov	sp, r12		/* use new stack */
+	mov	r12, r11	/* save new_gd */
+	mov	r11, r10	/* save destination address */
+
+	/* copy .text section and flush the cache along the way */
+	lda.w	r8, _text
+	lda.w	r9, _etext
+	sub	lr, r10, r8	/* relocation offset */
+
+1:	ldm	r8++, r0-r3
+	stm	r10, r0-r3
+	sub	r10, -16
+	ldm	r8++, r0-r3
+	stm	r10, r0-r3
+	sub	r10, -16
+	cp.w	r8, r9
+	cache	r10[-4], 0x0d	/* dcache clean/invalidate */
+	cache	r10[-4], 0x01	/* icache invalidate */
+	brlt	1b
+
+	/* flush write buffer */
+	sync	0
+
+	/* copy data sections */
+	lda.w	r9, _edata
+1:	ld.d	r0, r8++
+	st.d	r10++, r0
+	cp.w	r8, r9
+	brlt	1b
+
+	/* zero out .bss */
+	mov	r0, 0
+	mov	r1, 0
+	lda.w	r9, _end
+	sub	r9, r8
+1:	st.d	r10++, r0
+	sub	r9, 8
+	brgt	1b
+
+	/* jump to RAM */
+	sub	r0, pc, . - in_ram
+	add	pc, r0, lr
+
+	.align	2
+in_ram:
+	/* find the new GOT and relocate it */
+	lddpc	r6, got_init_reloc
+3:	rsub	r6, pc
+	mov	r8, r6
+	lda.w	r9, _egot
+	lda.w	r10, _got
+	sub	r9, r10
+1:	ld.w	r0, r8[0]
+	add	r0, lr
+	st.w	r8++, r0
+	sub	r9, 4
+	brgt	1b
+
+	/* Move the exception handlers */
+	mfsr	r2, SYSREG_EVBA
+	add	r2, lr
+	mtsr	SYSREG_EVBA, r2
+
+	/* Do the rest of the initialization sequence */
+	call	board_init_r
+
+	.align	2
+got_init_reloc:
+	.long	3b - _GLOBAL_OFFSET_TABLE_