ARM: implement relocation for ARM926

Change the implementation for arm926 to relocate the code to
an arbitrary address in RAM.

Adapt the TX25 (i.MX25), magnesium board to test the changes.

On the tx25 board TEXT_BASE is set to the final relocation
address to prevent one more copying of u-boot code
when relocating. More info see:
doc/README.arm-relocation

da850 board:
Tested-by: Ben Gardiner <bengardiner@nanometrics.ca>

Portions of this work were supported by funding from
the CE Linux Forum.

Signed-off-by: Heiko Schocher <hs@denx.de>
Cc: Ben Gardiner <bengardiner@nanometrics.ca>
diff --git a/arch/arm/cpu/arm926ejs/orion5x/dram.c b/arch/arm/cpu/arm926ejs/orion5x/dram.c
index c719798..c5c8ab7 100644
--- a/arch/arm/cpu/arm926ejs/orion5x/dram.c
+++ b/arch/arm/cpu/arm926ejs/orion5x/dram.c
@@ -49,7 +49,7 @@
 	result = winregs[bank].base;
 	return result;
 }
-
+#if defined(CONFIG_SYS_ARM_WITHOUT_RELOC)
 int dram_init(void)
 {
 	int i;
@@ -62,3 +62,25 @@
 	}
 	return 0;
 }
+#else
+int dram_init (void)
+{
+	/* dram_init must store complete ramsize in gd->ram_size */
+	gd->ram_size = get_ram_size(
+			(volatile long *) orion5x_sdram_bar(0),
+			CONFIG_MAX_RAM_BANK_SIZE);
+	return 0;
+}
+
+void dram_init_banksize (void)
+{
+	int i;
+
+	for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
+		gd->bd->bi_dram[i].start = orion5x_sdram_bar(i);
+		gd->bd->bi_dram[i].size = get_ram_size(
+			(volatile long *) (gd->bd->bi_dram[i].start),
+			CONFIG_MAX_RAM_BANK_SIZE);
+	}
+}
+#endif
diff --git a/arch/arm/cpu/arm926ejs/start.S b/arch/arm/cpu/arm926ejs/start.S
index cf40ce1..16ee972 100644
--- a/arch/arm/cpu/arm926ejs/start.S
+++ b/arch/arm/cpu/arm926ejs/start.S
@@ -114,12 +114,15 @@
  *************************************************************************
  */
 
+.globl _TEXT_BASE
 _TEXT_BASE:
 	.word	TEXT_BASE
 
+#if defined(CONFIG_SYS_ARM_WITHOUT_RELOC)
 .globl _armboot_start
 _armboot_start:
 	.word _start
+#endif
 
 /*
  * These are defined in the board-specific linker script.
@@ -144,7 +147,165 @@
 	.word 0x0badc0de
 #endif
 
+#if !defined(CONFIG_SYS_ARM_WITHOUT_RELOC)
+/* IRQ stack memory (calculated at run-time) + 8 bytes */
+.globl IRQ_STACK_START_IN
+IRQ_STACK_START_IN:
+	.word	0x0badc0de
 
+.globl _datarel_start
+_datarel_start:
+	.word __datarel_start
+
+.globl _datarelrolocal_start
+_datarelrolocal_start:
+	.word __datarelrolocal_start
+
+.globl _datarellocal_start
+_datarellocal_start:
+	.word __datarellocal_start
+
+.globl _datarelro_start
+_datarelro_start:
+	.word __datarelro_start
+
+.globl _got_start
+_got_start:
+	.word __got_start
+
+.globl _got_end
+_got_end:
+	.word __got_end
+
+/*
+ * the actual reset code
+ */
+
+reset:
+	/*
+	 * set the cpu to SVC32 mode
+	 */
+	mrs	r0,cpsr
+	bic	r0,r0,#0x1f
+	orr	r0,r0,#0xd3
+	msr	cpsr,r0
+
+	/*
+	 * we do sys-critical inits only at reboot,
+	 * not when booting from ram!
+	 */
+#ifndef CONFIG_SKIP_LOWLEVEL_INIT
+	bl	cpu_init_crit
+#endif
+
+/* Set stackpointer in internal RAM to call board_init_f */
+call_board_init_f:
+	ldr	sp, =(CONFIG_SYS_INIT_SP_ADDR)
+	ldr	r0,=0x00000000
+	bl	board_init_f
+
+/*------------------------------------------------------------------------------*/
+
+/*
+ * void relocate_code (addr_sp, gd, addr_moni)
+ *
+ * This "function" does not return, instead it continues in RAM
+ * after relocating the monitor code.
+ *
+ */
+	.globl	relocate_code
+relocate_code:
+	mov	r4, r0	/* save addr_sp */
+	mov	r5, r1	/* save addr of gd */
+	mov	r6, r2	/* save addr of destination */
+	mov	r7, r2	/* save addr of destination */
+
+	/* Set up the stack						    */
+stack_setup:
+	mov	sp, r4
+
+	adr	r0, _start
+	ldr	r2, _TEXT_BASE
+	ldr	r3, _bss_start
+	sub	r2, r3, r2		/* r2 <- size of armboot	    */
+	add	r2, r0, r2		/* r2 <- source end address	    */
+	cmp	r0, r6
+	beq	clear_bss
+
+#ifndef CONFIG_SKIP_RELOCATE_UBOOT
+copy_loop:
+	ldmia	r0!, {r9-r10}		/* copy from source address [r0]    */
+	stmia	r6!, {r9-r10}		/* copy to   target address [r1]    */
+	cmp	r0, r2			/* until source end addreee [r2]    */
+	ble	copy_loop
+
+#ifndef CONFIG_PRELOADER
+	/* fix got entries */
+	ldr	r1, _TEXT_BASE		/* Text base */
+	mov	r0, r7			/* reloc addr */
+	ldr	r2, _got_start		/* addr in Flash */
+	ldr	r3, _got_end		/* addr in Flash */
+	sub	r3, r3, r1
+	add	r3, r3, r0
+	sub	r2, r2, r1
+	add	r2, r2, r0
+
+fixloop:
+	ldr	r4, [r2]
+	sub	r4, r4, r1
+	add	r4, r4, r0
+	str	r4, [r2]
+	add	r2, r2, #4
+	cmp	r2, r3
+	bne	fixloop
+#endif
+#endif	/* #ifndef CONFIG_SKIP_RELOCATE_UBOOT */
+
+clear_bss:
+#ifndef CONFIG_PRELOADER
+	ldr	r0, _bss_start
+	ldr	r1, _bss_end
+	ldr	r3, _TEXT_BASE		/* Text base */
+	mov	r4, r7			/* reloc addr */
+	sub	r0, r0, r3
+	add	r0, r0, r4
+	sub	r1, r1, r3
+	add	r1, r1, r4
+	mov	r2, #0x00000000		/* clear			    */
+
+clbss_l:str	r2, [r0]		/* clear loop...		    */
+	add	r0, r0, #4
+	cmp	r0, r1
+	bne	clbss_l
+
+	bl coloured_LED_init
+	bl red_LED_on
+#endif
+
+/*
+ * We are done. Do not return, instead branch to second part of board
+ * initialization, now running from RAM.
+ */
+#ifdef CONFIG_NAND_SPL
+	ldr     pc, _nand_boot
+
+_nand_boot: .word nand_boot
+#else
+	ldr	r0, _TEXT_BASE
+	ldr	r2, _board_init_r
+	sub	r2, r2, r0
+	add	r2, r2, r7	/* position from board_init_r in RAM */
+	/* setup parameters for board_init_r */
+	mov	r0, r5		/* gd_t */
+	mov	r1, r7		/* dest_addr */
+	/* jump to it ... */
+	mov	lr, r2
+	mov	pc, lr
+
+_board_init_r: .word board_init_r
+#endif
+
+#else /* #if !defined(CONFIG_SYS_ARM_WITHOUT_RELOC) */
 /*
  * the actual reset code
  */
@@ -221,7 +382,7 @@
 #else
 	.word start_armboot
 #endif /* CONFIG_NAND_SPL */
-
+#endif /* #if !defined(CONFIG_SYS_ARM_WITHOUT_RELOC) */
 
 /*
  *************************************************************************
@@ -307,10 +468,13 @@
 	@ carve out a frame on current user stack
 	sub	sp, sp, #S_FRAME_SIZE
 	stmia	sp, {r0 - r12}	@ Save user registers (now in svc mode) r0-r12
-
+#if defined(CONFIG_SYS_ARM_WITHOUT_RELOC)
 	ldr	r2, _armboot_start
 	sub	r2, r2, #(CONFIG_STACKSIZE+CONFIG_SYS_MALLOC_LEN)
 	sub	r2, r2, #(CONFIG_SYS_GBL_DATA_SIZE+8)  @ set base 2 words into abort stack
+#else
+	ldr	r2, IRQ_STACK_START_IN
+#endif
 	@ get values for "aborted" pc and cpsr (into parm regs)
 	ldmia	r2, {r2 - r3}
 	add	r0, sp, #S_FRAME_SIZE		@ grab pointer to old stack
@@ -342,9 +506,13 @@
 	.endm
 
 	.macro get_bad_stack
+#if defined(CONFIG_SYS_ARM_WITHOUT_RELOC)
 	ldr	r13, _armboot_start		@ setup our mode stack
 	sub	r13, r13, #(CONFIG_STACKSIZE+CONFIG_SYS_MALLOC_LEN)
 	sub	r13, r13, #(CONFIG_SYS_GBL_DATA_SIZE+8) @ reserved a couple spots in abort stack
+#else
+	ldr	r13, IRQ_STACK_START_IN		@ setup our mode stack
+#endif
 
 	str	lr, [r13]	@ save caller lr in position 0 of saved stack
 	mrs	lr, spsr	@ get the spsr
diff --git a/arch/arm/cpu/arm926ejs/u-boot.lds b/arch/arm/cpu/arm926ejs/u-boot.lds
index ecbc58c..02eb8ca 100644
--- a/arch/arm/cpu/arm926ejs/u-boot.lds
+++ b/arch/arm/cpu/arm926ejs/u-boot.lds
@@ -39,11 +39,23 @@
 	.rodata : { *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.rodata*))) }
 
 	. = ALIGN(4);
-	.data : { *(.data) }
+	.data : {
+		*(.data)
+	__datarel_start = .;
+		*(.data.rel)
+	__datarelrolocal_start = .;
+		*(.data.rel.ro.local)
+	__datarellocal_start = .;
+		*(.data.rel.local)
+	__datarelro_start = .;
+		*(.data.rel.ro)
+	}
 
+	__got_start = .;
 	. = ALIGN(4);
 	.got : { *(.got) }
 
+	__got_end = .;
 	. = .;
 	__u_boot_cmd_start = .;
 	.u_boot_cmd : { *(.u_boot_cmd) }
diff --git a/board/karo/tx25/config.mk b/board/karo/tx25/config.mk
index 732a14a..51ca1ab 100644
--- a/board/karo/tx25/config.mk
+++ b/board/karo/tx25/config.mk
@@ -1,5 +1,5 @@
 ifdef CONFIG_NAND_SPL
-TEXT_BASE = 0x81ec0000
+TEXT_BASE = 0x810c0000
 else
-TEXT_BASE = 0x81f00000
+TEXT_BASE = 0x81fc0000
 endif
diff --git a/board/karo/tx25/tx25.c b/board/karo/tx25/tx25.c
index 2608698..dc57d5c 100644
--- a/board/karo/tx25/tx25.c
+++ b/board/karo/tx25/tx25.c
@@ -159,7 +159,14 @@
 
 int dram_init (void)
 {
+	/* dram_init must store complete ramsize in gd->ram_size */
+	gd->ram_size = get_ram_size((volatile void *)PHYS_SDRAM_1,
+				PHYS_SDRAM_1_SIZE);
+	return 0;
+}
 
+void dram_init_banksize(void)
+{
 	gd->bd->bi_dram[0].start = PHYS_SDRAM_1;
 	gd->bd->bi_dram[0].size = get_ram_size((volatile void *)PHYS_SDRAM_1,
 			PHYS_SDRAM_1_SIZE);
@@ -167,9 +174,9 @@
 	gd->bd->bi_dram[1].start = PHYS_SDRAM_2;
 	gd->bd->bi_dram[1].size = get_ram_size((volatile void *)PHYS_SDRAM_2,
 			PHYS_SDRAM_2_SIZE);
-#endif
+#else
 
-	return 0;
+#endif
 }
 
 int checkboard(void)
diff --git a/board/keymile/km_arm/km_arm.c b/board/keymile/km_arm/km_arm.c
index d7cbd7a..7c0b858 100644
--- a/board/keymile/km_arm/km_arm.c
+++ b/board/keymile/km_arm/km_arm.c
@@ -225,6 +225,7 @@
 	);
 #endif
 
+#if defined(CONFIG_SYS_ARM_WITHOUT_RELOC)
 int dram_init(void)
 {
 	int i;
@@ -234,8 +235,31 @@
 		gd->bd->bi_dram[i].size = get_ram_size((long *)kw_sdram_bar(i),
 						       kw_sdram_bs(i));
 	}
+
 	return 0;
 }
+#else
+int dram_init(void)
+{
+	/* dram_init must store complete ramsize in gd->ram_size */
+	/* Fix this */
+	gd->ram_size = get_ram_size((volatile void *)kw_sdram_bar(0),
+				kw_sdram_bs(0));
+	return 0;
+}
+
+void dram_init_banksize(void)
+{
+	int i;
+
+	for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
+		gd->bd->bi_dram[i].start = kw_sdram_bar(i);
+		gd->bd->bi_dram[i].size = kw_sdram_bs(i);
+		gd->bd->bi_dram[i].size = get_ram_size((long *)kw_sdram_bar(i),
+						       kw_sdram_bs(i));
+	}
+}
+#endif
 
 /* Configure and enable MV88E1118 PHY */
 void reset_phy(void)
diff --git a/board/logicpd/imx27lite/config.mk b/board/logicpd/imx27lite/config.mk
index a2e7768..2f9c4e6 100644
--- a/board/logicpd/imx27lite/config.mk
+++ b/board/logicpd/imx27lite/config.mk
@@ -1 +1 @@
-TEXT_BASE = 0xA7F00000
+TEXT_BASE = 0xc0000000
diff --git a/board/logicpd/imx27lite/imx27lite.c b/board/logicpd/imx27lite/imx27lite.c
index 4427415..6eb5cc2 100644
--- a/board/logicpd/imx27lite/imx27lite.c
+++ b/board/logicpd/imx27lite/imx27lite.c
@@ -66,19 +66,22 @@
 
 int dram_init (void)
 {
+	/* dram_init must store complete ramsize in gd->ram_size */
+	gd->ram_size = get_ram_size((volatile void *)CONFIG_SYS_SDRAM_BASE,
+				PHYS_SDRAM_1_SIZE);
+	return 0;
+}
 
-#if CONFIG_NR_DRAM_BANKS > 0
-	gd->bd->bi_dram[0].start = PHYS_SDRAM_1;
-	gd->bd->bi_dram[0].size = get_ram_size((volatile void *)PHYS_SDRAM_1,
+void dram_init_banksize(void)
+{
+	gd->bd->bi_dram[0].start = CONFIG_SYS_SDRAM_BASE;
+	gd->bd->bi_dram[0].size = get_ram_size((volatile void *)CONFIG_SYS_SDRAM_BASE,
 			PHYS_SDRAM_1_SIZE);
-#endif
 #if CONFIG_NR_DRAM_BANKS > 1
 	gd->bd->bi_dram[1].start = PHYS_SDRAM_2;
 	gd->bd->bi_dram[1].size = get_ram_size((volatile void *)PHYS_SDRAM_2,
 			PHYS_SDRAM_2_SIZE);
 #endif
-
-	return 0;
 }
 
 int checkboard(void)
diff --git a/doc/README.arm-relocation b/doc/README.arm-relocation
index 6be1a12..e3ed60e 100644
--- a/doc/README.arm-relocation
+++ b/doc/README.arm-relocation
@@ -43,6 +43,29 @@
 
 -------------------------------------------------------------------------------------
 
+For boards which boot from nand_spl, it is possible to save a copy
+if TEXT_BASE == relocation address! This prevents that uboot code
+is copied again in relocate_code().
+
+example for the tx25 board:
+
+a) cpu starts
+b) it copies the first page in nand to internal ram
+   (nand_spl_code)
+c) end executes this code
+d) this initialize CPU, RAM, ... and copy itself to RAM
+   (this bin must fit in one page, so board_init_f()
+    don;t fit in it ... )
+e) there it copy u-boot to CONFIG_SYS_NAND_U_BOOT_DST and
+   starts this image @ CONFIG_SYS_NAND_U_BOOT_START
+f) u-boot code steps through board_init_f() and calculates
+   the relocation address and copy itself to it
+
+If TEXT_BASE == relocation address, the copying of u-boot
+in f) could be saved.
+
+-------------------------------------------------------------------------------------
+
 ToDo:
 
 - fill in bd_t infos (check)
diff --git a/include/configs/da850evm.h b/include/configs/da850evm.h
index 357715d..016a21e 100644
--- a/include/configs/da850evm.h
+++ b/include/configs/da850evm.h
@@ -39,7 +39,6 @@
 #define CONFIG_SYS_HZ_CLOCK		clk_get(DAVINCI_AUXCLK_CLKID)
 #define CONFIG_SYS_HZ			1000
 #define CONFIG_SKIP_LOWLEVEL_INIT
-#define CONFIG_SKIP_RELOCATE_UBOOT	/* to a proper address, init done */
 
 /*
  * Memory Info
@@ -137,4 +136,9 @@
 #undef CONFIG_CMD_ENV
 #endif
 
+/* additions for new relocation code, must added to all boards */
+#undef CONFIG_SYS_ARM_WITHOUT_RELOC /* This board is tested with relocation support */
+#define CONFIG_SYS_SDRAM_BASE		0xc0000000
+#define CONFIG_SYS_INIT_SP_ADDR		(CONFIG_SYS_SDRAM_BASE + 0x1000 - /* Fix this */ \
+					CONFIG_SYS_GBL_DATA_SIZE)
 #endif /* __CONFIG_H */
diff --git a/include/configs/imx27lite-common.h b/include/configs/imx27lite-common.h
index 33550ba..812e5f2 100644
--- a/include/configs/imx27lite-common.h
+++ b/include/configs/imx27lite-common.h
@@ -235,4 +235,9 @@
 	"mtdids=" MTDIDS_DEFAULT "\0"					\
 	"mtdparts=" MTDPARTS_DEFAULT "\0"				\
 
+/* additions for new relocation code, must added to all boards */
+#undef CONFIG_SYS_ARM_WITHOUT_RELOC /* This board is tested with relocation support */
+#define CONFIG_SYS_SDRAM_BASE		PHYS_SDRAM_1
+#define CONFIG_SYS_INIT_SP_ADDR		(CONFIG_SYS_SDRAM_BASE + 0x1000 - /* Fix this */ \
+					CONFIG_SYS_GBL_DATA_SIZE)
 #endif /* __IMX27LITE_COMMON_CONFIG_H */
diff --git a/include/configs/km_arm.h b/include/configs/km_arm.h
index 1617e69..8673e6f 100644
--- a/include/configs/km_arm.h
+++ b/include/configs/km_arm.h
@@ -180,4 +180,8 @@
 #undef	CONFIG_JFFS2_CMDLINE
 #endif
 
+/* additions for new relocation code, must added to all boards */
+#define CONFIG_SYS_SDRAM_BASE		0x00000000
+#define CONFIG_SYS_INIT_SP_ADDR		(0x00000000 + 0x1000 - /* Fix this */ \
+					CONFIG_SYS_GBL_DATA_SIZE)
 #endif /* _CONFIG_KM_ARM_H */
diff --git a/include/configs/tx25.h b/include/configs/tx25.h
index 013aa35..c798570 100644
--- a/include/configs/tx25.h
+++ b/include/configs/tx25.h
@@ -40,9 +40,9 @@
 /* Start copying real U-boot from the second page */
 #define CONFIG_SYS_NAND_U_BOOT_OFFS	0x800
 #define CONFIG_SYS_NAND_U_BOOT_SIZE	0x30000
-/* Load U-Boot to this address */
-#define CONFIG_SYS_NAND_U_BOOT_DST	0x81f00000
-#define CONFIG_SYS_NAND_U_BOOT_START	CONFIG_SYS_NAND_U_BOOT_DST
+
+#define CONFIG_SYS_NAND_U_BOOT_DST      (0x81fc0000)
+#define CONFIG_SYS_NAND_U_BOOT_START    CONFIG_SYS_NAND_U_BOOT_DST
 
 #define CONFIG_SYS_NAND_PAGE_SIZE	2048
 #define CONFIG_SYS_NAND_SPARE_SIZE	64
@@ -52,7 +52,6 @@
 #define CONFIG_SYS_NAND_BAD_BLOCK_POS	0
 #else
 #define CONFIG_SKIP_LOWLEVEL_INIT
-#define CONFIG_SKIP_RELOCATE_UBOOT
 #endif
 
 #define CONFIG_DISPLAY_CPUINFO
@@ -177,4 +176,10 @@
 	"update=nand erase 0 40000;nand write ${loadaddr} 0 40000\0"	\
 	"upd=run load update\0"						\
 
+/* additions for new relocation code, must added to all boards */
+#undef CONFIG_SYS_ARM_WITHOUT_RELOC /* This board is tested with relocation support */
+#define CONFIG_SYS_SDRAM_BASE		PHYS_SDRAM_1
+#define CONFIG_SYS_INIT_SP_ADDR		(CONFIG_SYS_SDRAM_BASE + 0x1000 - /* Fix this */ \
+					CONFIG_SYS_GBL_DATA_SIZE)
+
 #endif /* __CONFIG_H */
diff --git a/nand_spl/board/karo/tx25/u-boot.lds b/nand_spl/board/karo/tx25/u-boot.lds
index 423bed3..c572557 100644
--- a/nand_spl/board/karo/tx25/u-boot.lds
+++ b/nand_spl/board/karo/tx25/u-boot.lds
@@ -41,11 +41,23 @@
 	.rodata : { *(.rodata) }
 
 	. = ALIGN(4);
-	.data : { *(.data) }
+	.data : {
+		*(.data)
+	__datarel_start = .;
+		*(.data.rel)
+	__datarelrolocal_start = .;
+		*(.data.rel.ro.local)
+	__datarellocal_start = .;
+		*(.data.rel.local)
+	__datarelro_start = .;
+		*(.data.rel.ro)
+	}
 
+	__got_start = .;
 	. = ALIGN(4);
 	.got : { *(.got) }
 
+	__got_end = .;
 	. = .;
 	__u_boot_cmd_start = .;
 	.u_boot_cmd : { *(.u_boot_cmd) }