arm: move C runtime setup code into crt0.S

Move all the C runtime setup code from every start.S
in arch/arm into arch/arm/lib/crt0.S. This covers
the code sequence from setting up the initial stack
to calling into board_init_r().

Also, rewrite the C runtime setup and make functions
board_init_*() and relocate_code() behave according to
normal C semantics (no jumping across the C stack any
more, etc).

Some SPL targets had to be touched because they use
start.S explicitly or for some reason; the relevant
maintainers and custodians are cc:ed.

Signed-off-by: Albert ARIBAUD <albert.u.boot@aribaud.net>
diff --git a/arch/arm/lib/crt0.S b/arch/arm/lib/crt0.S
new file mode 100644
index 0000000..4f60958
--- /dev/null
+++ b/arch/arm/lib/crt0.S
@@ -0,0 +1,173 @@
+/*
+ *  crt0 - C-runtime startup Code for ARM U-Boot
+ *
+ *  Copyright (c) 2012  Albert ARIBAUD <albert.u.boot@aribaud.net>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <config.h>
+#include <asm-offsets.h>
+
+/*
+ * This file handles the target-independent stages of the U-Boot
+ * start-up where a C runtime environment is needed. Its entry point
+ * is _main and is branched into from the target's start.S file.
+ *
+ * _main execution sequence is:
+ *
+ * 1. Set up initial environment for calling board_init_f().
+ *    This environment only provides a stack and a place to store
+ *    the GD ('global data') structure, both located in some readily
+ *    available RAM (SRAM, locked cache...). In this context, VARIABLE
+ *    global data, initialized or not (BSS), are UNAVAILABLE; only
+ *    CONSTANT initialized data are available.
+ *
+ * 2. Call board_init_f(). This function prepares the hardware for
+ *    execution from system RAM (DRAM, DDR...). As system RAM may not
+ *    be available yet, board_init_f() must use the current GD to
+ *    store any data which must be passed on to later stages. These
+ *    data include the relocation destination, the future stack, and
+ *    the future GD location.
+ *
+ * (the following applies only to non-SPL builds)
+ *
+ * 3. Set up intermediate environment where the stack and GD are the
+ *    ones allocated by board_init_f() in system RAM, but BSS and
+ *    initialized non-const data are still not available.
+ *
+ * 4. Call relocate_code(). This function relocates U-Boot from its
+ *    current location into the relocation destination computed by
+ *    board_init_f().
+ *
+ * 5. Set up final environment for calling board_init_r(). This
+ *    environment has BSS (initialized to 0), initialized non-const
+ *    data (initialized to their intended value), and stack in system
+ *    RAM. GD has retained values set by board_init_f(). Some CPUs
+ *    have some work left to do at this point regarding memory, so
+ *    call c_runtime_cpu_setup.
+ *
+ * 6. Branch to either nand_boot() or board_init_r().
+ */
+
+/*
+ * Declare the symbol crt0 finally jumps to: nand_boot() or board_init_r()
+ */
+
+#if defined(CONFIG_NAND_SPL)
+
+.globl nand_boot
+
+#elif ! defined(CONFIG_SPL_BUILD)
+
+.globl board_init_r
+
+#endif
+
+/*
+ * Start and end of BSS, used by the BSS clearing loop in _main
+ */
+
+.globl __bss_start
+.globl __bss_end__
+
+/*
+ * Entry point of the crt0 sequence
+ */
+
+.global _main
+
+_main:
+
+/*
+ * Set up initial C runtime environment and call board_init_f(0).
+ */
+
+#if defined(CONFIG_NAND_SPL)
+	/* deprecated, use CONFIG_SPL_BUILD instead */
+	ldr	sp, =(CONFIG_SYS_INIT_SP_ADDR)
+#elif defined(CONFIG_SPL_BUILD) && defined(CONFIG_SPL_STACK)
+	ldr	sp, =(CONFIG_SPL_STACK)
+#else
+	ldr	sp, =(CONFIG_SYS_INIT_SP_ADDR)
+#endif
+	bic	sp, sp, #7	/* 8-byte alignment for ABI compliance */
+	sub	sp, #GD_SIZE	/* allocate one GD above SP */
+	bic	sp, sp, #7	/* 8-byte alignment for ABI compliance */
+	mov	r8, sp		/* r8 = gd; GD is above SP */
+	mov	r0, #0		/* argument for board_init_f(0) */
+	bl	board_init_f
+
+#if ! defined(CONFIG_SPL_BUILD)
+
+/*
+ * Set up intermediate environment (new sp and gd) and call
+ * relocate_code(addr_sp, gd, addr_moni). lr is preloaded with
+ * 'here' + gd->reloc_off so that we return to 'here', relocated.
+ */
+
+	ldr	sp, [r8, #GD_START_ADDR_SP]	/* sp = gd->start_addr_sp */
+	bic	sp, sp, #7	/* 8-byte alignment for ABI compliance */
+	ldr	r8, [r8, #GD_BD]		/* r8 = gd->bd */
+	sub	r8, r8, #GD_SIZE		/* new GD is below bd */
+
+	adr	lr, here
+	ldr	r0, [r8, #GD_RELOC_OFF]		/* r0 = gd->reloc_off */
+	add	lr, lr, r0
+	ldr	r0, [r8, #GD_START_ADDR_SP]	/* r0 = gd->start_addr_sp */
+	mov	r1, r8				/* r1 = gd */
+	ldr	r2, [r8, #GD_RELOCADDR]		/* r2 = gd->relocaddr */
+	b	relocate_code			/* b, not bl: lr was set above */
+here:
+
+/* Set up final (full) environment */
+
+	bl	c_runtime_cpu_setup	/* we still call old routine here */
+
+	ldr	r0, =__bss_start	/* this is auto-relocated! */
+	ldr	r1, =__bss_end__	/* this is auto-relocated! */
+
+	mov	r2, #0x00000000		/* prepare zero to clear BSS */
+
+clbss_l:cmp	r0, r1			/* while not at end of BSS */
+	strlo	r2, [r0]		/* clear 32-bit BSS word */
+	addlo	r0, r0, #4		/* move to next */
+	blo	clbss_l
+
+	bl coloured_LED_init
+	bl red_led_on
+
+#if defined(CONFIG_NAND_SPL)
+
+	/* jump to nand_boot() */
+	ldr     pc, =nand_boot
+
+#else
+
+	/* call board_init_r(gd_t *id, ulong dest_addr) */
+	mov	r0, r8			/* gd_t */
+	ldr	r1, [r8, #GD_RELOCADDR]	/* dest_addr */
+	/* call board_init_r */
+	ldr	pc, =board_init_r	/* this is auto-relocated! */
+
+#endif
+
+	/* we should not return here. */
+
+#endif