arm: Switch 32-bit ARM to using generic global_data setup

There is quite a bit of assembler code that can be removed if we use the
generic global_data setup. Less arch-specific code makes it easier to add
new features and maintain the start-up code.

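Drop the unneeded code from crt0.S, which now passes the current stack
pointer to board_init_f_mem() and uses the value it returns as the new
stack pointer, and adjust the hooks in board_f.c to cope.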

Signed-off-by: Simon Glass <sjg@chromium.org>
diff --git a/arch/arm/lib/crt0.S b/arch/arm/lib/crt0.S
index 4c3a94a..80548eb 100644
--- a/arch/arm/lib/crt0.S
+++ b/arch/arm/lib/crt0.S
@@ -82,31 +82,11 @@
 #else
 	bic	sp, sp, #7	/* 8-byte alignment for ABI compliance */
 #endif
-	mov	r2, sp
-	sub	sp, sp, #GD_SIZE	/* allocate one GD above SP */
-#if defined(CONFIG_CPU_V7M)	/* v7M forbids using SP as BIC destination */
-	mov	r3, sp
-	bic	r3, r3, #7
-	mov	sp, r3
-#else
-	bic	sp, sp, #7	/* 8-byte alignment for ABI compliance */
-#endif
-	mov	r9, sp		/* GD is above SP */
-	mov	r1, sp
+	mov	r0, sp
+	bl	board_init_f_mem
+	mov	sp, r0
+
 	mov	r0, #0
-clr_gd:
-	cmp	r1, r2			/* while not at end of GD */
-#if defined(CONFIG_CPU_V7M)
-	itt	lo
-#endif
-	strlo	r0, [r1]		/* clear 32-bit GD word */
-	addlo	r1, r1, #4		/* move to next */
-	blo	clr_gd
-#if defined(CONFIG_SYS_MALLOC_F_LEN)
-	sub	sp, sp, #CONFIG_SYS_MALLOC_F_LEN
-	str	sp, [r9, #GD_MALLOC_BASE]
-#endif
-	/* mov r0, #0 not needed due to above code */
 	bl	board_init_f
 
 #if ! defined(CONFIG_SPL_BUILD)