armv8/ls2085aqds: NAND boot support

This adds NAND boot support for LS2085AQDS, using SPL framework.
Details of forming NAND image can be found in README.

Signed-off-by: Scott Wood <scottwood@freescale.com>
[York Sun: Remove +S from defconfig after commit 252ed872]
Signed-off-by: York Sun <yorksun@freescale.com>
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 3d0828d..4b7761d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -715,6 +715,7 @@
 	bool "Support ls2085aqds"
 	select ARM64
 	select ARMV8_MULTIENTRY
+	select SUPPORT_SPL
 	help
 	  Support for Freescale LS2085AQDS platform
 	  The LS2085A Development System (QDS) is a high-performance
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/README b/arch/arm/cpu/armv8/fsl-lsch3/README
index 4f36e2a..15a1549 100644
--- a/arch/arm/cpu/armv8/fsl-lsch3/README
+++ b/arch/arm/cpu/armv8/fsl-lsch3/README
@@ -95,3 +95,41 @@
 
 mcmemsize:	MC DRAM block size. If this variable is not defined, the value
 		CONFIG_SYS_LS_MC_DRAM_BLOCK_MIN_SIZE will be assumed.
+
+Booting from NAND
+-------------------
+Booting from NAND requires two images, RCW and u-boot-with-spl.bin.
+The difference between NAND boot RCW image and NOR boot image is the PBI
+command sequence. Below is one example for PBI commands for QDS which uses
+NAND device with 2KB/page, block size 128KB.
+
+1) CCSR 4-byte write to 0x00e00404, data=0x00000000
+2) CCSR 4-byte write to 0x00e00400, data=0x1800a000
+The above two commands set bootloc register to 0x00000000_1800a000 where
+the u-boot code will be running in OCRAM.
+
+3) Block Copy: SRC=0x0107, SRC_ADDR=0x00020000, DEST_ADDR=0x1800a000,
+BLOCK_SIZE=0x00014000
+This command copies u-boot image from NAND device into OCRAM. The values need
+to adjust accordingly.
+
+SRC		should match the cfg_rcw_src, the reset config pins. It depends
+		on the NAND device. See reference manual for cfg_rcw_src.
+SRC_ADDR	is the offset of u-boot-with-spl.bin image in NAND device. In
+		the example above, 128KB. For easy maintenance, we put it at
+		the beginning of next block from RCW.
+DEST_ADDR	is fixed at 0x1800a000, matching bootloc set above.
+BLOCK_SIZE	is the size to be copied by PBI.
+
+RCW image should be written to the beginning of NAND device. Example of using
+u-boot command
+
+nand write <rcw image in memory> 0 <size of rcw image>
+
+To form the NAND image, build u-boot with NAND config, for example,
+ls2085aqds_nand_defconfig. The image needed is u-boot-with-spl.bin.
+The u-boot image should be written to match SRC_ADDR, in above example 0x20000.
+
+nand write <u-boot image in memory> 200000 <size of u-boot image>
+
+With these two images in NAND device, the board can boot from NAND.
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/soc.c b/arch/arm/cpu/armv8/fsl-lsch3/soc.c
index 17700ef..ca00108 100644
--- a/arch/arm/cpu/armv8/fsl-lsch3/soc.c
+++ b/arch/arm/cpu/armv8/fsl-lsch3/soc.c
@@ -6,8 +6,13 @@
 
 #include <common.h>
 #include <fsl_ifc.h>
+#include <nand.h>
+#include <spl.h>
 #include <asm/arch-fsl-lsch3/soc.h>
 #include <asm/io.h>
+#include <asm/global_data.h>
+
+DECLARE_GLOBAL_DATA_PTR;
 
 static void erratum_a008751(void)
 {
@@ -18,8 +23,51 @@
 #endif
 }
 
+static void erratum_rcw_src(void)
+{
+#if defined(CONFIG_SPL)
+	u32 __iomem *dcfg_ccsr = (u32 __iomem *)DCFG_BASE;
+	u32 __iomem *dcfg_dcsr = (u32 __iomem *)DCFG_DCSR_BASE;
+	u32 val;
+
+	val = in_le32(dcfg_ccsr + DCFG_PORSR1 / 4);
+	val &= ~DCFG_PORSR1_RCW_SRC;
+	val |= DCFG_PORSR1_RCW_SRC_NOR;
+	out_le32(dcfg_dcsr + DCFG_DCSR_PORCR1 / 4, val);
+#endif
+}
+
 void fsl_lsch3_early_init_f(void)
 {
 	erratum_a008751();
+	erratum_rcw_src();
 	init_early_memctl_regs();	/* tighten IFC timing */
 }
+
+#ifdef CONFIG_SPL_BUILD
+void board_init_f(ulong dummy)
+{
+	/* Clear global data */
+	memset((void *)gd, 0, sizeof(gd_t));
+
+	arch_cpu_init();
+	board_early_init_f();
+	timer_init();
+	env_init();
+	gd->baudrate = getenv_ulong("baudrate", 10, CONFIG_BAUDRATE);
+
+	serial_init();
+	console_init_f();
+	dram_init();
+
+	/* Clear the BSS. */
+	memset(__bss_start, 0, __bss_end - __bss_start);
+
+	board_init_r(NULL, 0);
+}
+
+u32 spl_boot_device(void)
+{
+	return BOOT_DEVICE_NAND;
+}
+#endif
diff --git a/arch/arm/cpu/armv8/u-boot-spl.lds b/arch/arm/cpu/armv8/u-boot-spl.lds
new file mode 100644
index 0000000..4df339c
--- /dev/null
+++ b/arch/arm/cpu/armv8/u-boot-spl.lds
@@ -0,0 +1,77 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * (C) Copyright 2002
+ * Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
+ *
+ * (C) Copyright 2010
+ * Texas Instruments, <www.ti.com>
+ *	Aneesh V <aneesh@ti.com>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+MEMORY { .sram : ORIGIN = CONFIG_SPL_TEXT_BASE,
+		LENGTH = CONFIG_SPL_MAX_SIZE }
+MEMORY { .sdram : ORIGIN = CONFIG_SPL_BSS_START_ADDR,
+		LENGTH = CONFIG_SPL_BSS_MAX_SIZE }
+
+OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64")
+OUTPUT_ARCH(aarch64)
+ENTRY(_start)
+SECTIONS
+{
+	.text : {
+		. = ALIGN(8);
+		*(.__image_copy_start)
+		CPUDIR/start.o (.text*)
+		*(.text*)
+	} >.sram
+
+	.rodata : {
+		. = ALIGN(8);
+		*(SORT_BY_ALIGNMENT(SORT_BY_NAME(.rodata*)))
+	} >.sram
+
+	.data : {
+		. = ALIGN(8);
+		*(.data*)
+	} >.sram
+
+	.u_boot_list : {
+		. = ALIGN(8);
+		KEEP(*(SORT(.u_boot_list*)));
+	} >.sram
+
+	.image_copy_end : {
+		. = ALIGN(8);
+		*(.__image_copy_end)
+	} >.sram
+
+	.end : {
+		. = ALIGN(8);
+		*(.__end)
+	} >.sram
+
+	.bss_start : {
+		. = ALIGN(8);
+		KEEP(*(.__bss_start));
+	} >.sdram
+
+	.bss : {
+		*(.bss*)
+		 . = ALIGN(8);
+	} >.sdram
+
+	.bss_end : {
+		KEEP(*(.__bss_end));
+	} >.sdram
+
+	/DISCARD/ : { *(.dynsym) }
+	/DISCARD/ : { *(.dynstr*) }
+	/DISCARD/ : { *(.dynamic*) }
+	/DISCARD/ : { *(.plt*) }
+	/DISCARD/ : { *(.interp*) }
+	/DISCARD/ : { *(.gnu*) }
+}
diff --git a/arch/arm/include/asm/arch-fsl-lsch3/config.h b/arch/arm/include/asm/arch-fsl-lsch3/config.h
index 403b2ef..77c20ab 100644
--- a/arch/arm/include/asm/arch-fsl-lsch3/config.h
+++ b/arch/arm/include/asm/arch-fsl-lsch3/config.h
@@ -130,6 +130,15 @@
 #define CCI_MN_DVM_DOMAIN_CTL		0x200
 #define CCI_MN_DVM_DOMAIN_CTL_SET	0x210
 
+/* Device Configuration */
+#define DCFG_BASE		0x01e00000
+#define DCFG_PORSR1			0x000
+#define DCFG_PORSR1_RCW_SRC		0xff800000
+#define DCFG_PORSR1_RCW_SRC_NOR		0x12f00000
+
+#define DCFG_DCSR_BASE		0X700100000ULL
+#define DCFG_DCSR_PORCR1		0x000
+
 /* Supplemental Configuration */
 #define SCFG_BASE		0x01fc0000
 #define SCFG_USB3PRM1CR			0x000
diff --git a/arch/arm/lib/crt0_64.S b/arch/arm/lib/crt0_64.S
index 1654011..bc9c53c 100644
--- a/arch/arm/lib/crt0_64.S
+++ b/arch/arm/lib/crt0_64.S
@@ -61,7 +61,11 @@
 /*
  * Set up initial C runtime environment and call board_init_f(0).
  */
+#if defined(CONFIG_SPL_BUILD) && defined(CONFIG_SPL_STACK)
+	ldr	x0, =(CONFIG_SPL_STACK)
+#else
 	ldr	x0, =(CONFIG_SYS_INIT_SP_ADDR)
+#endif
 	sub	x18, x0, #GD_SIZE	/* allocate one GD above SP */
 	bic	x18, x18, #0x7		/* 8-byte alignment for GD */
 zero_gd:
@@ -77,6 +81,7 @@
 	mov	x0, #0
 	bl	board_init_f
 
+#if !defined(CONFIG_SPL_BUILD)
 /*
  * Set up intermediate environment (new sp and gd) and call
  * relocate_code(addr_moni). Trick here is that we'll return
@@ -119,4 +124,6 @@
 
 	/* NOTREACHED - board_init_r() does not return */
 
+#endif /* !CONFIG_SPL_BUILD */
+
 ENDPROC(_main)