armv8: Support loading 32-bit OS in AArch32 execution state

To support loading a 32-bit OS, the execution state will change from
AArch64 to AArch32 when jumping to kernel.

The architecture information will be got through checking FIT image,
then U-Boot will load 32-bit OS or 64-bit OS automatically.

Signed-off-by: Ebony Zhu <ebony.zhu@nxp.com>
Signed-off-by: Alison Wang <alison.wang@nxp.com>
Signed-off-by: Chenhui Zhao <chenhui.zhao@nxp.com>
Reviewed-by: York Sun <york.sun@nxp.com>
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9d4d742..49bc9d8 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -126,6 +126,12 @@
 	  ARM_SOC_BOOT0_HOOK which contains the required assembler
 	  preprocessor code.
 
+config ARM64_SUPPORT_AARCH32
+	bool "ARM64 system support AArch32 execution state"
+	default y if ARM64 && !TARGET_THUNDERX_88XX
+	help
+	  This ARM64 system supports AArch32 execution state.
+
 choice
 	prompt "Target select"
 	default TARGET_HIKEY
diff --git a/arch/arm/cpu/armv8/fsl-layerscape/lowlevel.S b/arch/arm/cpu/armv8/fsl-layerscape/lowlevel.S
index f7b49cb..72f2c11 100644
--- a/arch/arm/cpu/armv8/fsl-layerscape/lowlevel.S
+++ b/arch/arm/cpu/armv8/fsl-layerscape/lowlevel.S
@@ -17,6 +17,7 @@
 #include <asm/arch-fsl-layerscape/immap_lsch3.h>
 #include <asm/arch-fsl-layerscape/soc.h>
 #endif
+#include <asm/u-boot.h>
 
 ENTRY(lowlevel_init)
 	mov	x29, lr			/* Save LR */
@@ -359,11 +360,6 @@
         gic_wait_for_interrupt_m x0, w1
 #endif
 
-	bl secondary_switch_to_el2
-#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
-	bl secondary_switch_to_el1
-#endif
-
 slave_cpu:
 	wfe
 	ldr	x0, [x11]
@@ -376,19 +372,64 @@
 	tbz     x1, #25, cpu_is_le
 	rev     x0, x0                  /* BE to LE conversion */
 cpu_is_le:
-	br	x0			/* branch to the given address */
+	ldr	x5, [x11, #24]
+	ldr	x6, =IH_ARCH_DEFAULT
+	cmp	x6, x5
+	b.eq	1f
+
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
+	adr	x3, secondary_switch_to_el1
+	ldr	x4, =ES_TO_AARCH64
+#else
+	ldr	x3, [x11]
+	ldr	x4, =ES_TO_AARCH32
+#endif
+	bl	secondary_switch_to_el2
+
+1:
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
+	adr	x3, secondary_switch_to_el1
+#else
+	ldr	x3, [x11]
+#endif
+	ldr	x4, =ES_TO_AARCH64
+	bl	secondary_switch_to_el2
+
 ENDPROC(secondary_boot_func)
 
 ENTRY(secondary_switch_to_el2)
-	switch_el x0, 1f, 0f, 0f
+	switch_el x5, 1f, 0f, 0f
 0:	ret
-1:	armv8_switch_to_el2_m x0
+1:	armv8_switch_to_el2_m x3, x4, x5
 ENDPROC(secondary_switch_to_el2)
 
 ENTRY(secondary_switch_to_el1)
-	switch_el x0, 0f, 1f, 0f
+	mrs	x0, mpidr_el1
+	ubfm	x1, x0, #8, #15
+	ubfm	x2, x0, #0, #1
+	orr	x10, x2, x1, lsl #2	/* x10 has LPID */
+
+	lsl	x1, x10, #6
+	ldr	x0, =__spin_table
+	/* physical address of this cpus spin table element */
+	add	x11, x1, x0
+
+	ldr	x3, [x11]
+
+	ldr	x5, [x11, #24]
+	ldr	x6, =IH_ARCH_DEFAULT
+	cmp	x6, x5
+	b.eq	2f
+
+	ldr	x4, =ES_TO_AARCH32
+	bl	switch_to_el1
+
+2:	ldr	x4, =ES_TO_AARCH64
+
+switch_to_el1:
+	switch_el x5, 0f, 1f, 0f
 0:	ret
-1:	armv8_switch_to_el1_m x0, x1
+1:	armv8_switch_to_el1_m x3, x4, x5
 ENDPROC(secondary_switch_to_el1)
 
 	/* Ensure that the literals used by the secondary boot code are
diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
index 19c771d..4f5f6d8 100644
--- a/arch/arm/cpu/armv8/start.S
+++ b/arch/arm/cpu/armv8/start.S
@@ -251,9 +251,17 @@
 	/*
 	 * All slaves will enter EL2 and optionally EL1.
 	 */
+	adr	x3, lowlevel_in_el2
+	ldr	x4, =ES_TO_AARCH64
 	bl	armv8_switch_to_el2
+
+lowlevel_in_el2:
 #ifdef CONFIG_ARMV8_SWITCH_TO_EL1
+	adr	x3, lowlevel_in_el1
+	ldr	x4, =ES_TO_AARCH64
 	bl	armv8_switch_to_el1
+
+lowlevel_in_el1:
 #endif
 
 #endif /* CONFIG_ARMV8_MULTIENTRY */
diff --git a/arch/arm/cpu/armv8/transition.S b/arch/arm/cpu/armv8/transition.S
index 253a39b..bbccf2b 100644
--- a/arch/arm/cpu/armv8/transition.S
+++ b/arch/arm/cpu/armv8/transition.S
@@ -11,13 +11,24 @@
 #include <asm/macro.h>
 
 ENTRY(armv8_switch_to_el2)
-	switch_el x0, 1f, 0f, 0f
-0:	ret
-1:	armv8_switch_to_el2_m x0
+	switch_el x5, 1f, 0f, 0f
+0:
+	/*
+	 * x3 is kernel entry point or switch_to_el1
+	 * if CONFIG_ARMV8_SWITCH_TO_EL1 is defined.
+         * When running in EL2 now, jump to the
+	 * address saved in x3.
+	 */
+	br x3
+1:	armv8_switch_to_el2_m x3, x4, x5
 ENDPROC(armv8_switch_to_el2)
 
 ENTRY(armv8_switch_to_el1)
-	switch_el x0, 0f, 1f, 0f
-0:	ret
-1:	armv8_switch_to_el1_m x0, x1
+	switch_el x5, 0f, 1f, 0f
+0:
+	/* x3 is kernel entry point. When running in EL1
+	 * now, jump to the address saved in x3.
+	 */
+	br x3
+1:	armv8_switch_to_el1_m x3, x4, x5
 ENDPROC(armv8_switch_to_el1)
diff --git a/arch/arm/include/asm/arch-fsl-layerscape/mp.h b/arch/arm/include/asm/arch-fsl-layerscape/mp.h
index f7306ff..ebf84b6 100644
--- a/arch/arm/include/asm/arch-fsl-layerscape/mp.h
+++ b/arch/arm/include/asm/arch-fsl-layerscape/mp.h
@@ -36,4 +36,8 @@
 int is_core_online(u64 cpu_id);
 u32 cpu_pos_mask(void);
 #endif
+
+#define IH_ARCH_ARM		2	/* ARM */
+#define IH_ARCH_ARM64		22	/* ARM64 */
+
 #endif /* _FSL_LAYERSCAPE_MP_H */
diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h
index 9bb0efa..2553e3e 100644
--- a/arch/arm/include/asm/macro.h
+++ b/arch/arm/include/asm/macro.h
@@ -8,6 +8,11 @@
 
 #ifndef __ASM_ARM_MACRO_H__
 #define __ASM_ARM_MACRO_H__
+
+#ifdef CONFIG_ARM64
+#include <asm/system.h>
+#endif
+
 #ifdef __ASSEMBLY__
 
 /*
@@ -135,13 +140,21 @@
 #endif
 .endm
 
-.macro armv8_switch_to_el2_m, xreg1
-	/* 64bit EL2 | HCE | SMD | RES1 (Bits[5:4]) | Non-secure EL0/EL1 */
-	mov	\xreg1, #0x5b1
-	msr	scr_el3, \xreg1
+/*
+ * Switch from EL3 to EL2 for ARMv8
+ * @ep:     kernel entry point
+ * @flag:   The execution state flag for lower exception
+ *          level, ES_TO_AARCH64 or ES_TO_AARCH32
+ * @tmp:    temporary register
+ *
+ * For loading 32-bit OS, x1 is machine nr and x2 is ftaddr.
+ * For loading 64-bit OS, x0 is physical address to the FDT blob.
+ * They will be passed to the guest.
+ */
+.macro armv8_switch_to_el2_m, ep, flag, tmp
 	msr	cptr_el3, xzr		/* Disable coprocessor traps to EL3 */
-	mov	\xreg1, #0x33ff
-	msr	cptr_el2, \xreg1	/* Disable coprocessor traps to EL2 */
+	mov	\tmp, #CPTR_EL2_RES1
+	msr	cptr_el2, \tmp		/* Disable coprocessor traps to EL2 */
 
 	/* Initialize Generic Timers */
 	msr	cntvoff_el2, xzr
@@ -152,45 +165,90 @@
 	 * and RES0 bits (31,30,27,26,24,21,20,17,15-13,10-6) +
 	 * EE,WXN,I,SA,C,A,M to 0
 	 */
-	mov	\xreg1, #0x0830
-	movk	\xreg1, #0x30C5, lsl #16
-	msr	sctlr_el2, \xreg1
+	ldr	\tmp, =(SCTLR_EL2_RES1 | SCTLR_EL2_EE_LE |\
+			SCTLR_EL2_WXN_DIS | SCTLR_EL2_ICACHE_DIS |\
+			SCTLR_EL2_SA_DIS | SCTLR_EL2_DCACHE_DIS |\
+			SCTLR_EL2_ALIGN_DIS | SCTLR_EL2_MMU_DIS)
+	msr	sctlr_el2, \tmp
+
+	mov	\tmp, sp
+	msr	sp_el2, \tmp		/* Migrate SP */
+	mrs	\tmp, vbar_el3
+	msr	vbar_el2, \tmp		/* Migrate VBAR */
+
+	/* Check switch to AArch64 EL2 or AArch32 Hypervisor mode */
+	cmp	\flag, #ES_TO_AARCH32
+	b.eq	1f
+
+	/*
+	 * The next lower exception level is AArch64, 64bit EL2 | HCE |
+	 * SMD | RES1 (Bits[5:4]) | Non-secure EL0/EL1.
+	 */
+	ldr	\tmp, =(SCR_EL3_RW_AARCH64 | SCR_EL3_HCE_EN |\
+			SCR_EL3_SMD_DIS | SCR_EL3_RES1 |\
+			SCR_EL3_NS_EN)
+	msr	scr_el3, \tmp
 
 	/* Return to the EL2_SP2 mode from EL3 */
-	mov	\xreg1, sp
-	msr	sp_el2, \xreg1		/* Migrate SP */
-	mrs	\xreg1, vbar_el3
-	msr	vbar_el2, \xreg1	/* Migrate VBAR */
-	mov	\xreg1, #0x3c9
-	msr	spsr_el3, \xreg1	/* EL2_SP2 | D | A | I | F */
-	msr	elr_el3, lr
+	ldr	\tmp, =(SPSR_EL_DEBUG_MASK | SPSR_EL_SERR_MASK |\
+			SPSR_EL_IRQ_MASK | SPSR_EL_FIQ_MASK |\
+			SPSR_EL_M_AARCH64 | SPSR_EL_M_EL2H)
+	msr	spsr_el3, \tmp
+	msr	elr_el3, \ep
+	eret
+
+1:
+	/*
+	 * The next lower exception level is AArch32, 32bit EL2 | HCE |
+	 * SMD | RES1 (Bits[5:4]) | Non-secure EL0/EL1.
+	 */
+	ldr	\tmp, =(SCR_EL3_RW_AARCH32 | SCR_EL3_HCE_EN |\
+			SCR_EL3_SMD_DIS | SCR_EL3_RES1 |\
+			SCR_EL3_NS_EN)
+	msr	scr_el3, \tmp
+
+	/* Return to AArch32 Hypervisor mode */
+	ldr     \tmp, =(SPSR_EL_END_LE | SPSR_EL_ASYN_MASK |\
+			SPSR_EL_IRQ_MASK | SPSR_EL_FIQ_MASK |\
+			SPSR_EL_T_A32 | SPSR_EL_M_AARCH32 |\
+			SPSR_EL_M_HYP)
+	msr	spsr_el3, \tmp
+	msr     elr_el3, \ep
 	eret
 .endm
 
-.macro armv8_switch_to_el1_m, xreg1, xreg2
+/*
+ * Switch from EL2 to EL1 for ARMv8
+ * @ep:     kernel entry point
+ * @flag:   The execution state flag for lower exception
+ *          level, ES_TO_AARCH64 or ES_TO_AARCH32
+ * @tmp:    temporary register
+ *
+ * For loading 32-bit OS, x1 is machine nr and x2 is ftaddr.
+ * For loading 64-bit OS, x0 is physical address to the FDT blob.
+ * They will be passed to the guest.
+ */
+.macro armv8_switch_to_el1_m, ep, flag, tmp
 	/* Initialize Generic Timers */
-	mrs	\xreg1, cnthctl_el2
-	orr	\xreg1, \xreg1, #0x3	/* Enable EL1 access to timers */
-	msr	cnthctl_el2, \xreg1
+	mrs	\tmp, cnthctl_el2
+	/* Enable EL1 access to timers */
+	orr	\tmp, \tmp, #(CNTHCTL_EL2_EL1PCEN_EN |\
+		CNTHCTL_EL2_EL1PCTEN_EN)
+	msr	cnthctl_el2, \tmp
 	msr	cntvoff_el2, xzr
 
 	/* Initilize MPID/MPIDR registers */
-	mrs	\xreg1, midr_el1
-	mrs	\xreg2, mpidr_el1
-	msr	vpidr_el2, \xreg1
-	msr	vmpidr_el2, \xreg2
+	mrs	\tmp, midr_el1
+	msr	vpidr_el2, \tmp
+	mrs	\tmp, mpidr_el1
+	msr	vmpidr_el2, \tmp
 
 	/* Disable coprocessor traps */
-	mov	\xreg1, #0x33ff
-	msr	cptr_el2, \xreg1	/* Disable coprocessor traps to EL2 */
+	mov	\tmp, #CPTR_EL2_RES1
+	msr	cptr_el2, \tmp		/* Disable coprocessor traps to EL2 */
 	msr	hstr_el2, xzr		/* Disable coprocessor traps to EL2 */
-	mov	\xreg1, #3 << 20
-	msr	cpacr_el1, \xreg1	/* Enable FP/SIMD at EL1 */
-
-	/* Initialize HCR_EL2 */
-	mov	\xreg1, #(1 << 31)		/* 64bit EL1 */
-	orr	\xreg1, \xreg1, #(1 << 29)	/* Disable HVC */
-	msr	hcr_el2, \xreg1
+	mov	\tmp, #CPACR_EL1_FPEN_EN
+	msr	cpacr_el1, \tmp		/* Enable FP/SIMD at EL1 */
 
 	/* SCTLR_EL1 initialization
 	 *
@@ -199,18 +257,50 @@
 	 * UCI,EE,EOE,WXN,nTWE,nTWI,UCT,DZE,I,UMA,SED,ITD,
 	 * CP15BEN,SA0,SA,C,A,M to 0
 	 */
-	mov	\xreg1, #0x0800
-	movk	\xreg1, #0x30d0, lsl #16
-	msr	sctlr_el1, \xreg1
+	ldr	\tmp, =(SCTLR_EL1_RES1 | SCTLR_EL1_UCI_DIS |\
+			SCTLR_EL1_EE_LE | SCTLR_EL1_WXN_DIS |\
+			SCTLR_EL1_NTWE_DIS | SCTLR_EL1_NTWI_DIS |\
+			SCTLR_EL1_UCT_DIS | SCTLR_EL1_DZE_DIS |\
+			SCTLR_EL1_ICACHE_DIS | SCTLR_EL1_UMA_DIS |\
+			SCTLR_EL1_SED_EN | SCTLR_EL1_ITD_EN |\
+			SCTLR_EL1_CP15BEN_DIS | SCTLR_EL1_SA0_DIS |\
+			SCTLR_EL1_SA_DIS | SCTLR_EL1_DCACHE_DIS |\
+			SCTLR_EL1_ALIGN_DIS | SCTLR_EL1_MMU_DIS)
+	msr	sctlr_el1, \tmp
+
+	mov	\tmp, sp
+	msr	sp_el1, \tmp		/* Migrate SP */
+	mrs	\tmp, vbar_el2
+	msr	vbar_el1, \tmp		/* Migrate VBAR */
+
+	/* Check switch to AArch64 EL1 or AArch32 Supervisor mode */
+	cmp	\flag, #ES_TO_AARCH32
+	b.eq	1f
+
+	/* Initialize HCR_EL2 */
+	ldr	\tmp, =(HCR_EL2_RW_AARCH64 | HCR_EL2_HCD_DIS)
+	msr	hcr_el2, \tmp
 
 	/* Return to the EL1_SP1 mode from EL2 */
-	mov	\xreg1, sp
-	msr	sp_el1, \xreg1		/* Migrate SP */
-	mrs	\xreg1, vbar_el2
-	msr	vbar_el1, \xreg1	/* Migrate VBAR */
-	mov	\xreg1, #0x3c5
-	msr	spsr_el2, \xreg1	/* EL1_SP1 | D | A | I | F */
-	msr	elr_el2, lr
+	ldr	\tmp, =(SPSR_EL_DEBUG_MASK | SPSR_EL_SERR_MASK |\
+			SPSR_EL_IRQ_MASK | SPSR_EL_FIQ_MASK |\
+			SPSR_EL_M_AARCH64 | SPSR_EL_M_EL1H)
+	msr	spsr_el2, \tmp
+	msr     elr_el2, \ep
+	eret
+
+1:
+	/* Initialize HCR_EL2 */
+	ldr	\tmp, =(HCR_EL2_RW_AARCH32 | HCR_EL2_HCD_DIS)
+	msr	hcr_el2, \tmp
+
+	/* Return to AArch32 Supervisor mode from EL2 */
+	ldr	\tmp, =(SPSR_EL_END_LE | SPSR_EL_ASYN_MASK |\
+			SPSR_EL_IRQ_MASK | SPSR_EL_FIQ_MASK |\
+			SPSR_EL_T_A32 | SPSR_EL_M_AARCH32 |\
+			SPSR_EL_M_SVC)
+	msr     spsr_el2, \tmp
+	msr     elr_el2, \ep
 	eret
 .endm
 
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 574a0e7..287ff5c 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -18,6 +18,95 @@
 #define CR_WXN		(1 << 19)	/* Write Permision Imply XN	*/
 #define CR_EE		(1 << 25)	/* Exception (Big) Endian	*/
 
+#define ES_TO_AARCH64		1
+#define ES_TO_AARCH32		0
+
+/*
+ * SCR_EL3 bits definitions
+ */
+#define SCR_EL3_RW_AARCH64	(1 << 10) /* Next lower level is AArch64     */
+#define SCR_EL3_RW_AARCH32	(0 << 10) /* Lower lowers level are AArch32  */
+#define SCR_EL3_HCE_EN		(1 << 8)  /* Hypervisor Call enable          */
+#define SCR_EL3_SMD_DIS		(1 << 7)  /* Secure Monitor Call disable     */
+#define SCR_EL3_RES1		(3 << 4)  /* Reserved, RES1                  */
+#define SCR_EL3_NS_EN		(1 << 0)  /* EL0 and EL1 in Non-scure state  */
+
+/*
+ * SPSR_EL3/SPSR_EL2 bits definitions
+ */
+#define SPSR_EL_END_LE		(0 << 9)  /* Exception Little-endian          */
+#define SPSR_EL_DEBUG_MASK	(1 << 9)  /* Debug exception masked           */
+#define SPSR_EL_ASYN_MASK	(1 << 8)  /* Asynchronous data abort masked   */
+#define SPSR_EL_SERR_MASK	(1 << 8)  /* System Error exception masked    */
+#define SPSR_EL_IRQ_MASK	(1 << 7)  /* IRQ exception masked             */
+#define SPSR_EL_FIQ_MASK	(1 << 6)  /* FIQ exception masked             */
+#define SPSR_EL_T_A32		(0 << 5)  /* AArch32 instruction set A32      */
+#define SPSR_EL_M_AARCH64	(0 << 4)  /* Exception taken from AArch64     */
+#define SPSR_EL_M_AARCH32	(1 << 4)  /* Exception taken from AArch32     */
+#define SPSR_EL_M_SVC		(0x3)     /* Exception taken from SVC mode    */
+#define SPSR_EL_M_HYP		(0xa)     /* Exception taken from HYP mode    */
+#define SPSR_EL_M_EL1H		(5)       /* Exception taken from EL1h mode   */
+#define SPSR_EL_M_EL2H		(9)       /* Exception taken from EL2h mode   */
+
+/*
+ * CPTR_EL2 bits definitions
+ */
+#define CPTR_EL2_RES1		(3 << 12 | 0x3ff)           /* Reserved, RES1 */
+
+/*
+ * SCTLR_EL2 bits definitions
+ */
+#define SCTLR_EL2_RES1		(3 << 28 | 3 << 22 | 1 << 18 | 1 << 16 |\
+				 1 << 11 | 3 << 4)	    /* Reserved, RES1 */
+#define SCTLR_EL2_EE_LE		(0 << 25) /* Exception Little-endian          */
+#define SCTLR_EL2_WXN_DIS	(0 << 19) /* Write permission is not XN       */
+#define SCTLR_EL2_ICACHE_DIS	(0 << 12) /* Instruction cache disabled       */
+#define SCTLR_EL2_SA_DIS	(0 << 3)  /* Stack Alignment Check disabled   */
+#define SCTLR_EL2_DCACHE_DIS	(0 << 2)  /* Data cache disabled              */
+#define SCTLR_EL2_ALIGN_DIS	(0 << 1)  /* Alignment check disabled         */
+#define SCTLR_EL2_MMU_DIS	(0)       /* MMU disabled                     */
+
+/*
+ * CNTHCTL_EL2 bits definitions
+ */
+#define CNTHCTL_EL2_EL1PCEN_EN	(1 << 1)  /* Physical timer regs accessible   */
+#define CNTHCTL_EL2_EL1PCTEN_EN	(1 << 0)  /* Physical counter accessible      */
+
+/*
+ * HCR_EL2 bits definitions
+ */
+#define HCR_EL2_RW_AARCH64	(1 << 31) /* EL1 is AArch64                   */
+#define HCR_EL2_RW_AARCH32	(0 << 31) /* Lower levels are AArch32         */
+#define HCR_EL2_HCD_DIS		(1 << 29) /* Hypervisor Call disabled         */
+
+/*
+ * CPACR_EL1 bits definitions
+ */
+#define CPACR_EL1_FPEN_EN	(3 << 20) /* SIMD and FP instruction enabled  */
+
+/*
+ * SCTLR_EL1 bits definitions
+ */
+#define SCTLR_EL1_RES1		(3 << 28 | 3 << 22 | 1 << 20 |\
+				 1 << 11) /* Reserved, RES1                   */
+#define SCTLR_EL1_UCI_DIS	(0 << 26) /* Cache instruction disabled       */
+#define SCTLR_EL1_EE_LE		(0 << 25) /* Exception Little-endian          */
+#define SCTLR_EL1_WXN_DIS	(0 << 19) /* Write permission is not XN       */
+#define SCTLR_EL1_NTWE_DIS	(0 << 18) /* WFE instruction disabled         */
+#define SCTLR_EL1_NTWI_DIS	(0 << 16) /* WFI instruction disabled         */
+#define SCTLR_EL1_UCT_DIS	(0 << 15) /* CTR_EL0 access disabled          */
+#define SCTLR_EL1_DZE_DIS	(0 << 14) /* DC ZVA instruction disabled      */
+#define SCTLR_EL1_ICACHE_DIS	(0 << 12) /* Instruction cache disabled       */
+#define SCTLR_EL1_UMA_DIS	(0 << 9)  /* User Mask Access disabled        */
+#define SCTLR_EL1_SED_EN	(0 << 8)  /* SETEND instruction enabled       */
+#define SCTLR_EL1_ITD_EN	(0 << 7)  /* IT instruction enabled           */
+#define SCTLR_EL1_CP15BEN_DIS	(0 << 5)  /* CP15 barrier operation disabled  */
+#define SCTLR_EL1_SA0_DIS	(0 << 4)  /* Stack Alignment EL0 disabled     */
+#define SCTLR_EL1_SA_DIS	(0 << 3)  /* Stack Alignment EL1 disabled     */
+#define SCTLR_EL1_DCACHE_DIS	(0 << 2)  /* Data cache disabled              */
+#define SCTLR_EL1_ALIGN_DIS	(0 << 1)  /* Alignment check disabled         */
+#define SCTLR_EL1_MMU_DIS	(0)       /* MMU disabled                     */
+
 #ifndef __ASSEMBLY__
 
 u64 get_page_table_size(void);
@@ -98,8 +187,34 @@
 int __asm_invalidate_l3_icache(void);
 void __asm_switch_ttbr(u64 new_ttbr);
 
-void armv8_switch_to_el2(void);
-void armv8_switch_to_el1(void);
+/*
+ * Switch from EL3 to EL2 for ARMv8
+ *
+ * @args:        For loading 64-bit OS, fdt address.
+ *               For loading 32-bit OS, zero.
+ * @mach_nr:     For loading 64-bit OS, zero.
+ *               For loading 32-bit OS, machine nr
+ * @fdt_addr:    For loading 64-bit OS, zero.
+ *               For loading 32-bit OS, fdt address.
+ * @entry_point: kernel entry point
+ * @es_flag:     execution state flag, ES_TO_AARCH64 or ES_TO_AARCH32
+ */
+void armv8_switch_to_el2(u64 args, u64 mach_nr, u64 fdt_addr,
+			 u64 entry_point, u64 es_flag);
+/*
+ * Switch from EL2 to EL1 for ARMv8
+ *
+ * @args:        For loading 64-bit OS, fdt address.
+ *               For loading 32-bit OS, zero.
+ * @mach_nr:     For loading 64-bit OS, zero.
+ *               For loading 32-bit OS, machine nr
+ * @fdt_addr:    For loading 64-bit OS, zero.
+ *               For loading 32-bit OS, fdt address.
+ * @entry_point: kernel entry point
+ * @es_flag:     execution state flag, ES_TO_AARCH64 or ES_TO_AARCH32
+ */
+void armv8_switch_to_el1(u64 args, u64 mach_nr, u64 fdt_addr,
+			 u64 entry_point, u64 es_flag);
 void gic_init(void);
 void gic_send_sgi(unsigned long sgino);
 void wait_for_wakeup(void);
diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c
index dedcd1e..6f3be8b 100644
--- a/arch/arm/lib/bootm.c
+++ b/arch/arm/lib/bootm.c
@@ -200,10 +200,6 @@
 {
 	smp_kick_all_cpus();
 	dcache_disable();	/* flush cache before swtiching to EL2 */
-	armv8_switch_to_el2();
-#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
-	armv8_switch_to_el1();
-#endif
 }
 #endif
 
@@ -280,6 +276,24 @@
 }
 #endif
 
+#ifdef CONFIG_ARM64
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
+static void switch_to_el1(void)
+{
+	if ((IH_ARCH_DEFAULT == IH_ARCH_ARM64) &&
+	    (images.os.arch == IH_ARCH_ARM))
+		armv8_switch_to_el1(0, (u64)gd->bd->bi_arch_number,
+				    (u64)images.ft_addr,
+				    (u64)images.ep,
+				    ES_TO_AARCH32);
+	else
+		armv8_switch_to_el1((u64)images.ft_addr, 0, 0,
+				    images.ep,
+				    ES_TO_AARCH64);
+}
+#endif
+#endif
+
 /* Subcommand: GO */
 static void boot_jump_linux(bootm_headers_t *images, int flag)
 {
@@ -299,7 +313,22 @@
 
 	if (!fake) {
 		do_nonsec_virt_switch();
-		kernel_entry(images->ft_addr, NULL, NULL, NULL);
+
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
+		armv8_switch_to_el2((u64)images->ft_addr, 0, 0,
+				    (u64)switch_to_el1, ES_TO_AARCH64);
+#else
+		if ((IH_ARCH_DEFAULT == IH_ARCH_ARM64) &&
+		    (images->os.arch == IH_ARCH_ARM))
+			armv8_switch_to_el2(0, (u64)gd->bd->bi_arch_number,
+					    (u64)images->ft_addr,
+					    (u64)images->ep,
+					    ES_TO_AARCH32);
+		else
+			armv8_switch_to_el2((u64)images->ft_addr, 0, 0,
+					    images->ep,
+					    ES_TO_AARCH64);
+#endif
 	}
 #else
 	unsigned long machid = gd->bd->bi_arch_number;
diff --git a/arch/arm/mach-rmobile/lowlevel_init_gen3.S b/arch/arm/mach-rmobile/lowlevel_init_gen3.S
index 88ff56e..11acce0 100644
--- a/arch/arm/mach-rmobile/lowlevel_init_gen3.S
+++ b/arch/arm/mach-rmobile/lowlevel_init_gen3.S
@@ -61,11 +61,18 @@
 	/*
 	 * All slaves will enter EL2 and optionally EL1.
 	 */
+	adr	x3, lowlevel_in_el2
+	ldr	x4, =ES_TO_AARCH64
 	bl	armv8_switch_to_el2
-#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
-	bl	armv8_switch_to_el1
-#endif
 
+lowlevel_in_el2:
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
+	adr	x3, lowlevel_in_el1
+	ldr	x4, =ES_TO_AARCH64
+	bl	armv8_switch_to_el1
+
+lowlevel_in_el1:
+#endif
 #endif /* CONFIG_ARMV8_MULTIENTRY */
 
 	bl      s_init