arm64: core support
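
Add initial ARMv8 (arm64) core support: start-up/reset code, exception
vectors, EL3/EL2/EL1 transition code, cache and MMU handling, generic
timer access, GIC initialization and a linker script.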

Relocation code based on a patch by Scott Wood, which is:
Signed-off-by: Scott Wood <scottwood@freescale.com>

Signed-off-by: David Feng <fenghua@phytium.com.cn>
diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
new file mode 100644
index 0000000..b6eb6de
--- /dev/null
+++ b/arch/arm/cpu/armv8/Makefile
@@ -0,0 +1,17 @@
+#
+# (C) Copyright 2000-2003
+# Wolfgang Denk, DENX Software Engineering, wd@denx.de.
+#
+# SPDX-License-Identifier:	GPL-2.0+
+#
+
+extra-y	:= start.o
+
+obj-y	+= cpu.o
+obj-y	+= generic_timer.o
+obj-y	+= cache_v8.o
+obj-y	+= exceptions.o
+obj-y	+= cache.o
+obj-y	+= tlb.o
+obj-y	+= gic.o
+obj-y	+= transition.o
diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S
new file mode 100644
index 0000000..546a83e
--- /dev/null
+++ b/arch/arm/cpu/armv8/cache.S
@@ -0,0 +1,136 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * This file is based on sample code from ARMv8 ARM.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <asm/macro.h>
+#include <linux/linkage.h>
+
+/*
+ * void __asm_flush_dcache_level(level)
+ *
+ * clean and invalidate one level of data cache.
+ *
+ * x0: cache level
+ * x1~x9: clobbered
+ */
+ENTRY(__asm_flush_dcache_level)
+	lsl	x1, x0, #1
+	msr	csselr_el1, x1		/* select cache level */
+	isb				/* sync change of csselr_el1 */
+	mrs	x6, ccsidr_el1		/* read the new ccsidr_el1 */
+	and	x2, x6, #7		/* x2 <- log2(cache line size)-4 */
+	add	x2, x2, #4		/* x2 <- log2(cache line size) */
+	mov	x3, #0x3ff
+	and	x3, x3, x6, lsr #3	/* x3 <- number of cache ways - 1 */
+	clz	w5, w3			/* bit position of #ways */
+	mov	x4, #0x7fff
+	and	x4, x4, x6, lsr #13	/* x4 <- number of cache sets - 1 */
+	/* x1 <- cache level << 1 */
+	/* x2 <- line length offset */
+	/* x3 <- number of cache ways - 1 */
+	/* x4 <- number of cache sets - 1 */
+	/* x5 <- bit position of #ways */
+
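+	/*
+	 * Walk every set and way of this level, composing the DC CISW
+	 * operand as (way << x5) | (set << x2) | (level << 1).
+	 */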
+loop_set:
+	mov	x6, x3			/* x6 <- working copy of #ways */
+loop_way:
+	lsl	x7, x6, x5
+	orr	x9, x1, x7		/* map way and level to cisw value */
+	lsl	x7, x4, x2
+	orr	x9, x9, x7		/* map set number to cisw value */
+	dc	cisw, x9		/* clean & invalidate by set/way */
+	subs	x6, x6, #1		/* decrement the way */
+	b.ge	loop_way
+	subs	x4, x4, #1		/* decrement the set */
+	b.ge	loop_set
+
+	ret
+ENDPROC(__asm_flush_dcache_level)
+
+/*
+ * void __asm_flush_dcache_all(void)
+ *
+ * clean and invalidate all data cache by SET/WAY.
+ */
+ENTRY(__asm_flush_dcache_all)
+	dsb	sy
+	mrs	x10, clidr_el1		/* read clidr_el1 */
+	lsr	x11, x10, #24
+	and	x11, x11, #0x7		/* x11 <- loc */
+	cbz	x11, finished		/* if loc is 0, exit */
+	mov	x15, lr
+	mov	x0, #0			/* start flush at cache level 0 */
+	/* x0  <- cache level */
+	/* x10 <- clidr_el1 */
+	/* x11 <- loc */
+	/* x15 <- return address */
+
+loop_level:
+	lsl	x1, x0, #1
+	add	x1, x1, x0		/* x1 <- tripled cache level */
+	lsr	x1, x10, x1
+	and	x1, x1, #7		/* x1 <- cache type */
+	cmp	x1, #2
+	b.lt	skip			/* skip if no cache or icache */
+	bl	__asm_flush_dcache_level
+skip:
+	add	x0, x0, #1		/* increment cache level */
+	cmp	x11, x0
+	b.gt	loop_level
+
+	mov	x0, #0
+	msr	csselr_el1, x0		/* restore csselr_el1 */
+	dsb	sy
+	isb
+	mov	lr, x15
+
+finished:
+	ret
+ENDPROC(__asm_flush_dcache_all)
+
+/*
+ * void __asm_flush_dcache_range(start, end)
+ *
+ * clean & invalidate data cache in the range
+ *
+ * x0: start address
+ * x1: end address
+ */
+ENTRY(__asm_flush_dcache_range)
+	mrs	x3, ctr_el0
+	lsr	x3, x3, #16
+	and	x3, x3, #0xf
+	mov	x2, #4
+	lsl	x2, x2, x3		/* cache line size */
+
+	/* x2 <- minimal cache line size in cache system */
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:	dc	civac, x0	/* clean & invalidate data or unified cache */
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__asm_flush_dcache_range)
+
+/*
+ * void __asm_invalidate_icache_all(void)
+ *
+ * invalidate all icache entries (to PoU, Inner Shareable).
+ */
+ENTRY(__asm_invalidate_icache_all)
+	ic	ialluis
+	isb	sy
+	ret
+ENDPROC(__asm_invalidate_icache_all)
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
new file mode 100644
index 0000000..131fdab
--- /dev/null
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -0,0 +1,219 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <asm/system.h>
+#include <asm/armv8/mmu.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+#ifndef CONFIG_SYS_DCACHE_OFF
+
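+/*
+ * The page table at gd->arch.tlb_addr is a flat array of block
+ * descriptors: entry i identity-maps the i-th section of the address
+ * space with the given memory attributes.
+ */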
+static void set_pgtable_section(u64 section, u64 memory_type)
+{
+	u64 *page_table = (u64 *)gd->arch.tlb_addr;
+	u64 value;
+
+	value = (section << SECTION_SHIFT) | PMD_TYPE_SECT | PMD_SECT_AF;
+	value |= PMD_ATTRINDX(memory_type);
+	page_table[section] = value;
+}
+
+/* to activate the MMU we need to set up virtual memory */
+static void mmu_setup(void)
+{
+	int i, j, el;
+	bd_t *bd = gd->bd;
+
+	/* Set up an identity mapping for the whole address space as device memory */
+	for (i = 0; i < (PGTABLE_SIZE >> 3); i++)
+		set_pgtable_section(i, MT_DEVICE_NGNRNE);
+
+	/* Remap the RAM banks as normal memory */
+	for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
+		ulong start = bd->bi_dram[i].start;
+		ulong end = bd->bi_dram[i].start + bd->bi_dram[i].size;
+		for (j = start >> SECTION_SHIFT;
+		     j < end >> SECTION_SHIFT; j++) {
+			set_pgtable_section(j, MT_NORMAL);
+		}
+	}
+
+	/* load TTBR0 */
+	el = current_el();
+	if (el == 1)
+		asm volatile("msr ttbr0_el1, %0"
+			     : : "r" (gd->arch.tlb_addr) : "memory");
+	else if (el == 2)
+		asm volatile("msr ttbr0_el2, %0"
+			     : : "r" (gd->arch.tlb_addr) : "memory");
+	else
+		asm volatile("msr ttbr0_el3, %0"
+			     : : "r" (gd->arch.tlb_addr) : "memory");
+
+	/* enable the mmu */
+	set_sctlr(get_sctlr() | CR_M);
+}
+
+/*
+ * Performs an invalidation of the entire data cache at all levels
+ */
+void invalidate_dcache_all(void)
+{
+	__asm_flush_dcache_all();
+}
+
+/*
+ * Performs a clean & invalidate of the entire data cache at all levels
+ */
+void flush_dcache_all(void)
+{
+	__asm_flush_dcache_all();
+}
+
+/*
+ * Invalidates range in all levels of D-cache/unified cache
+ */
+void invalidate_dcache_range(unsigned long start, unsigned long stop)
+{
+	__asm_flush_dcache_range(start, stop);
+}
+
+/*
+ * Flush range (clean & invalidate) from all levels of D-cache/unified cache
+ */
+void flush_dcache_range(unsigned long start, unsigned long stop)
+{
+	__asm_flush_dcache_range(start, stop);
+}
+
+void dcache_enable(void)
+{
+	/* The data cache is not active unless the mmu is enabled */
+	if (!(get_sctlr() & CR_M)) {
+		invalidate_dcache_all();
+		__asm_invalidate_tlb_all();
+		mmu_setup();
+	}
+
+	set_sctlr(get_sctlr() | CR_C);
+}
+
+void dcache_disable(void)
+{
+	uint32_t sctlr;
+
+	sctlr = get_sctlr();
+
+	/* if cache isn't enabled no need to disable */
+	if (!(sctlr & CR_C))
+		return;
+
+	set_sctlr(sctlr & ~(CR_C|CR_M));
+
+	flush_dcache_all();
+	__asm_invalidate_tlb_all();
+}
+
+int dcache_status(void)
+{
+	return (get_sctlr() & CR_C) != 0;
+}
+
+#else	/* CONFIG_SYS_DCACHE_OFF */
+
+void invalidate_dcache_all(void)
+{
+}
+
+void flush_dcache_all(void)
+{
+}
+
+void invalidate_dcache_range(unsigned long start, unsigned long stop)
+{
+}
+
+void flush_dcache_range(unsigned long start, unsigned long stop)
+{
+}
+
+void dcache_enable(void)
+{
+}
+
+void dcache_disable(void)
+{
+}
+
+int dcache_status(void)
+{
+	return 0;
+}
+
+#endif	/* CONFIG_SYS_DCACHE_OFF */
+
+#ifndef CONFIG_SYS_ICACHE_OFF
+
+void icache_enable(void)
+{
+	set_sctlr(get_sctlr() | CR_I);
+}
+
+void icache_disable(void)
+{
+	set_sctlr(get_sctlr() & ~CR_I);
+}
+
+int icache_status(void)
+{
+	return (get_sctlr() & CR_I) != 0;
+}
+
+void invalidate_icache_all(void)
+{
+	__asm_invalidate_icache_all();
+}
+
+#else	/* CONFIG_SYS_ICACHE_OFF */
+
+void icache_enable(void)
+{
+}
+
+void icache_disable(void)
+{
+}
+
+int icache_status(void)
+{
+	return 0;
+}
+
+void invalidate_icache_all(void)
+{
+}
+
+#endif	/* CONFIG_SYS_ICACHE_OFF */
+
+/*
+ * Enable dCache & iCache. Whether the caches are actually enabled
+ * depends on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF.
+ */
+void enable_caches(void)
+{
+	icache_enable();
+	dcache_enable();
+}
+
+/*
+ * Flush range from all levels of d-cache/unified-cache
+ */
+void flush_cache(unsigned long start, unsigned long size)
+{
+	flush_dcache_range(start, start + size);
+}
diff --git a/arch/arm/cpu/armv8/config.mk b/arch/arm/cpu/armv8/config.mk
new file mode 100644
index 0000000..027a68c
--- /dev/null
+++ b/arch/arm/cpu/armv8/config.mk
@@ -0,0 +1,15 @@
+#
+# (C) Copyright 2002
+# Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
+#
+# SPDX-License-Identifier:	GPL-2.0+
+#
+PLATFORM_RELFLAGS += -fno-common -ffixed-x18
+
+# SEE README.arm-unaligned-accesses
+PF_NO_UNALIGNED := $(call cc-option, -mstrict-align)
+PLATFORM_NO_UNALIGNED := $(PF_NO_UNALIGNED)
+
+PF_CPPFLAGS_ARMV8 := $(call cc-option, -march=armv8-a)
+PLATFORM_CPPFLAGS += $(PF_CPPFLAGS_ARMV8)
+PLATFORM_CPPFLAGS += $(PF_NO_UNALIGNED)
diff --git a/arch/arm/cpu/armv8/cpu.c b/arch/arm/cpu/armv8/cpu.c
new file mode 100644
index 0000000..e06c3cc
--- /dev/null
+++ b/arch/arm/cpu/armv8/cpu.c
@@ -0,0 +1,43 @@
+/*
+ * (C) Copyright 2008 Texas Instruments
+ *
+ * (C) Copyright 2002
+ * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
+ * Marius Groeger <mgroeger@sysgo.de>
+ *
+ * (C) Copyright 2002
+ * Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <command.h>
+#include <asm/system.h>
+#include <linux/compiler.h>
+
+int cleanup_before_linux(void)
+{
+	/*
+	 * This function is called just before we call Linux;
+	 * it prepares the processor for Linux.
+	 *
+	 * Disable interrupts and turn off caches etc ...
+	 */
+	disable_interrupts();
+
+	/*
+	 * Turn off I-cache and invalidate it
+	 */
+	icache_disable();
+	invalidate_icache_all();
+
+	/*
+	 * Turn off D-cache.
+	 * dcache_disable() in turn flushes the D-cache and disables the MMU.
+	 */
+	dcache_disable();
+	invalidate_dcache_all();
+
+	return 0;
+}
diff --git a/arch/arm/cpu/armv8/exceptions.S b/arch/arm/cpu/armv8/exceptions.S
new file mode 100644
index 0000000..b91a1b6
--- /dev/null
+++ b/arch/arm/cpu/armv8/exceptions.S
@@ -0,0 +1,113 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <asm/ptrace.h>
+#include <asm/macro.h>
+#include <linux/linkage.h>
+
+/*
+ * Enter Exception.
+ * This will save the processor state, that is ELR and X0~X30,
+ * to the stack frame.
+ */
+.macro	exception_entry
+	stp	x29, x30, [sp, #-16]!
+	stp	x27, x28, [sp, #-16]!
+	stp	x25, x26, [sp, #-16]!
+	stp	x23, x24, [sp, #-16]!
+	stp	x21, x22, [sp, #-16]!
+	stp	x19, x20, [sp, #-16]!
+	stp	x17, x18, [sp, #-16]!
+	stp	x15, x16, [sp, #-16]!
+	stp	x13, x14, [sp, #-16]!
+	stp	x11, x12, [sp, #-16]!
+	stp	x9, x10, [sp, #-16]!
+	stp	x7, x8, [sp, #-16]!
+	stp	x5, x6, [sp, #-16]!
+	stp	x3, x4, [sp, #-16]!
+	stp	x1, x2, [sp, #-16]!
+
+	/* Could be running at EL3/EL2/EL1 */
+	switch_el x11, 3f, 2f, 1f
+3:	mrs	x1, esr_el3
+	mrs	x2, elr_el3
+	b	0f
+2:	mrs	x1, esr_el2
+	mrs	x2, elr_el2
+	b	0f
+1:	mrs	x1, esr_el1
+	mrs	x2, elr_el1
+0:
+	stp	x2, x0, [sp, #-16]!
+	mov	x0, sp
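+	/*
+	 * x0 now points to the saved frame (ELR, x0, x1..x30) and x1
+	 * holds the ESR; both are passed to the C exception handlers.
+	 */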
+.endm
+
+/*
+ * Exception vectors.
+ * The table is 2KB aligned (.align 11) and each entry is 128 bytes
+ * (.align 7), so every slot just branches to its handler below.
+ */
+	.align	11
+	.globl	vectors
+vectors:
+	.align	7
+	b	_do_bad_sync	/* Current EL Synchronous Thread */
+
+	.align	7
+	b	_do_bad_irq	/* Current EL IRQ Thread */
+
+	.align	7
+	b	_do_bad_fiq	/* Current EL FIQ Thread */
+
+	.align	7
+	b	_do_bad_error	/* Current EL Error Thread */
+
+	.align	7
+	b	_do_sync	/* Current EL Synchronous Handler */
+
+	.align	7
+	b	_do_irq		/* Current EL IRQ Handler */
+
+	.align	7
+	b	_do_fiq		/* Current EL FIQ Handler */
+
+	.align	7
+	b	_do_error	/* Current EL Error Handler */
+
+
+_do_bad_sync:
+	exception_entry
+	bl	do_bad_sync
+
+_do_bad_irq:
+	exception_entry
+	bl	do_bad_irq
+
+_do_bad_fiq:
+	exception_entry
+	bl	do_bad_fiq
+
+_do_bad_error:
+	exception_entry
+	bl	do_bad_error
+
+_do_sync:
+	exception_entry
+	bl	do_sync
+
+_do_irq:
+	exception_entry
+	bl	do_irq
+
+_do_fiq:
+	exception_entry
+	bl	do_fiq
+
+_do_error:
+	exception_entry
+	bl	do_error
diff --git a/arch/arm/cpu/armv8/generic_timer.c b/arch/arm/cpu/armv8/generic_timer.c
new file mode 100644
index 0000000..223b95e
--- /dev/null
+++ b/arch/arm/cpu/armv8/generic_timer.c
@@ -0,0 +1,31 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <command.h>
+#include <asm/system.h>
+
+/*
+ * Generic timer implementation of get_tbclk()
+ */
+unsigned long get_tbclk(void)
+{
+	unsigned long cntfrq;
+	asm volatile("mrs %0, cntfrq_el0" : "=r" (cntfrq));
+	return cntfrq;
+}
+
+/*
+ * Generic timer implementation of timer_read_counter()
+ */
+unsigned long timer_read_counter(void)
+{
+	unsigned long cntpct;
+	isb();
+	asm volatile("mrs %0, cntpct_el0" : "=r" (cntpct));
+	return cntpct;
+}
diff --git a/arch/arm/cpu/armv8/gic.S b/arch/arm/cpu/armv8/gic.S
new file mode 100644
index 0000000..599aa8f
--- /dev/null
+++ b/arch/arm/cpu/armv8/gic.S
@@ -0,0 +1,106 @@
+/*
+ * GIC Initialization Routines.
+ *
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <linux/linkage.h>
+#include <asm/macro.h>
+#include <asm/gic.h>
+
+
+/*************************************************************************
+ *
+ * void gic_init(void) __attribute__((weak));
+ *
+ * Currently, this routine only initializes the secure copy of the GIC
+ * with Security Extensions at EL3.
+ *
+ *************************************************************************/
+WEAK(gic_init)
+	branch_if_slave	x0, 2f
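+	/*
+	 * Slaves skip the Distributor/SPI setup below and go straight
+	 * to the banked SGI/PPI and CPU interface init at label 2.
+	 */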
+
+	/* Initialize Distributor and SPIs */
+	ldr	x1, =GICD_BASE
+	mov	w0, #0x3		/* EnableGrp0 | EnableGrp1 */
+	str	w0, [x1, GICD_CTLR]	/* Secure GICD_CTLR */
+	ldr	w0, [x1, GICD_TYPER]
+	and	w2, w0, #0x1f		/* ITLinesNumber */
+	cbz	w2, 2f			/* No SPIs */
+	add	x1, x1, (GICD_IGROUPRn + 4)
+	mov	w0, #~0			/* Config SPIs as Grp1 */
+1:	str	w0, [x1], #0x4
+	sub	w2, w2, #0x1
+	cbnz	w2, 1b
+
+	/* Initialize SGIs and PPIs */
+2:	ldr	x1, =GICD_BASE
+	mov	w0, #~0			/* Config SGIs and PPIs as Grp1 */
+	str	w0, [x1, GICD_IGROUPRn]	/* GICD_IGROUPR0 */
+	mov	w0, #0x1		/* Enable SGI 0 */
+	str	w0, [x1, GICD_ISENABLERn]
+
+	/* Initialize Cpu Interface */
+	ldr	x1, =GICC_BASE
+	mov	w0, #0x1e7		/* Disable IRQ/FIQ Bypass & */
+					/* Enable Ack Group1 Interrupt & */
+					/* EnableGrp0 & EnableGrp1 */
+	str	w0, [x1, GICC_CTLR]	/* Secure GICC_CTLR */
+
+	mov	w0, #0x1 << 7		/* Non-Secure access to GICC_PMR */
+	str	w0, [x1, GICC_PMR]
+
+	ret
+ENDPROC(gic_init)
+
+
+/*************************************************************************
+ *
+ * void gic_send_sgi(u64 sgi) __attribute__((weak));
+ *
+ *************************************************************************/
+WEAK(gic_send_sgi)
+	ldr	x1, =GICD_BASE
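+	/*
+	 * GICD_SGIR value 0x0100_8000: TargetListFilter = 0b01
+	 * (all CPUs except the requester), NSATT = 1 (Group 1),
+	 * SGI ID taken from w0.
+	 */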
+	mov	w2, #0x8000
+	movk	w2, #0x100, lsl #16
+	orr	w2, w2, w0
+	str	w2, [x1, GICD_SGIR]
+	ret
+ENDPROC(gic_send_sgi)
+
+
+/*************************************************************************
+ *
+ * void wait_for_wakeup(void) __attribute__((weak));
+ *
+ * Wait for SGI 0 from master.
+ *
+ *************************************************************************/
+WEAK(wait_for_wakeup)
+	ldr	x1, =GICC_BASE
+0:	wfi
+	ldr	w0, [x1, GICC_AIAR]
+	str	w0, [x1, GICC_AEOIR]
+	cbnz	w0, 0b
+	ret
+ENDPROC(wait_for_wakeup)
+
+
+/*************************************************************************
+ *
+ * void smp_kick_all_cpus(void) __attribute__((weak));
+ *
+ *************************************************************************/
+WEAK(smp_kick_all_cpus)
+	/* Kick secondary CPUs up with an SGI 0 interrupt */
+	mov	x0, xzr			/* SGI 0 */
+	mov	x29, lr			/* Save LR */
+	bl	gic_send_sgi
+	mov	lr, x29			/* Restore LR */
+	ret
+ENDPROC(smp_kick_all_cpus)
diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
new file mode 100644
index 0000000..bcc2603
--- /dev/null
+++ b/arch/arm/cpu/armv8/start.S
@@ -0,0 +1,164 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <linux/linkage.h>
+#include <asm/macro.h>
+#include <asm/armv8/mmu.h>
+
+/*************************************************************************
+ *
+ * Startup Code (reset vector)
+ *
+ *************************************************************************/
+
+.globl	_start
+_start:
+	b	reset
+
+	.align 3
+
+.globl	_TEXT_BASE
+_TEXT_BASE:
+	.quad	CONFIG_SYS_TEXT_BASE
+
+/*
+ * These are defined in the linker script.
+ */
+.globl	_end_ofs
+_end_ofs:
+	.quad	_end - _start
+
+.globl	_bss_start_ofs
+_bss_start_ofs:
+	.quad	__bss_start - _start
+
+.globl	_bss_end_ofs
+_bss_end_ofs:
+	.quad	__bss_end - _start
+
+reset:
+	/*
+	 * Could be EL3/EL2/EL1, Initial State:
+	 * Little Endian, MMU Disabled, i/dCache Disabled
+	 */
+	adr	x0, vectors
+	switch_el x1, 3f, 2f, 1f
+3:	msr	vbar_el3, x0
+	msr	cptr_el3, xzr			/* Enable FP/SIMD */
+	ldr	x0, =COUNTER_FREQUENCY
+	msr	cntfrq_el0, x0			/* Initialize CNTFRQ */
+	b	0f
+2:	msr	vbar_el2, x0
+	mov	x0, #0x33ff
+	msr	cptr_el2, x0			/* Enable FP/SIMD */
+	b	0f
+1:	msr	vbar_el1, x0
+	mov	x0, #3 << 20
+	msr	cpacr_el1, x0			/* Enable FP/SIMD */
+0:
+
+	/* Cache/BPB/TLB Invalidate */
+	bl	__asm_flush_dcache_all		/* dCache clean&invalidate */
+	bl	__asm_invalidate_icache_all	/* iCache invalidate */
+	bl	__asm_invalidate_tlb_all	/* invalidate TLBs */
+
+	/* Processor specific initialization */
+	bl	lowlevel_init
+
+	branch_if_master x0, x1, master_cpu
+
+	/*
+	 * Slave CPUs
+	 */
+slave_cpu:
+	wfe
+	ldr	x1, =CPU_RELEASE_ADDR
+	ldr	x0, [x1]
+	cbz	x0, slave_cpu
+	br	x0			/* branch to the given address */
+
+	/*
+	 * Master CPU
+	 */
+master_cpu:
+	bl	_main
+
+/*-----------------------------------------------------------------------*/
+
+WEAK(lowlevel_init)
+	/* Initialize GIC Secure Bank Status */
+	mov	x29, lr			/* Save LR */
+	bl	gic_init
+
+	branch_if_master x0, x1, 1f
+
+	/*
+	 * Slaves should wait for the master to clear the spin table.
+	 * This sync prevents slaves from observing an incorrect
+	 * spin-table value and jumping to the wrong place.
+	 */
+	bl	wait_for_wakeup
+
+	/*
+	 * All processors will enter EL2 and optionally EL1.
+	 */
+	bl	armv8_switch_to_el2
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
+	bl	armv8_switch_to_el1
+#endif
+
+1:
+	mov	lr, x29			/* Restore LR */
+	ret
+ENDPROC(lowlevel_init)
+
+/*-----------------------------------------------------------------------*/
+
+ENTRY(c_runtime_cpu_setup)
+	/* If I-cache is enabled invalidate it */
+#ifndef CONFIG_SYS_ICACHE_OFF
+	ic	iallu			/* I+BTB cache invalidate */
+	isb	sy
+#endif
+
+#ifndef CONFIG_SYS_DCACHE_OFF
+	/*
+	 * Setup MAIR and TCR.
+	 */
+	ldr	x0, =MEMORY_ATTRIBUTES
+	ldr	x1, =TCR_FLAGS
+
+	switch_el x2, 3f, 2f, 1f
+3:	orr	x1, x1, TCR_EL3_IPS_BITS
+	msr	mair_el3, x0
+	msr	tcr_el3, x1
+	b	0f
+2:	orr	x1, x1, TCR_EL2_IPS_BITS
+	msr	mair_el2, x0
+	msr	tcr_el2, x1
+	b	0f
+1:	orr	x1, x1, TCR_EL1_IPS_BITS
+	msr	mair_el1, x0
+	msr	tcr_el1, x1
+0:
+#endif
+
+	/* Relocate VBAR */
+	adr	x0, vectors
+	switch_el x1, 3f, 2f, 1f
+3:	msr	vbar_el3, x0
+	b	0f
+2:	msr	vbar_el2, x0
+	b	0f
+1:	msr	vbar_el1, x0
+0:
+
+	ret
+ENDPROC(c_runtime_cpu_setup)
diff --git a/arch/arm/cpu/armv8/tlb.S b/arch/arm/cpu/armv8/tlb.S
new file mode 100644
index 0000000..f840b04
--- /dev/null
+++ b/arch/arm/cpu/armv8/tlb.S
@@ -0,0 +1,34 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <linux/linkage.h>
+#include <asm/macro.h>
+
+/*
+ * void __asm_invalidate_tlb_all(void)
+ *
+ * invalidate all tlb entries.
+ */
+ENTRY(__asm_invalidate_tlb_all)
+	switch_el x9, 3f, 2f, 1f
+3:	tlbi	alle3
+	dsb	sy
+	isb
+	b	0f
+2:	tlbi	alle2
+	dsb	sy
+	isb
+	b	0f
+1:	tlbi	vmalle1
+	dsb	sy
+	isb
+0:
+	ret
+ENDPROC(__asm_invalidate_tlb_all)
diff --git a/arch/arm/cpu/armv8/transition.S b/arch/arm/cpu/armv8/transition.S
new file mode 100644
index 0000000..e0a5946
--- /dev/null
+++ b/arch/arm/cpu/armv8/transition.S
@@ -0,0 +1,83 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <linux/linkage.h>
+#include <asm/macro.h>
+
+ENTRY(armv8_switch_to_el2)
+	switch_el x0, 1f, 0f, 0f
+0:	ret
+1:
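+	/* Running at EL3: configure EL2, then drop to it via eret */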
+	mov	x0, #0x5b1	/* Non-secure EL0/EL1 | HVC | 64bit EL2 */
+	msr	scr_el3, x0
+	msr	cptr_el3, xzr	/* Disable coprocessor traps to EL3 */
+	mov	x0, #0x33ff
+	msr	cptr_el2, x0	/* Disable coprocessor traps to EL2 */
+
+	/* Initialize SCTLR_EL2 */
+	msr	sctlr_el2, xzr
+
+	/* Return to the EL2_SP2 mode from EL3 */
+	mov	x0, sp
+	msr	sp_el2, x0	/* Migrate SP */
+	mrs	x0, vbar_el3
+	msr	vbar_el2, x0	/* Migrate VBAR */
+	mov	x0, #0x3c9
+	msr	spsr_el3, x0	/* EL2_SP2 | D | A | I | F */
+	msr	elr_el3, lr
+	eret
+ENDPROC(armv8_switch_to_el2)
+
+ENTRY(armv8_switch_to_el1)
+	switch_el x0, 0f, 1f, 0f
+0:	ret
+1:
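+	/* Running at EL2: configure EL1, then drop to it via eret */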
+	/* Initialize Generic Timers */
+	mrs	x0, cnthctl_el2
+	orr	x0, x0, #0x3		/* Enable EL1 access to timers */
+	msr	cnthctl_el2, x0
+	msr	cntvoff_el2, xzr	/* Clear virtual offset */
+	mrs	x0, cntkctl_el1
+	orr	x0, x0, #0x3		/* Enable EL0 access to timers */
+	msr	cntkctl_el1, x0
+
+	/* Initialize MPID/MPIDR registers */
+	mrs	x0, midr_el1
+	mrs	x1, mpidr_el1
+	msr	vpidr_el2, x0
+	msr	vmpidr_el2, x1
+
+	/* Disable coprocessor traps */
+	mov	x0, #0x33ff
+	msr	cptr_el2, x0		/* Disable coprocessor traps to EL2 */
+	msr	hstr_el2, xzr		/* Disable coprocessor traps to EL2 */
+	mov	x0, #3 << 20
+	msr	cpacr_el1, x0		/* Enable FP/SIMD at EL1 */
+
+	/* Initialize HCR_EL2 */
+	mov	x0, #(1 << 31)		/* 64bit EL1 */
+	orr	x0, x0, #(1 << 29)	/* Disable HVC */
+	msr	hcr_el2, x0
+
+	/* SCTLR_EL1 initialization: set the RES1 bits, MMU and caches off */
+	mov	x0, #0x0800
+	movk	x0, #0x30d0, lsl #16
+	msr	sctlr_el1, x0
+
+	/* Return to the EL1_SP1 mode from EL2 */
+	mov	x0, sp
+	msr	sp_el1, x0		/* Migrate SP */
+	mrs	x0, vbar_el2
+	msr	vbar_el1, x0		/* Migrate VBAR */
+	mov	x0, #0x3c5
+	msr	spsr_el2, x0		/* EL1_SP1 | D | A | I | F */
+	msr	elr_el2, lr
+	eret
+ENDPROC(armv8_switch_to_el1)
diff --git a/arch/arm/cpu/armv8/u-boot.lds b/arch/arm/cpu/armv8/u-boot.lds
new file mode 100644
index 0000000..4c12222
--- /dev/null
+++ b/arch/arm/cpu/armv8/u-boot.lds
@@ -0,0 +1,89 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <fenghua@phytium.com.cn>
+ *
+ * (C) Copyright 2002
+ * Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64")
+OUTPUT_ARCH(aarch64)
+ENTRY(_start)
+SECTIONS
+{
+	. = 0x00000000;
+
+	. = ALIGN(8);
+	.text :
+	{
+		*(.__image_copy_start)
+		CPUDIR/start.o (.text*)
+		*(.text*)
+	}
+
+	. = ALIGN(8);
+	.rodata : { *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.rodata*))) }
+
+	. = ALIGN(8);
+	.data : {
+		*(.data*)
+	}
+
+	. = ALIGN(8);
+
+	. = .;
+
+	. = ALIGN(8);
+	.u_boot_list : {
+		KEEP(*(SORT(.u_boot_list*)));
+	}
+
+	. = ALIGN(8);
+
+	.image_copy_end :
+	{
+		*(.__image_copy_end)
+	}
+
+	. = ALIGN(8);
+
+	.rel_dyn_start :
+	{
+		*(.__rel_dyn_start)
+	}
+
+	.rela.dyn : {
+		*(.rela*)
+	}
+
+	.rel_dyn_end :
+	{
+		*(.__rel_dyn_end)
+	}
+
+	_end = .;
+
+	. = ALIGN(8);
+
+	.bss_start : {
+		KEEP(*(.__bss_start));
+	}
+
+	.bss : {
+		*(.bss*)
+		 . = ALIGN(8);
+	}
+
+	.bss_end : {
+		KEEP(*(.__bss_end));
+	}
+
+	/DISCARD/ : { *(.dynsym) }
+	/DISCARD/ : { *(.dynstr*) }
+	/DISCARD/ : { *(.dynamic*) }
+	/DISCARD/ : { *(.plt*) }
+	/DISCARD/ : { *(.interp*) }
+	/DISCARD/ : { *(.gnu*) }
+}