| /* SPDX-License-Identifier: GPL-2.0+ */ |
| /* |
| * (C) Copyright 2008 - 2013 Tensilica Inc. |
| * (C) Copyright 2014 - 2016 Cadence Design Systems Inc. |
| */ |
| |
| #include <config.h> |
| #include <asm/asmmacro.h> |
| #include <asm/cacheasm.h> |
| #include <asm/regs.h> |
| #include <asm/arch/tie.h> |
| #include <asm-offsets.h> |
| |
| /* |
| * Offsets into the pt_regs structure. |
| * Make sure these always match with the structure defined in ptrace.h! |
| * |
| * ExceptionHandler in this file allocates PT_SIZE bytes (plus 16) on the |
| * stack and addresses the frame through these offsets. Only some of the |
| * slots are written here; the annotated ones are the ones this file stores. |
| */ |
| |
| #define PT_PC 0 /* stored from EPC1 */ |
| #define PT_PS 4 /* PS as it was on handler entry */ |
| #define PT_DEPC 8 |
| #define PT_EXCCAUSE 12 /* stored from EXCCAUSE */ |
| #define PT_EXCVADDR 16 /* stored from EXCVADDR */ |
| #define PT_DEBUGCAUSE 20 |
| #define PT_WMASK 24 |
| #define PT_LBEG 28 /* only with XCHAL_HAVE_LOOPS */ |
| #define PT_LEND 32 /* only with XCHAL_HAVE_LOOPS */ |
| #define PT_LCOUNT 36 /* only with XCHAL_HAVE_LOOPS */ |
| #define PT_SAR 40 /* stored from SAR */ |
| #define PT_WINDOWBASE 44 |
| #define PT_WINDOWSTART 48 /* only with XCHAL_HAVE_WINDOWED */ |
| #define PT_SYSCALL 52 |
| #define PT_ICOUNTLEVEL 56 |
| #define PT_RESERVED 60 |
| #define PT_AREG 64 /* a0..a15, four bytes each */ |
| #define PT_SIZE (64 + 64) /* PT_AREG + 16 registers * 4 bytes */ |
| |
| /* |
| * Cache attributes are different for full MMU and region protection. |
| * |
| * CA_WRITEBACK is the value _start adds into the (cleared) low four bits |
| * of the spanning-way TLB entries that cover the relocated image. |
| */ |
| |
| #if XCHAL_HAVE_PTP_MMU /* full MMU */ |
| #define CA_WRITEBACK (0x7) |
| #else /* region protection */ |
| #define CA_WRITEBACK (0x4) |
| #endif |
| |
| /* |
| * Reset vector. |
| * |
| * Nothing but a trampoline: load the address of _start from a literal |
| * stored right here in the vector and jump to it. |
| * (The section has to be marked writable because it contains that |
| * relocatable literal.) |
| */ |
| |
| .section .ResetVector.text, "awx" |
| .global _ResetVector |
| _ResetVector: |
| |
| j .Lreset_load_start /* hop over the in-line literal */ |
| .align 4 |
| .Lreset_start_literal: |
| .long _start /* address of the real entry point */ |
| .Lreset_load_start: |
| l32r a2, .Lreset_start_literal |
| jx a2 |
| |
| |
| /* |
| * Processor initialization. We still run in rom space. |
| * |
| * NOTE: Running in ROM |
| * For Xtensa, we currently don't support running any code from ROM; |
| * instead the image is unpacked to memory immediately. This requires, |
| * for example, that DDR has been set up before running U-Boot. (See also |
| * comments inline for ways to change it) |
| * |
| * Sequence: |
| * 1. reset core state (PTEVADDR, DBREAKC, windows, VECBASE, LCOUNT, PS) |
| * 2. unlock and invalidate both caches |
| * 3. copy every entry of the relocation table to its destination, |
| * optionally enabling caching for the destination region |
| * 4. write the data cache back, set up a stack and call |
| * board_init_f_alloc_reserve, board_init_f_init_reserve, board_init_f |
| */ |
| |
| .section .reset.text, "ax" |
| .global _start |
| .align 4 |
| _start: |
| /* Keep a0 = 0 for various initializations */ |
| |
| movi a0, 0 |
| |
| /* |
| * For full MMU cores, put page table at unmapped virtual address. |
| * This ensures that accesses outside the static maps result |
| * in miss exceptions rather than random behaviour. |
| */ |
| |
| #if XCHAL_HAVE_PTP_MMU |
| wsr a0, PTEVADDR |
| #endif |
| |
| /* Disable dbreak debug exceptions */ |
| |
| #if XCHAL_HAVE_DEBUG && XCHAL_NUM_DBREAK > 0 |
| .set _index, 0 |
| .rept XCHAL_NUM_DBREAK |
| wsr a0, DBREAKC + _index |
| .set _index, _index + 1 |
| .endr |
| #endif |
| |
| /* Reset windowbase and windowstart */ |
| |
| #if XCHAL_HAVE_WINDOWED |
| movi a3, 1 |
| wsr a3, windowstart |
| wsr a0, windowbase |
| rsync |
| movi a0, 0 /* windowbase might have changed */ |
| #endif |
| |
| /* |
| * Vecbase in bitstream may differ from header files |
| * set or check it. |
| */ |
| |
| #if XCHAL_HAVE_VECBASE |
| movi a3, XCHAL_VECBASE_RESET_VADDR /* VECBASE reset value */ |
| wsr a3, VECBASE |
| #endif |
| |
| #if XCHAL_HAVE_LOOPS |
| /* Disable loops */ |
| |
| wsr a0, LCOUNT |
| #endif |
| |
| /* Set PS.WOE = 0, PS.EXCM = 0 (for loop), PS.INTLEVEL = EXCM level */ |
| |
| #if XCHAL_HAVE_XEA1 |
| movi a2, 1 |
| #else |
| movi a2, XCHAL_EXCM_LEVEL |
| #endif |
| wsr a2, PS |
| rsync |
| |
| /* Unlock and invalidate caches (a2 and a3 are handed to the macros) */ |
| |
| ___unlock_dcache_all a2, a3 |
| ___invalidate_dcache_all a2, a3 |
| ___unlock_icache_all a2, a3 |
| ___invalidate_icache_all a2, a3 |
| |
| isync |
| |
| /* |
| * Unpack data sections. |
| * |
| * Each relocation table entry is three words: destination start, |
| * destination end and source start. |
| * a2 = current table entry, a3 = end of table |
| * a4 = destination cursor, a5 = destination end, a6 = source cursor |
| */ |
| |
| movi a2, __reloc_table_start |
| movi a3, __reloc_table_end |
| |
| 1: beq a2, a3, 3f # no more entries? |
| l32i a4, a2, 0 # start destination (in RAM) |
| l32i a5, a2, 4 # end destination (in RAM) |
| l32i a6, a2, 8 # start source (in ROM) |
| addi a2, a2, 12 # next entry |
| beq a4, a5, 1b # skip, empty entry |
| beq a4, a6, 1b # skip, source and destination are the same |
| |
| /* If there's memory protection option with 512MB TLB regions and |
| * cache attributes in TLB entries and caching is not inhibited, |
| * enable data/instruction cache for relocated image. |
| * |
| * a7 = destination address rounded down to 512MB (low 29 bits cleared) |
| * plus the spanning way; a8 = TLB entry with its low four bits replaced |
| * by CA_WRITEBACK. |
| */ |
| #if XCHAL_HAVE_SPANNING_WAY && \ |
| !(defined(CONFIG_SYS_DCACHE_OFF) && defined(CONFIG_SYS_ICACHE_OFF)) |
| srli a7, a4, 29 |
| slli a7, a7, 29 |
| addi a7, a7, XCHAL_SPANNING_WAY |
| #ifndef CONFIG_SYS_DCACHE_OFF |
| rdtlb1 a8, a7 |
| srli a8, a8, 4 |
| slli a8, a8, 4 |
| addi a8, a8, CA_WRITEBACK |
| wdtlb a8, a7 |
| #endif |
| #ifndef CONFIG_SYS_ICACHE_OFF |
| ritlb1 a8, a7 |
| srli a8, a8, 4 |
| slli a8, a8, 4 |
| addi a8, a8, CA_WRITEBACK |
| witlb a8, a7 |
| #endif |
| isync |
| #endif |
| |
| /* Word-by-word copy; the entry is known to be non-empty here */ |
| |
| 2: l32i a7, a6, 0 |
| addi a6, a6, 4 |
| s32i a7, a4, 0 |
| addi a4, a4, 4 |
| bltu a4, a5, 2b |
| j 1b |
| |
| 3: /* All code and initialized data segments have been copied */ |
| |
| /* |
| * Setup PS: PS.EXCM = 0, PS.INTLEVEL = EXCM level. |
| * PS.WOE = 1 for the windowed ABI, PS.WOE = 0 for the call0 ABI. |
| */ |
| |
| #ifdef __XTENSA_CALL0_ABI__ |
| movi a2, XCHAL_EXCM_LEVEL |
| #else |
| movi a2, (1<<PS_WOE_BIT) | XCHAL_EXCM_LEVEL |
| #endif |
| wsr a2, PS |
| rsync |
| |
| /* Writeback */ |
| |
| ___flush_dcache_all a2, a3 |
| |
| #ifdef __XTENSA_WINDOWED_ABI__ |
| /* |
| * In windowed ABI caller and call target need to be within the same |
| * gigabyte. Put the rest of the code into the text segment and jump |
| * there. |
| */ |
| |
| movi a4, .Lboard_init_code |
| jx a4 |
| |
| .text |
| .align 4 |
| .Lboard_init_code: |
| #endif |
| |
| /* Clear a0 and place the initial stack, 16-byte aligned, just below XTENSA_SYS_TEXT_ADDR */ |
| |
| movi a0, 0 |
| movi sp, (XTENSA_SYS_TEXT_ADDR - 16) & 0xfffffff0 |
| |
| #ifdef CONFIG_DEBUG_UART |
| movi a4, debug_uart_init |
| #ifdef __XTENSA_CALL0_ABI__ |
| callx0 a4 |
| #else |
| callx4 a4 |
| #endif |
| #endif |
| |
| /* |
| * Pass the current stack pointer to board_init_f_alloc_reserve and |
| * adopt the value it returns as the new stack pointer. |
| */ |
| |
| movi a4, board_init_f_alloc_reserve |
| |
| #ifdef __XTENSA_CALL0_ABI__ |
| mov a2, sp |
| callx0 a4 |
| mov sp, a2 |
| #else |
| mov a6, sp |
| callx4 a4 |
| movsp sp, a6 |
| #endif |
| |
| /* |
| * The argument register (a2 resp. a6) is not reloaded, so |
| * board_init_f_init_reserve receives the value returned above. |
| */ |
| |
| movi a4, board_init_f_init_reserve |
| |
| #ifdef __XTENSA_CALL0_ABI__ |
| callx0 a4 |
| #else |
| callx4 a4 |
| #endif |
| |
| /* |
| * Call board initialization routine (never returns). |
| * Its single argument is 0. |
| */ |
| |
| movi a4, board_init_f |
| |
| #ifdef __XTENSA_CALL0_ABI__ |
| movi a2, 0 |
| callx0 a4 |
| #else |
| movi a6, 0 |
| callx4 a4 |
| #endif |
| /* Never Returns */ |
| ill |
| |
| /* |
| * void relocate_code (addr_sp, gd, addr_moni) |
| * |
| * This "function" does not return, instead it continues in RAM |
| * after relocating the monitor code. |
| * |
| * a2 = addr_sp |
| * a3 = gd |
| * a4 = destination address |
| * |
| * Switches to the stack at addr_sp and calls board_init_r with gd and the |
| * destination address as its first and second argument. An "ill" |
| * instruction follows the call in case board_init_r ever returns. |
| */ |
| .text |
| .globl relocate_code |
| .align 4 |
| relocate_code: |
| abi_entry |
| |
| #ifdef __XTENSA_CALL0_ABI__ |
| mov a1, a2 /* new stack pointer */ |
| mov a2, a3 /* first argument: gd */ |
| mov a3, a4 /* second argument: destination address */ |
| movi a0, board_init_r |
| callx0 a0 |
| #else |
| /* We can't movsp here, because the chain of stack frames may cross |
| * the now reserved memory. We need to toss all window frames except |
| * the current, create new pristine stack frame and start from scratch. |
| * |
| * windowstart is set to (1 << windowbase), i.e. only the bit of the |
| * current window remains set. |
| */ |
| rsr a0, windowbase |
| ssl a0 |
| movi a0, 1 |
| sll a0, a0 |
| wsr a0, windowstart |
| rsync |
| |
| movi a0, 0 |
| |
| /* Reserve 16-byte save area */ |
| addi sp, a2, -16 |
| mov a6, a3 /* gd, outgoing argument for callx4 */ |
| mov a7, a4 /* destination address, outgoing argument for callx4 */ |
| movi a4, board_init_r |
| callx4 a4 |
| #endif |
| ill |
| |
| #if XCHAL_HAVE_EXCEPTIONS |
| |
| /* |
| * Exception vectors. |
| * |
| * Various notes: |
| * - We currently don't use the user exception vector (PS.UM is always 0), |
| * but do define such a vector, just in case. They both jump to the |
| * same exception handler, though. |
| * - We currently only save the bare minimum number of registers: |
| * a0...a15, sar, loop-registers, ps and the exception registers epc1, |
| * excvaddr and exccause (depc has a pt_regs slot but is not saved) |
| * - WINDOWSTART is only saved to identify if registers have been spilled |
| * to the wrong stack (exception stack) while executing the exception |
| * handler. |
| */ |
| |
| .section .KernelExceptionVector.text, "ax" |
| .global _KernelExceptionVector |
| _KernelExceptionVector: |
| |
| wsr a2, EXCSAVE1 /* park a2; ExceptionHandler reads it back from EXCSAVE1 */ |
| movi a2, ExceptionHandler |
| jx a2 |
| |
| .section .UserExceptionVector.text, "ax" |
| .global _UserExceptionVector |
| _UserExceptionVector: |
| |
| wsr a2, EXCSAVE1 /* same sequence as the kernel vector */ |
| movi a2, ExceptionHandler |
| jx a2 |
| |
| #if !XCHAL_HAVE_XEA1 /* no double exception vector is emitted for XEA1 */ |
| .section .DoubleExceptionVector.text, "ax" |
| .global _DoubleExceptionVector |
| _DoubleExceptionVector: |
| |
| #ifdef __XTENSA_CALL0_ABI__ |
| wsr a0, EXCSAVE1 /* keep the old a0; it is overwritten with the call target */ |
| movi a0, hang # report and ask user to reset board |
| callx0 a0 |
| #else |
| wsr a4, EXCSAVE1 /* keep the old a4; it is overwritten with the call target */ |
| movi a4, hang # report and ask user to reset board |
| callx4 a4 |
| #endif |
| #endif |
| /* Does not return here */ |
| |
| |
| /* |
| * Common exception handler, entered from the kernel and user exception |
| * vectors with the interrupted a2 parked in EXCSAVE1. |
| * |
| * Apart from the alloca special case it builds a pt_regs frame on the |
| * stack, calls the C handler registered in exc_table[EXCCAUSE] with a |
| * pointer to that frame as its only argument, restores the saved state |
| * from the frame and returns with rfe. |
| */ |
| |
| .text |
| .align 4 |
| ExceptionHandler: |
| |
| rsr a2, EXCCAUSE # find handler |
| |
| #if XCHAL_HAVE_WINDOWED |
| /* Special case for alloca handler (EXCCAUSE value 5) */ |
| |
| bnei a2, 5, 1f # jump if not alloca exception |
| |
| addi a1, a1, -16 - 4 # create a small stack frame |
| s32i a3, a1, 0 # and save a3 (a2 still in excsave1) |
| movi a2, fast_alloca_exception |
| jx a2 # jump to fast_alloca_exception |
| #endif |
| /* All other exceptions go here: */ |
| |
| /* |
| * Create ptrace stack and save a0...a3. |
| * The frame sits PT_SIZE + 16 bytes below the interrupted stack pointer; |
| * a2 addresses it until a1 has been stored, then a1 takes over. |
| */ |
| |
| 1: addi a2, a1, - PT_SIZE - 16 |
| s32i a0, a2, PT_AREG + 0 * 4 |
| s32i a1, a2, PT_AREG + 1 * 4 |
| s32i a3, a2, PT_AREG + 3 * 4 |
| rsr a3, EXCSAVE1 /* interrupted a2, parked by the vector */ |
| s32i a3, a2, PT_AREG + 2 * 4 |
| mov a1, a2 |
| |
| /* Save remaining AR registers */ |
| |
| s32i a4, a1, PT_AREG + 4 * 4 |
| s32i a5, a1, PT_AREG + 5 * 4 |
| s32i a6, a1, PT_AREG + 6 * 4 |
| s32i a7, a1, PT_AREG + 7 * 4 |
| s32i a8, a1, PT_AREG + 8 * 4 |
| s32i a9, a1, PT_AREG + 9 * 4 |
| s32i a10, a1, PT_AREG + 10 * 4 |
| s32i a11, a1, PT_AREG + 11 * 4 |
| s32i a12, a1, PT_AREG + 12 * 4 |
| s32i a13, a1, PT_AREG + 13 * 4 |
| s32i a14, a1, PT_AREG + 14 * 4 |
| s32i a15, a1, PT_AREG + 15 * 4 |
| |
| /* Save SRs */ |
| |
| #if XCHAL_HAVE_WINDOWED |
| rsr a2, WINDOWSTART |
| s32i a2, a1, PT_WINDOWSTART |
| #endif |
| |
| rsr a2, SAR |
| rsr a3, EPC1 |
| rsr a4, EXCVADDR |
| s32i a2, a1, PT_SAR |
| s32i a3, a1, PT_PC |
| s32i a4, a1, PT_EXCVADDR |
| |
| #if XCHAL_HAVE_LOOPS |
| /* xsr reads the old LCOUNT and writes 0 to it in one step */ |
| movi a2, 0 |
| rsr a3, LBEG |
| xsr a2, LCOUNT |
| s32i a3, a1, PT_LBEG |
| rsr a3, LEND |
| s32i a2, a1, PT_LCOUNT |
| s32i a3, a1, PT_LEND |
| #endif |
| |
| /* Set up C environment and call registered handler */ |
| /* |
| * Setup PS: PS.EXCM = 0, PS.INTLEVEL = EXCM level (1 on XEA1). |
| * PS.WOE = 1 except for the call0 ABI on non-XEA1 cores, where it is 0. |
| * xsr leaves the previous PS in a3, which goes into the frame. |
| */ |
| |
| rsr a2, EXCCAUSE |
| #if XCHAL_HAVE_XEA1 |
| movi a3, (1<<PS_WOE_BIT) | 1 |
| #elif defined(__XTENSA_CALL0_ABI__) |
| movi a3, XCHAL_EXCM_LEVEL |
| #else |
| movi a3, (1<<PS_WOE_BIT) | XCHAL_EXCM_LEVEL |
| #endif |
| xsr a3, PS |
| rsync |
| s32i a2, a1, PT_EXCCAUSE |
| s32i a3, a1, PT_PS |
| |
| /* a0 = exc_table[EXCCAUSE], one 4-byte entry per cause */ |
| |
| movi a0, exc_table |
| addx4 a0, a2, a0 |
| l32i a0, a0, 0 |
| #ifdef __XTENSA_CALL0_ABI__ |
| mov a2, a1 # Provide stack frame as only argument |
| callx0 a0 |
| l32i a3, a1, PT_PS /* reload the saved PS after the call */ |
| #else |
| mov a6, a1 # Provide stack frame as only argument |
| callx4 a0 |
| #endif |
| |
| /* Restore PS and go to exception mode (PS.EXCM=1) */ |
| |
| wsr a3, PS |
| |
| /* Restore SR registers */ |
| |
| #if XCHAL_HAVE_LOOPS |
| l32i a2, a1, PT_LBEG |
| l32i a3, a1, PT_LEND |
| l32i a4, a1, PT_LCOUNT |
| wsr a2, LBEG |
| wsr a3, LEND |
| wsr a4, LCOUNT |
| #endif |
| |
| l32i a2, a1, PT_SAR |
| l32i a3, a1, PT_PC |
| wsr a2, SAR |
| wsr a3, EPC1 |
| |
| #if XCHAL_HAVE_WINDOWED |
| /* |
| * Do we need to simulate a MOVSP? |
| * (x & (x - 1)) is zero when at most one WINDOWSTART bit is set. |
| * If more than one bit was set on entry but only one is set now, the |
| * 16 bytes just below the frame are copied to a1 + PT_SIZE, i.e. to |
| * the 16 bytes just below the interrupted stack pointer. |
| */ |
| |
| l32i a2, a1, PT_WINDOWSTART |
| addi a3, a2, -1 |
| and a2, a2, a3 |
| beqz a2, 1f # Skip if regs were spilled before exc. |
| |
| rsr a2, WINDOWSTART |
| addi a3, a2, -1 |
| and a2, a2, a3 |
| bnez a2, 1f # Skip if registers aren't spilled now |
| |
| addi a2, a1, -16 |
| l32i a4, a2, 0 |
| l32i a5, a2, 4 |
| s32i a4, a1, PT_SIZE + 0 |
| s32i a5, a1, PT_SIZE + 4 |
| l32i a4, a2, 8 |
| l32i a5, a2, 12 |
| s32i a4, a1, PT_SIZE + 8 |
| s32i a5, a1, PT_SIZE + 12 |
| #endif |
| |
| /* Restore address register; a1 goes last because it addresses the frame */ |
| |
| 1: l32i a15, a1, PT_AREG + 15 * 4 |
| l32i a14, a1, PT_AREG + 14 * 4 |
| l32i a13, a1, PT_AREG + 13 * 4 |
| l32i a12, a1, PT_AREG + 12 * 4 |
| l32i a11, a1, PT_AREG + 11 * 4 |
| l32i a10, a1, PT_AREG + 10 * 4 |
| l32i a9, a1, PT_AREG + 9 * 4 |
| l32i a8, a1, PT_AREG + 8 * 4 |
| l32i a7, a1, PT_AREG + 7 * 4 |
| l32i a6, a1, PT_AREG + 6 * 4 |
| l32i a5, a1, PT_AREG + 5 * 4 |
| l32i a4, a1, PT_AREG + 4 * 4 |
| l32i a3, a1, PT_AREG + 3 * 4 |
| l32i a2, a1, PT_AREG + 2 * 4 |
| l32i a0, a1, PT_AREG + 0 * 4 |
| |
| l32i a1, a1, PT_AREG + 1 * 4 # Remove ptrace stack frame |
| |
| rfe |
| |
| #endif /* XCHAL_HAVE_EXCEPTIONS */ |
| |
| #if XCHAL_HAVE_WINDOWED |
| |
| /* |
| * Window overflow and underflow handlers. |
| * The handlers must be 64 bytes apart and are laid out interleaved: |
| * overflow-4, underflow-4, overflow-8, underflow-8, overflow-12, |
| * underflow-12. |
| * |
| * Note: We rerun the underflow handlers if we hit an exception, so |
| * we try to access any page that would cause a page fault early. |
| */ |
| |
| .section .WindowVectors.text, "ax" |
| |
| /* 4-Register Window Overflow Vector (Handler) */ |
| |
| .align 64 |
| .global _WindowOverflow4 |
| _WindowOverflow4: |
| s32e a0, a5, -16 |
| s32e a1, a5, -12 |
| s32e a2, a5, -8 |
| s32e a3, a5, -4 |
| rfwo |
| |
| |
| /* 4-Register Window Underflow Vector (Handler) */ |
| |
| .align 64 |
| .global _WindowUnderflow4 |
| _WindowUnderflow4: |
| l32e a0, a5, -16 |
| l32e a1, a5, -12 |
| l32e a2, a5, -8 |
| l32e a3, a5, -4 |
| rfwu |
| |
| /* |
| * fast_alloca_exception is placed in the unused tail of the 64-byte |
| * _WindowUnderflow4 slot; the numbers in the left-hand comments are byte |
| * offsets from _WindowUnderflow4. ExceptionHandler jumps here with: |
| * |
| * a0: a0 |
| * a1: new stack pointer = a1 - 16 - 4 |
| * a2: available, saved in excsave1 |
| * a3: available, saved on stack *a1 |
| * |
| * The code XORs (PS.OWB ^ WINDOWBASE) back into PS at the OWB position, |
| * which replaces PS.OWB with the current WINDOWBASE. It then restores |
| * a3, a1 and a2, rotates the window back and branches to the underflow |
| * handler selected by bits 31 and 30 of the register tested after each |
| * rotate. |
| * |
| * NOTE(review): the extui below passes PS_OWB_SHIFT as its width operand |
| * as well as its shift operand; a field-width constant looks like what |
| * was intended - confirm against the PS register layout. |
| */ |
| |
| /* 15*/ .byte 0xff |
| |
| fast_alloca_exception: /* must be at _WindowUnderflow4 + 16 */ |
| |
| /* 16*/ rsr a2, PS |
| /* 19*/ rsr a3, WINDOWBASE |
| /* 22*/ extui a2, a2, PS_OWB_SHIFT, PS_OWB_SHIFT |
| /* 25*/ xor a2, a2, a3 |
| /* 28*/ rsr a3, PS |
| /* 31*/ slli a2, a2, PS_OWB_SHIFT |
| /* 34*/ xor a2, a3, a2 |
| /* 37*/ wsr a2, PS |
| |
| /* 40*/ _l32i a3, a1, 0 |
| /* 43*/ addi a1, a1, 16 + 4 |
| /* 46*/ rsr a2, EXCSAVE1 |
| |
| /* 49*/ rotw -1 |
| /* 52*/ _bbci.l a4, 31, _WindowUnderflow4 /* 0x: call4 */ |
| /* 55*/ rotw -1 |
| /* 58*/ _bbci.l a8, 30, _WindowUnderflow8 /* 10: call8 */ |
| /* 61*/ _j __WindowUnderflow12 /* 11: call12 */ |
| /* 64*/ |
| |
| /* 8-Register Window Overflow Vector (Handler) */ |
| |
| .align 64 |
| .global _WindowOverflow8 |
| _WindowOverflow8: |
| s32e a0, a9, -16 |
| l32e a0, a1, -12 |
| s32e a2, a9, -8 |
| s32e a1, a9, -12 |
| s32e a3, a9, -4 |
| s32e a4, a0, -32 |
| s32e a5, a0, -28 |
| s32e a6, a0, -24 |
| s32e a7, a0, -20 |
| rfwo |
| |
| /* 8-Register Window Underflow Vector (Handler) */ |
| |
| .align 64 |
| .global _WindowUnderflow8 |
| _WindowUnderflow8: |
| l32e a1, a9, -12 |
| l32e a0, a9, -16 |
| l32e a7, a1, -12 |
| l32e a2, a9, -8 |
| l32e a4, a7, -32 |
| l32e a3, a9, -4 |
| l32e a5, a7, -28 |
| l32e a6, a7, -24 |
| l32e a7, a7, -20 |
| rfwu |
| |
| /* 12-Register Window Overflow Vector (Handler) */ |
| |
| .align 64 |
| .global _WindowOverflow12 |
| _WindowOverflow12: |
| s32e a0, a13, -16 |
| l32e a0, a1, -12 |
| s32e a1, a13, -12 |
| s32e a2, a13, -8 |
| s32e a3, a13, -4 |
| s32e a4, a0, -48 |
| s32e a5, a0, -44 |
| s32e a6, a0, -40 |
| s32e a7, a0, -36 |
| s32e a8, a0, -32 |
| s32e a9, a0, -28 |
| s32e a10, a0, -24 |
| s32e a11, a0, -20 |
| rfwo |
| |
| /* |
| * 12-Register Window Underflow Vector (Handler) |
| * |
| * __WindowUnderflow12 starts three bytes before the vector so that the |
| * extra rotw -1 needed on the fast_alloca_exception path falls through |
| * into _WindowUnderflow12, which itself sits at _WindowOverflow12 + 64. |
| */ |
| |
| .org _WindowOverflow12 + 64 - 3 |
| __WindowUnderflow12: |
| rotw -1 |
| .global _WindowUnderflow12 |
| _WindowUnderflow12: |
| l32e a1, a13, -12 |
| l32e a0, a13, -16 |
| l32e a11, a1, -12 |
| l32e a2, a13, -8 |
| l32e a4, a11, -48 |
| l32e a8, a11, -32 |
| l32e a3, a13, -4 |
| l32e a5, a11, -44 |
| l32e a6, a11, -40 |
| l32e a7, a11, -36 |
| l32e a9, a11, -28 |
| l32e a10, a11, -24 |
| l32e a11, a11, -20 |
| rfwu |
| |
| #endif /* XCHAL_HAVE_WINDOWED */ |