/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

/*
 * If dst and src are 4 byte aligned, copy 8 bytes at a time.
 * If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
 * it 8 byte aligned. Thus, we can do a little read-ahead, without
 * dereferencing a cache line that we should not touch.
 * Note that short and long instructions have been scheduled to avoid
 * branch stalls.
 * The bne_s to r3z could be made unaligned & long to avoid a stall
 * there, but it is not likely to be taken often, and it would also be
 * likely to cost an unaligned mispredict at the next call.
 */
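
/*
 * For reference, the word-at-a-time NUL test below is the classic
 * "has zero byte" trick: with %r8 = 0x01010101 and %r12 = 0x80808080,
 * a word w contains a zero byte iff ((w - 0x01010101) & ~w) & 0x80808080
 * is non-zero. A minimal C sketch of that test (the function name is
 * illustrative only, it is not part of this file):
 *
 *	static int has_zero_byte(unsigned int w)
 *	{
 *		return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
 *	}
 */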

.global strcpy
.align 4
strcpy:
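	/* Fall back to the byte loop unless both dst (%r0) and src (%r1)
	   are 4 byte aligned; %r10 is the working dst pointer, so %r0 is
	   returned unchanged. */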
	or	%r2, %r0, %r1
	bmsk_s	%r2, %r2, 1
	brne.d	%r2, 0, charloop
	mov_s	%r10, %r0
	ld_s	%r3, [%r1, 0]
	mov	%r8, 0x01010101
	bbit0.d	%r1, 2, loop_start
	ror	%r12, %r8
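	/* src is 4 but not 8 byte aligned: check the first word for a NUL
	   byte before entering the 8 bytes per iteration loop. */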
	sub	%r2, %r3, %r8
	bic_s	%r2, %r2, %r3
	tst_s	%r2, %r12
	bne	r3z
	mov_s	%r4, %r3
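	/* Main loop: copy 8 bytes per iteration, testing each 4 byte word
	   for a NUL byte before it is stored. */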
	.balign 4
loop:
	ld.a	%r3, [%r1, 4]
	st.ab	%r4, [%r10, 4]
loop_start:
	ld.a	%r4, [%r1, 4]
	sub	%r2, %r3, %r8
	bic_s	%r2, %r2, %r3
	tst_s	%r2, %r12
	bne_s	r3z
	st.ab	%r3, [%r10, 4]
	sub	%r2, %r4, %r8
	bic	%r2, %r2, %r4
	tst	%r2, %r12
	beq	loop
	mov_s	%r3, %r4
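	/* %r3 holds the word containing the terminating NUL: store its
	   bytes one at a time, ending with the NUL itself. */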
#ifdef __LITTLE_ENDIAN__
r3z:	bmsk.f	%r1, %r3, 7
	lsr_s	%r3, %r3, 8
#else /* __BIG_ENDIAN__ */
r3z:	lsr.f	%r1, %r3, 24
	asl_s	%r3, %r3, 8
#endif /* _ENDIAN__ */
	bne.d	r3z
	stb.ab	%r1, [%r10, 1]
	j_s	[%blink]

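	/* Byte at a time copy, used when dst or src is not 4 byte aligned. */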
	.balign 4
charloop:
	ldb.ab	%r3, [%r1, 1]
	brne.d	%r3, 0, charloop
	stb.ab	%r3, [%r10, 1]
	j	[%blink]