/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/linkage.h>
#include <asm/asm.h>

ENTRY(__memmove)
WEAK(memmove)
	/*
	 * Here we determine if a forward copy is possible. Forward copy is
	 * preferred to backward copy as it is more cache friendly.
	 *
	 * If a0 >= a1, t0 gives their distance; if t0 >= a2 then we can
	 * copy forward.
	 * If a0 < a1, we can always copy forward. This will make t0 negative,
	 * so an *unsigned* comparison will always have t0 >= a2.
	 *
	 * For forward copy we just delegate the task to memcpy.
	 */
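	/*
	 * Worked example of the check below (illustrative values only):
	 * with a0 = 0x1010, a1 = 0x1000, a2 = 0x20, t0 = 0x10 < a2, so the
	 * regions overlap with dst above src and we must copy backwards.
	 * With a0 = 0x1000, a1 = 0x1010, a2 = 0x20, t0 wraps to a huge
	 * unsigned value >= a2, so we tail-call __memcpy and copy forwards.
	 */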
	sub	t0, a0, a1
	bltu	t0, a2, 1f
	tail	__memcpy
1:

	/*
	 * Register allocation for code below:
	 * a0 - end of uncopied dst
	 * a1 - end of uncopied src
	 * t0 - start of uncopied dst
	 */
	mv	t0, a0
	add	a0, a0, a2
	add	a1, a1, a2
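
	/*
	 * From here on the copy proceeds backwards: a0 and a1 point one past
	 * the last uncopied byte and are decremented toward t0; once
	 * a0 == t0, everything has been copied.
	 */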

	/*
	 * Use bytewise copy if too small.
	 *
	 * This threshold must be at least 2*SZREG to ensure at least one
	 * wordwise copy is performed. It is chosen to be 16 because it will
	 * save at least 7 iterations of bytewise copy, which pays off the
	 * fixed overhead.
	 */
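	/*
	 * For example, on RV64 (SZREG == 8) with a2 == 16 the alignment
	 * loop below copies at most 7 bytes, leaving at least 9, so at
	 * least one full word is copied word-wise; that one word-wise copy
	 * replaces 8 byte-wise iterations, saving 7.
	 */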
	li	a3, 16
	bltu	a2, a3, .Lbyte_copy_tail

	/*
	 * Bytewise copy first to align a0 to a word boundary.
	 * (a2, the length, is no longer needed here and is reused to hold
	 * the aligned target address.)
	 */
	andi	a2, a0, ~(SZREG-1)
	beq	a0, a2, 2f
1:
	addi	a1, a1, -1
	lb	a5, 0(a1)
	addi	a0, a0, -1
	sb	a5, 0(a0)
	bne	a0, a2, 1b
2:

	/*
	 * Now a0 is word-aligned. If a1 is also word-aligned, we can perform
	 * an aligned word-wise copy. Otherwise we need to perform a
	 * misaligned word-wise copy.
	 */
	andi	a3, a1, SZREG-1
	bnez	a3, .Lmisaligned_word_copy

	/* Wordwise copy */
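	/*
	 * Note: t0 is temporarily biased by SZREG-1 so that "bgtu a0, t0"
	 * keeps looping only while at least SZREG uncopied bytes remain;
	 * the bias is removed again right after the loop.
	 */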
	addi	t0, t0, SZREG-1
	bleu	a0, t0, 2f
1:
	addi	a1, a1, -SZREG
	REG_L	a5, 0(a1)
	addi	a0, a0, -SZREG
	REG_S	a5, 0(a0)
	bgtu	a0, t0, 1b
2:
	addi	t0, t0, -(SZREG-1)

.Lbyte_copy_tail:
	/*
	 * Bytewise copy anything left.
	 */
	beq	a0, t0, 2f
1:
	addi	a1, a1, -1
	lb	a5, 0(a1)
	addi	a0, a0, -1
	sb	a5, 0(a0)
	bne	a0, t0, 1b
2:

	mv	a0, t0
	ret

.Lmisaligned_word_copy:
	/*
	 * Misaligned word-wise copy.
	 * For misaligned copy we still perform word-wise copy, but we need to
	 * use the value fetched from the previous iteration and do some shifts.
	 * This is safe because we wouldn't access more words than necessary.
	 */

	/* Calculate shifts */
	slli	t3, a3, 3
	sub	t4, x0, t3 /* negate is okay as shift will only look at LSBs */
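	/*
	 * Illustration (RV64, SZREG == 8): if a1 was misaligned by a3 == 3
	 * bytes, then t3 == 24 and t4 == -24, which the shifter truncates to
	 * its low 6 bits, giving an effective shift of 40. Each destination
	 * word below is assembled as
	 *	(newly loaded word >> 24) | (previously loaded word << 40)
	 * i.e. the high 5 bytes of the lower-address word combined with the
	 * low 3 bytes carried over from the word above it.
	 */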

	/* Load the initial value and align a1 */
	andi	a1, a1, ~(SZREG-1)
	REG_L	a5, 0(a1)

	addi	t0, t0, SZREG-1
	/* At least one iteration will be executed here, no check needed */
1:
	sll	a4, a5, t4
	addi	a1, a1, -SZREG
	REG_L	a5, 0(a1)
	srl	a2, a5, t3
	or	a2, a2, a4
	addi	a0, a0, -SZREG
	REG_S	a2, 0(a0)
	bgtu	a0, t0, 1b

	/*
	 * Remove the bias from t0 and move a1 back to the true
	 * (unaligned) end of uncopied src.
	 */
	addi	t0, t0, -(SZREG-1)
	add	a1, a1, a3

	j	.Lbyte_copy_tail

END(__memmove)