/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 Regents of the University of California
 */

#include <linux/linkage.h>
#include <asm/asm.h>

/* void *memcpy(void *, const void *, size_t) */
ENTRY(__memcpy)
WEAK(memcpy)
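	/* memcpy is defined as a weak symbol here so that a platform-optimized
	 * version can override this generic one at link time. */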
	/* Save for return value */
	mv	t6, a0

	/*
	 * Register allocation for code below:
	 * a0 - start of uncopied dst
	 * a1 - start of uncopied src
	 * t0 - end of uncopied dst
	 */
	add	t0, a0, a2

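	/*
	 * Overall strategy, as an illustrative C-like sketch (not part of
	 * the build):
	 *
	 *	if (len < 16) goto byte_tail;
	 *	copy bytes until dst is SZREG-aligned;
	 *	if ((src & (SZREG-1)) == 0) {
	 *		copy 16 words at a time while they fit;
	 *		copy 1 word at a time while it fits;
	 *	} else {
	 *		splice shifted pairs of src words into dst words;
	 *	}
	 * byte_tail:
	 *	copy any remaining bytes one at a time;
	 */
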
	/*
	 * Use bytewise copy if too small.
	 *
	 * This threshold must be at least 2*SZREG to ensure at least one
	 * wordwise copy is performed. It is chosen to be 16 because it will
	 * save at least 7 iterations of bytewise copy, which pays off the
	 * fixed overhead.
	 */
	li	a3, 16
	bltu	a2, a3, .Lbyte_copy_tail

	/*
	 * Bytewise copy first to align a0 to word boundary.
	 */
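	/* a2 = (a0 + SZREG-1) & ~(SZREG-1): dst rounded up to a word boundary */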
	addi	a2, a0, SZREG-1
	andi	a2, a2, ~(SZREG-1)
	beq	a0, a2, 2f
1:
	lb	a5, 0(a1)
	addi	a1, a1, 1
	sb	a5, 0(a0)
	addi	a0, a0, 1
	bne	a0, a2, 1b
2:

	/*
	 * Now a0 is word-aligned. If a1 is also word-aligned, we can perform
	 * an aligned word-wise copy. Otherwise we need to perform a
	 * misaligned word-wise copy.
	 */
	andi	a3, a1, SZREG-1
	bnez	a3, .Lmisaligned_word_copy

	/* Unrolled wordwise copy */
	addi	t0, t0, -(16*SZREG-1)
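	/*
	 * t0 is biased down by 16*SZREG-1 so that "a0 < t0" holds exactly
	 * while at least one full 16-word block remains to be copied.
	 */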
	bgeu	a0, t0, 2f
1:
	REG_L	a2, 0(a1)
	REG_L	a3, SZREG(a1)
	REG_L	a4, 2*SZREG(a1)
	REG_L	a5, 3*SZREG(a1)
	REG_L	a6, 4*SZREG(a1)
	REG_L	a7, 5*SZREG(a1)
	REG_L	t1, 6*SZREG(a1)
	REG_L	t2, 7*SZREG(a1)
	REG_L	t3, 8*SZREG(a1)
	REG_L	t4, 9*SZREG(a1)
	REG_L	t5, 10*SZREG(a1)
	REG_S	a2, 0(a0)
	REG_S	a3, SZREG(a0)
	REG_S	a4, 2*SZREG(a0)
	REG_S	a5, 3*SZREG(a0)
	REG_S	a6, 4*SZREG(a0)
	REG_S	a7, 5*SZREG(a0)
	REG_S	t1, 6*SZREG(a0)
	REG_S	t2, 7*SZREG(a0)
	REG_S	t3, 8*SZREG(a0)
	REG_S	t4, 9*SZREG(a0)
	REG_S	t5, 10*SZREG(a0)
	REG_L	a2, 11*SZREG(a1)
	REG_L	a3, 12*SZREG(a1)
	REG_L	a4, 13*SZREG(a1)
	REG_L	a5, 14*SZREG(a1)
	REG_L	a6, 15*SZREG(a1)
	addi	a1, a1, 16*SZREG
	REG_S	a2, 11*SZREG(a0)
	REG_S	a3, 12*SZREG(a0)
	REG_S	a4, 13*SZREG(a0)
	REG_S	a5, 14*SZREG(a0)
	REG_S	a6, 15*SZREG(a0)
	addi	a0, a0, 16*SZREG
	bltu	a0, t0, 1b
2:
	/* Post-loop increment by 16*SZREG-1 and pre-loop decrement by SZREG-1 */
	addi	t0, t0, 15*SZREG
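	/*
	 * 15*SZREG = (16*SZREG-1) - (SZREG-1): this undoes the unrolled
	 * loop's bias and leaves t0 biased by SZREG-1 for the loop below.
	 */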

	/* Wordwise copy */
	bgeu	a0, t0, 2f
1:
	REG_L	a5, 0(a1)
	addi	a1, a1, SZREG
	REG_S	a5, 0(a0)
	addi	a0, a0, SZREG
	bltu	a0, t0, 1b
2:
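	/* Restore t0 to the true end of dst for the byte-copy tail */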
	addi	t0, t0, SZREG-1

.Lbyte_copy_tail:
	/*
	 * Bytewise copy anything left.
	 */
	beq	a0, t0, 2f
1:
	lb	a5, 0(a1)
	addi	a1, a1, 1
	sb	a5, 0(a0)
	addi	a0, a0, 1
	bne	a0, t0, 1b
2:

	mv	a0, t6
	ret

.Lmisaligned_word_copy:
	/*
	 * Misaligned word-wise copy.
	 * For misaligned copy we still perform word-wise copy, but we need to
	 * use the value fetched from the previous iteration and do some shifts.
	 * This is safe because we wouldn't access more words than necessary.
	 */
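	/*
	 * "No more words than necessary": every word loaded below overlaps
	 * the source buffer, so rounding a1 down (and the one-word read-ahead
	 * in the loop) only touches aligned words that contain at least one
	 * byte that must be copied.
	 */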

	/* Calculate shifts */
	slli	t3, a3, 3
	sub	t4, x0, t3 /* negate is okay as shift will only look at LSBs */
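	/*
	 * t3 = misalignment in bits; t4 behaves as SZREG*8 - t3 because
	 * sll/srl use only the low bits of the shift amount. E.g. on RV64
	 * with a3 == 2: t3 = 16, t4 acts as 48, and each iteration below
	 * stores (cur >> 16) | (next << 48), splicing two aligned src words
	 * into one aligned dst word (little-endian).
	 */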

	/* Load the initial value and align a1 */
	andi	a1, a1, ~(SZREG-1)
	REG_L	a5, 0(a1)

	addi	t0, t0, -(SZREG-1)
	/* At least one iteration will be executed here, no check */
1:
	srl	a4, a5, t3
	REG_L	a5, SZREG(a1)
	addi	a1, a1, SZREG
	sll	a2, a5, t4
	or	a2, a2, a4
	REG_S	a2, 0(a0)
	addi	a0, a0, SZREG
	bltu	a0, t0, 1b

	/* Undo the SZREG-1 bias on t0; add the misalignment back to a1 */
	addi	t0, t0, SZREG-1
	add	a1, a1, a3

	j	.Lbyte_copy_tail
END(__memcpy)