/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/linkage.h>
#include <asm/asm.h>

ENTRY(__memmove)
WEAK(memmove)
	/*
	 * Here we determine if forward copy is possible. Forward copy is
	 * preferred to backward copy as it is more cache friendly.
	 *
	 * If a0 >= a1, t0 gives their distance; if t0 >= a2 then we can
	 * copy forward.
	 * If a0 < a1, we can always copy forward. This makes t0 negative,
	 * so an *unsigned* comparison will always see t0 >= a2.
	 *
	 * For forward copy we just delegate the task to memcpy.
	 */
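	/*
	 * Roughly, in C (an illustrative sketch only, not part of the build;
	 * dst/src/n stand for a0/a1/a2):
	 *
	 *	if ((unsigned long)dst - (unsigned long)src >= n)
	 *		return memcpy(dst, src, n);	// forward copy is safe
	 *	// otherwise fall through to the backward copy below
	 */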
	sub	t0, a0, a1
	bltu	t0, a2, 1f
	tail	__memcpy
1:

	/*
	 * Register allocation for code below:
	 * a0 - end of uncopied dst
	 * a1 - end of uncopied src
	 * t0 - start of uncopied dst
	 */
	mv	t0, a0
	add	a0, a0, a2
	add	a1, a1, a2

	/*
	 * Use bytewise copy if too small.
	 *
	 * This threshold must be at least 2*SZREG to ensure at least one
	 * wordwise copy is performed. It is chosen to be 16 because it will
	 * save at least 7 iterations of bytewise copy, which pays off the
	 * fixed overhead.
	 */
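	/*
	 * For instance (assuming SZREG == 8): a 16-byte copy does at worst
	 * 7 alignment bytes + 1 word + 1 tail byte below, instead of 16
	 * bytewise iterations.
	 */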
	li	a3, 16
	bltu	a2, a3, .Lbyte_copy_tail

	/*
	 * Bytewise copy first to align a0 to word boundary.
	 */
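	/*
	 * Note: a2 is not needed as a byte count any more (the end pointers
	 * already include it), so it is reused below to hold a0 rounded down
	 * to the nearest word boundary.
	 */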
	andi	a2, a0, ~(SZREG-1)
	beq	a0, a2, 2f
1:
	addi	a1, a1, -1
	lb	a5, 0(a1)
	addi	a0, a0, -1
	sb	a5, 0(a0)
	bne	a0, a2, 1b
2:

	/*
	 * Now a0 is word-aligned. If a1 is also word-aligned, we can perform
	 * an aligned word-wise copy. Otherwise we need to perform a misaligned
	 * word-wise copy.
	 */
	andi	a3, a1, SZREG-1
	bnez	a3, .Lmisaligned_word_copy

	/* Wordwise copy */
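	/*
	 * t0 is temporarily biased by SZREG-1 so that "bgtu a0, t0" keeps the
	 * loop going only while at least SZREG bytes remain above the start of
	 * dst; the bias is removed again after the loop and the remainder is
	 * left to the bytewise tail.
	 */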
	addi	t0, t0, SZREG-1
	bleu	a0, t0, 2f
1:
	addi	a1, a1, -SZREG
	REG_L	a5, 0(a1)
	addi	a0, a0, -SZREG
	REG_S	a5, 0(a0)
	bgtu	a0, t0, 1b
2:
	addi	t0, t0, -(SZREG-1)

.Lbyte_copy_tail:
	/*
	 * Bytewise copy anything left.
	 */
	beq	a0, t0, 2f
1:
	addi	a1, a1, -1
	lb	a5, 0(a1)
	addi	a0, a0, -1
	sb	a5, 0(a0)
	bne	a0, t0, 1b
2:

	mv	a0, t0
	ret

.Lmisaligned_word_copy:
	/*
	 * Misaligned word-wise copy.
	 * For misaligned copy we still perform word-wise copy, but we need to
	 * use the value fetched from the previous iteration and do some shifts.
	 * This is safe because we won't access more words than necessary.
	 */
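	/*
	 * A rough C sketch of one loop iteration below (illustrative only;
	 * little-endian, "off" stands for the src misalignment held in a3):
	 *
	 *	hi  = cur << (8 * (SZREG - off));  // low bytes of cur -> high
	 *	src -= SZREG;
	 *	cur = *(unsigned long *)src;       // next lower aligned word
	 *	lo  = cur >> (8 * off);            // its high bytes -> low
	 *	dst -= SZREG;
	 *	*(unsigned long *)dst = hi | lo;
	 */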

	/* Calculate shifts */
	slli	t3, a3, 3
	sub	t4, x0, t3 /* negating is okay as shifts only look at the LSBs */
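	/*
	 * E.g. on RV64 with a3 == 3: t3 == 24, and since sll/srl only use the
	 * low 6 bits of the shift amount, shifting by t4 (== -24) behaves like
	 * shifting by 40.
	 */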

	/* Load the initial value and align a1 */
	andi	a1, a1, ~(SZREG-1)
	REG_L	a5, 0(a1)

	addi	t0, t0, SZREG-1
	/* At least one iteration will be executed here, so no check is needed */
1:
	sll	a4, a5, t4
	addi	a1, a1, -SZREG
	REG_L	a5, 0(a1)
	srl	a2, a5, t3
	or	a2, a2, a4
	addi	a0, a0, -SZREG
	REG_S	a2, 0(a0)
	bgtu	a0, t0, 1b

	/* Update pointers to their correct values */
	addi	t0, t0, -(SZREG-1)
	add	a1, a1, a3

	j	.Lbyte_copy_tail

END(__memmove)