// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 1991,1992,1993,1997,1998,2003, 2005 Free Software Foundation, Inc.
 * This file is part of the GNU C Library.
 * Copyright (c) 2011 The Chromium OS Authors.
 */

/* From glibc-2.14, sysdeps/i386/memset.c */

#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/string.h>

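/* op_t is the unit handled by the word-at-a-time loops: one 32-bit longword */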
typedef uint32_t op_t;

void *memset(void *dstpp, int c, size_t len)
{
	int d0;
	unsigned long int dstp = (unsigned long int) dstpp;

	/* This explicit register allocation improves code very much indeed. */
	register op_t x asm("ax");

	x = (unsigned char) c;

	/* Clear the direction flag, so filling will move forward. */
	asm volatile("cld");

	/* This threshold value is optimal. */
	if (len >= 12) {
		/* Fill X with four copies of the char we want to fill with. */
		x |= (x << 8);
		x |= (x << 16);

		/*
		 * Adjust LEN for the bytes handled in the first loop below:
		 * (-dstp) % sizeof(op_t) is the distance to the next
		 * longword boundary.
		 */
		len -= (-dstp) % sizeof(op_t);

		/*
		 * There are at least some bytes to set. No need to test for
		 * LEN == 0 in this alignment loop.
		 */

		/* Fill bytes until DSTP is aligned on a longword boundary. */
		asm volatile(
			"rep\n"
			"stosb" /* %0, %2, %3 */ :
			"=D" (dstp), "=c" (d0) :
			"0" (dstp), "1" ((-dstp) % sizeof(op_t)), "a" (x) :
			"memory");

		/* Fill longwords. */
		asm volatile(
			"rep\n"
			"stosl" /* %0, %2, %3 */ :
			"=D" (dstp), "=c" (d0) :
			"0" (dstp), "1" (len / sizeof(op_t)), "a" (x) :
			"memory");
		len %= sizeof(op_t);
	}

	/* Write the last few bytes. */
	asm volatile(
		"rep\n"
		"stosb" /* %0, %2, %3 */ :
		"=D" (dstp), "=c" (d0) :
		"0" (dstp), "1" (len), "a" (x) :
		"memory");

	return dstpp;
}

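/*
 * Thresholds for memcpy() below: lengths of at least OP_T_THRES bytes are
 * copied one op_t (OPSIZ bytes) at a time once the destination is aligned;
 * shorter lengths are copied byte by byte.
 */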
#define OP_T_THRES 8
#define OPSIZ (sizeof(op_t))

#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes)				\
do {									\
	int __d0;							\
	asm volatile(							\
		/* Clear the direction flag, so copying goes forward. */ \
		"cld\n"							\
		/* Copy bytes. */					\
		"rep\n"							\
		"movsb" :						\
		"=D" (dst_bp), "=S" (src_bp), "=c" (__d0) :		\
		"0" (dst_bp), "1" (src_bp), "2" (nbytes) :		\
		"memory");						\
} while (0)

#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)		\
do {									\
	int __d0;							\
	asm volatile(							\
		/* Clear the direction flag, so copying goes forward. */ \
		"cld\n"							\
		/* Copy longwords. */					\
		"rep\n"							\
		"movsl" :						\
		"=D" (dst_bp), "=S" (src_bp), "=c" (__d0) :		\
		"0" (dst_bp), "1" (src_bp), "2" ((nbytes) / 4) :	\
		"memory");						\
	(nbytes_left) = (nbytes) % 4;					\
} while (0)

void *memcpy(void *dstpp, const void *srcpp, size_t len)
{
	unsigned long int dstp = (long int)dstpp;
	unsigned long int srcp = (long int)srcpp;

	/* Copy from the beginning to the end. */

	/* If there are not too few bytes to copy, use word copy. */
	if (len >= OP_T_THRES) {
		/* Copy just a few bytes to make DSTP aligned. */
		len -= (-dstp) % OPSIZ;
		BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ);

		/* Copy from SRCP to DSTP taking advantage of the known
		 * alignment of DSTP. Number of bytes remaining is put
		 * in the third argument, i.e. in LEN. This number may
		 * vary from machine to machine.
		 */
		WORD_COPY_FWD(dstp, srcp, len, len);

		/* Fall out and copy the tail. */
	}

	/* There are just a few bytes to copy. Use byte memory operations. */
	BYTE_COPY_FWD(dstp, srcp, len);

	return dstpp;
}

void *memmove(void *dest, const void *src, size_t n)
{
	int d0, d1, d2, d3, d4, d5;
	char *ret = dest;

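	/*
	 * Operand map for the asm below: %0 = byte count (ecx),
	 * %1 = src (esi), %2 = dest (edi), %3-%5 = scratch registers.
	 */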
	__asm__ __volatile__(
		/* Handle more than 16 bytes in the loop below */
		"cmp $0x10, %0\n\t"
		"jb 1f\n\t"

		/* Decide forward/backward copy mode */
		"cmp %2, %1\n\t"
		"jb 2f\n\t"

		/*
		 * The movs instruction has a high startup latency, so
		 * handle small sizes with general registers instead.
		 */
		"cmp $680, %0\n\t"
		"jb 3f\n\t"
		/* The movs instruction is only good for the aligned case */
		"mov %1, %3\n\t"
		"xor %2, %3\n\t"
		"and $0xff, %3\n\t"
		"jz 4f\n\t"
		"3:\n\t"
		"sub $0x10, %0\n\t"
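		/*
		 * GAS numeric labels may repeat: "jb 3f" above lands on the
		 * "3:" just defined and falls into the copy loop at the
		 * second "3:" below.  The extra sub biases the count so the
		 * borrow-based "jae" exit test runs the loop len/16 times,
		 * leaving the 0..15 remainder bytes in %0 after the final
		 * "add $0x10".
		 */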

		/* We gobble 16 bytes forward in each loop iteration */
		"3:\n\t"
		"sub $0x10, %0\n\t"
		"mov 0*4(%1), %3\n\t"
		"mov 1*4(%1), %4\n\t"
		"mov %3, 0*4(%2)\n\t"
		"mov %4, 1*4(%2)\n\t"
		"mov 2*4(%1), %3\n\t"
		"mov 3*4(%1), %4\n\t"
		"mov %3, 2*4(%2)\n\t"
		"mov %4, 3*4(%2)\n\t"
		"lea 0x10(%1), %1\n\t"
		"lea 0x10(%2), %2\n\t"
		"jae 3b\n\t"
		"add $0x10, %0\n\t"
		"jmp 1f\n\t"

		/* Handle data forward by movs */
		".p2align 4\n\t"
		"4:\n\t"
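		/*
		 * Save the source's final dword and the address of the
		 * destination's final dword before the bulk copy: rep movsl
		 * then moves len/4 dwords, and the saved dword is stored
		 * last to cover the 1-3 tail bytes left by the division.
		 */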
		"mov -4(%1, %0), %3\n\t"
		"lea -4(%2, %0), %4\n\t"
		"shr $2, %0\n\t"
		"rep movsl\n\t"
		"mov %3, (%4)\n\t"
		"jmp 11f\n\t"
		/* Handle data backward by movs */
		".p2align 4\n\t"
		"6:\n\t"
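		/*
		 * Mirror of the forward movs case: save the source's first
		 * dword and the destination's start address, copy len/4
		 * dwords backwards with std/rep movsl, then store the saved
		 * dword to cover the 1-3 head bytes and restore cld.
		 */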
		"mov (%1), %3\n\t"
		"mov %2, %4\n\t"
		"lea -4(%1, %0), %1\n\t"
		"lea -4(%2, %0), %2\n\t"
		"shr $2, %0\n\t"
		"std\n\t"
		"rep movsl\n\t"
		"mov %3, (%4)\n\t"
		"cld\n\t"
		"jmp 11f\n\t"

		/* Start to prepare for backward copy */
		".p2align 4\n\t"
		"2:\n\t"
		"cmp $680, %0\n\t"
		"jb 5f\n\t"
		"mov %1, %3\n\t"
		"xor %2, %3\n\t"
		"and $0xff, %3\n\t"
		"jz 6b\n\t"

		/* Calculate copy position to tail */
		"5:\n\t"
		"add %0, %1\n\t"
		"add %0, %2\n\t"
		"sub $0x10, %0\n\t"
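		/* Same count bias as in the forward 16-byte loop above */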

		/* We gobble 16 bytes backward in each loop iteration */
		"7:\n\t"
		"sub $0x10, %0\n\t"

		"mov -1*4(%1), %3\n\t"
		"mov -2*4(%1), %4\n\t"
		"mov %3, -1*4(%2)\n\t"
		"mov %4, -2*4(%2)\n\t"
		"mov -3*4(%1), %3\n\t"
		"mov -4*4(%1), %4\n\t"
		"mov %3, -3*4(%2)\n\t"
		"mov %4, -4*4(%2)\n\t"
		"lea -0x10(%1), %1\n\t"
		"lea -0x10(%2), %2\n\t"
		"jae 7b\n\t"
		/* Calculate copy position to head */
		"add $0x10, %0\n\t"
		"sub %0, %1\n\t"
		"sub %0, %2\n\t"

		/* Move data from 8 bytes to 15 bytes */
		".p2align 4\n\t"
		"1:\n\t"
		"cmp $8, %0\n\t"
		"jb 8f\n\t"
		"mov 0*4(%1), %3\n\t"
		"mov 1*4(%1), %4\n\t"
		"mov -2*4(%1, %0), %5\n\t"
		"mov -1*4(%1, %0), %1\n\t"

		"mov %3, 0*4(%2)\n\t"
		"mov %4, 1*4(%2)\n\t"
		"mov %5, -2*4(%2, %0)\n\t"
		"mov %1, -1*4(%2, %0)\n\t"
		"jmp 11f\n\t"

		/* Move data from 4 bytes to 7 bytes */
		".p2align 4\n\t"
		"8:\n\t"
		"cmp $4, %0\n\t"
		"jb 9f\n\t"
		"mov 0*4(%1), %3\n\t"
		"mov -1*4(%1, %0), %4\n\t"
		"mov %3, 0*4(%2)\n\t"
		"mov %4, -1*4(%2, %0)\n\t"
		"jmp 11f\n\t"

		/* Move data from 2 bytes to 3 bytes */
		".p2align 4\n\t"
		"9:\n\t"
		"cmp $2, %0\n\t"
		"jb 10f\n\t"
		"movw 0*2(%1), %%dx\n\t"
		"movw -1*2(%1, %0), %%bx\n\t"
		"movw %%dx, 0*2(%2)\n\t"
		"movw %%bx, -1*2(%2, %0)\n\t"
		"jmp 11f\n\t"

		/* Move data for 1 byte */
		".p2align 4\n\t"
		"10:\n\t"
		"cmp $1, %0\n\t"
		"jb 11f\n\t"
		"movb (%1), %%cl\n\t"
		"movb %%cl, (%2)\n\t"
		".p2align 4\n\t"
		"11:"
		: "=&c" (d0), "=&S" (d1), "=&D" (d2),
		  "=r" (d3), "=r" (d4), "=r" (d5)
		: "0" (n),
		  "1" (src),
		  "2" (dest)
		: "memory");

	return ret;
}