/*
 * Copyright (C) 1991,1992,1993,1997,1998,2003, 2005 Free Software Foundation, Inc.
 * This file is part of the GNU C Library.
 * Copyright (c) 2011 The Chromium OS Authors.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

/* From glibc-2.14, sysdeps/i386/memset.c */

#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/string.h>

/* The 32-bit "longword" unit used by the rep stosl/movsl fast paths */
typedef uint32_t op_t;

void *memset(void *dstpp, int c, size_t len)
{
	int d0;
	unsigned long int dstp = (unsigned long int) dstpp;

	/* This explicit register allocation improves code very much indeed. */
	register op_t x asm("ax");

	x = (unsigned char) c;

	/* Clear the direction flag, so filling will move forward. */
	asm volatile("cld");

	/* This threshold value is optimal. */
	if (len >= 12) {
		/* Fill X with four copies of the char we want to fill with. */
		x |= (x << 8);
		x |= (x << 16);
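		/* For example, c == 0x5a now yields x == 0x5a5a5a5a. */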

		/* Adjust LEN for the bytes handled in the first loop. */
		len -= (-dstp) % sizeof(op_t);
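		/*
		 * (-dstp) % sizeof(op_t) is the byte count needed to reach
		 * the next longword boundary: e.g. 1 when dstp ends in ...3,
		 * and 0 when dstp is already aligned.
		 */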

		/*
		 * There are at least some bytes to set. No need to test for
		 * LEN == 0 in this alignment loop.
		 */

		/* Fill bytes until DSTP is aligned on a longword boundary. */
		asm volatile(
			"rep\n"
			"stosb" /* %0, %2, %3 */ :
			"=D" (dstp), "=c" (d0) :
			"0" (dstp), "1" ((-dstp) % sizeof(op_t)), "a" (x) :
			"memory");

		/* Fill longwords. */
		asm volatile(
			"rep\n"
			"stosl" /* %0, %2, %3 */ :
			"=D" (dstp), "=c" (d0) :
			"0" (dstp), "1" (len / sizeof(op_t)), "a" (x) :
			"memory");
		len %= sizeof(op_t);
	}

	/* Write the last few bytes. */
	asm volatile(
		"rep\n"
		"stosb" /* %0, %2, %3 */ :
		"=D" (dstp), "=c" (d0) :
		"0" (dstp), "1" (len), "a" (x) :
		"memory");

	return dstpp;
}

#define OP_T_THRES	8	/* Minimum length worth a word-at-a-time copy */
#define OPSIZ		(sizeof(op_t))

#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes)				  \
do {									  \
	int __d0;							  \
	asm volatile(							  \
		/* Clear the direction flag, so copying goes forward. */ \
		"cld\n"							  \
		/* Copy bytes. */					  \
		"rep\n"							  \
		"movsb" :						  \
		"=D" (dst_bp), "=S" (src_bp), "=c" (__d0) :		  \
		"0" (dst_bp), "1" (src_bp), "2" (nbytes) :		  \
		"memory");						  \
} while (0)

#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)		  \
do {									  \
	int __d0;							  \
	asm volatile(							  \
		/* Clear the direction flag, so copying goes forward. */ \
		"cld\n"							  \
		/* Copy longwords. */					  \
		"rep\n"							  \
		"movsl" :						  \
		"=D" (dst_bp), "=S" (src_bp), "=c" (__d0) :		  \
		"0" (dst_bp), "1" (src_bp), "2" ((nbytes) / 4) :	  \
		"memory");						  \
	(nbytes_left) = (nbytes) % 4;					  \
} while (0)
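/*
 * Both macros list dst_bp and src_bp as outputs tied to %edi and %esi, so
 * the advanced pointers are written back into the caller's variables;
 * WORD_COPY_FWD also leaves the leftover byte count in nbytes_left.
 */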

void *memcpy(void *dstpp, const void *srcpp, size_t len)
{
	unsigned long int dstp = (long int)dstpp;
	unsigned long int srcp = (long int)srcpp;

	/* Copy from the beginning to the end. */

	/* If there are enough bytes to copy, use word copy. */
	if (len >= OP_T_THRES) {
		/* Copy just a few bytes to make DSTP aligned. */
		len -= (-dstp) % OPSIZ;
		BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ);

		/*
		 * Copy from SRCP to DSTP taking advantage of the known
		 * alignment of DSTP. Number of bytes remaining is put
		 * in the third argument, i.e. in LEN. This number may
		 * vary from machine to machine.
		 */
		WORD_COPY_FWD(dstp, srcp, len, len);

		/* Fall out and copy the tail. */
	}

	/* There are just a few bytes to copy. Use byte memory operations. */
	BYTE_COPY_FWD(dstp, srcp, len);

	return dstpp;
}
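
/*
 * memcpy() above copies strictly forward and is not safe for arbitrary
 * overlapping buffers; memmove() below chooses a copy direction so
 * overlapping moves are handled.
 */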

void *memmove(void *dest, const void *src, size_t n)
{
	int d0, d1, d2, d3, d4, d5;
	char *ret = dest;

	__asm__ __volatile__(
		/* Handle 16 bytes or more with the loops below */
		"cmp $0x10, %0\n\t"
		"jb 1f\n\t"

		/* Decide between forward and backward copy mode */
		"cmp %2, %1\n\t"
		"jb 2f\n\t"

		/*
		 * The movs instruction has a high startup latency, so small
		 * sizes are copied through general registers instead.
		 */
		"cmp $680, %0\n\t"
		"jb 3f\n\t"
		/* movs is only a win when src and dest are mutually aligned */
		"mov %1, %3\n\t"
		"xor %2, %3\n\t"
		"and $0xff, %3\n\t"
		"jz 4f\n\t"
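		/*
		 * Both the "jb 3f" above (n < 680) and the unaligned
		 * fall-through land on the first "3:" below.  Its extra
		 * "sub $0x10" reserves one 16-byte chunk: the loop at the
		 * second "3:" copies a chunk even on the iteration whose
		 * sub borrows, so without the reservation it would run one
		 * chunk past the end.  The "add $0x10" after the loop gives
		 * the reserved chunk back, so %0 holds the exact tail count.
		 */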
		"3:\n\t"
		"sub $0x10, %0\n\t"

		/* We gobble 16 bytes forward in each loop */
		"3:\n\t"
		"sub $0x10, %0\n\t"
		"mov 0*4(%1), %3\n\t"
		"mov 1*4(%1), %4\n\t"
		"mov %3, 0*4(%2)\n\t"
		"mov %4, 1*4(%2)\n\t"
		"mov 2*4(%1), %3\n\t"
		"mov 3*4(%1), %4\n\t"
		"mov %3, 2*4(%2)\n\t"
		"mov %4, 3*4(%2)\n\t"
		"lea 0x10(%1), %1\n\t"
		"lea 0x10(%2), %2\n\t"
		"jae 3b\n\t"
		"add $0x10, %0\n\t"
		"jmp 1f\n\t"

		/*
		 * Forward copy with movs: save the last dword of the source
		 * and the address of the last dword of the destination up
		 * front, copy n/4 dwords with rep movsl, then store the
		 * saved dword so the 1-3 bytes dropped by the shr are still
		 * written.
		 */
		".p2align 4\n\t"
		"4:\n\t"
		"mov -4(%1, %0), %3\n\t"
		"lea -4(%2, %0), %4\n\t"
		"shr $2, %0\n\t"
		"rep movsl\n\t"
		"mov %3, (%4)\n\t"
		"jmp 11f\n\t"
		/*
		 * Backward copy with movs: save the first source dword and
		 * the destination base, point %esi/%edi at the last dword,
		 * set the direction flag and copy downwards, then store the
		 * saved dword to cover the head remainder and clear the
		 * direction flag again.
		 */
		".p2align 4\n\t"
		"6:\n\t"
		"mov (%1), %3\n\t"
		"mov %2, %4\n\t"
		"lea -4(%1, %0), %1\n\t"
		"lea -4(%2, %0), %2\n\t"
		"shr $2, %0\n\t"
		"std\n\t"
		"rep movsl\n\t"
		"mov %3, (%4)\n\t"
		"cld\n\t"
		"jmp 11f\n\t"

		/* Prepare for the backward copy (src is below dest) */
		".p2align 4\n\t"
		"2:\n\t"
		"cmp $680, %0\n\t"
		"jb 5f\n\t"
		"mov %1, %3\n\t"
		"xor %2, %3\n\t"
		"and $0xff, %3\n\t"
		"jz 6b\n\t"

		/* Point both cursors at the tail for the backward copy */
		"5:\n\t"
		"add %0, %1\n\t"
		"add %0, %2\n\t"
		"sub $0x10, %0\n\t"
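
		/*
		 * As in the forward loop, the "sub $0x10" above reserves one
		 * 16-byte chunk so the loop below (which copies after its
		 * borrow test) cannot run past the head; the "add $0x10"
		 * after the loop returns the chunk to the remaining count.
		 */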
		/* We gobble 16 bytes backward in each loop */
		"7:\n\t"
		"sub $0x10, %0\n\t"

		"mov -1*4(%1), %3\n\t"
		"mov -2*4(%1), %4\n\t"
		"mov %3, -1*4(%2)\n\t"
		"mov %4, -2*4(%2)\n\t"
		"mov -3*4(%1), %3\n\t"
		"mov -4*4(%1), %4\n\t"
		"mov %3, -3*4(%2)\n\t"
		"mov %4, -4*4(%2)\n\t"
		"lea -0x10(%1), %1\n\t"
		"lea -0x10(%2), %2\n\t"
		"jae 7b\n\t"
		/* Point both cursors back at the head of the remaining bytes */
		"add $0x10, %0\n\t"
		"sub %0, %1\n\t"
		"sub %0, %2\n\t"
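
		/*
		 * The remaining 1-15 bytes are handled below by loading the
		 * head and tail of the region into registers first and only
		 * then storing them, so each case needs no inner loop and is
		 * safe even when the head and tail windows overlap.
		 */
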
		/* Move 8 to 15 remaining bytes */
		".p2align 4\n\t"
		"1:\n\t"
		"cmp $8, %0\n\t"
		"jb 8f\n\t"
		"mov 0*4(%1), %3\n\t"
		"mov 1*4(%1), %4\n\t"
		"mov -2*4(%1, %0), %5\n\t"
		"mov -1*4(%1, %0), %1\n\t"

		"mov %3, 0*4(%2)\n\t"
		"mov %4, 1*4(%2)\n\t"
		"mov %5, -2*4(%2, %0)\n\t"
		"mov %1, -1*4(%2, %0)\n\t"
		"jmp 11f\n\t"

		/* Move 4 to 7 remaining bytes */
		".p2align 4\n\t"
		"8:\n\t"
		"cmp $4, %0\n\t"
		"jb 9f\n\t"
		"mov 0*4(%1), %3\n\t"
		"mov -1*4(%1, %0), %4\n\t"
		"mov %3, 0*4(%2)\n\t"
		"mov %4, -1*4(%2, %0)\n\t"
		"jmp 11f\n\t"

		/* Move 2 to 3 remaining bytes */
		".p2align 4\n\t"
		"9:\n\t"
		"cmp $2, %0\n\t"
		"jb 10f\n\t"
		"movw 0*2(%1), %%dx\n\t"
		"movw -1*2(%1, %0), %%bx\n\t"
		"movw %%dx, 0*2(%2)\n\t"
		"movw %%bx, -1*2(%2, %0)\n\t"
		"jmp 11f\n\t"

		/* Move the final byte, if any */
		".p2align 4\n\t"
		"10:\n\t"
		"cmp $1, %0\n\t"
		"jb 11f\n\t"
		"movb (%1), %%cl\n\t"
		"movb %%cl, (%2)\n\t"
		".p2align 4\n\t"
		"11:"
		: "=&c" (d0), "=&S" (d1), "=&D" (d2),
		  "=r" (d3), "=r" (d4), "=r" (d5)
		: "0" (n),
		  "1" (src),
		  "2" (dest)
		: "memory");

	return ret;
}