x86: Import glibc memcpy implementation

Taken from glibc version 2.14.90

--
Changes for v2:
 - None
diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h
index 3aa6c11..0ad612f 100644
--- a/arch/x86/include/asm/string.h
+++ b/arch/x86/include/asm/string.h
@@ -14,7 +14,7 @@
 #undef __HAVE_ARCH_STRCHR
 extern char * strchr(const char * s, int c);
 
-#undef __HAVE_ARCH_MEMCPY
+#define __HAVE_ARCH_MEMCPY
 extern void * memcpy(void *, const void *, __kernel_size_t);
 
 #undef __HAVE_ARCH_MEMMOVE
diff --git a/arch/x86/lib/string.c b/arch/x86/lib/string.c
index f2ea7e4..1fde81b 100644
--- a/arch/x86/lib/string.c
+++ b/arch/x86/lib/string.c
@@ -85,3 +85,64 @@
 
 	return dstpp;
 }
+
+#define	OP_T_THRES	8
+#define OPSIZ	(sizeof(op_t))
+
+#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes)				  \
+do {									  \
+	int __d0;							  \
+	asm volatile(							  \
+		/* Clear the direction flag, so copying goes forward.  */ \
+		"cld\n"							  \
+		/* Copy bytes.  */					  \
+		"rep\n"							  \
+		"movsb" :						  \
+		"=D" (dst_bp), "=S" (src_bp), "=c" (__d0) :		  \
+		"0" (dst_bp), "1" (src_bp), "2" (nbytes) :		  \
+		"memory");						  \
+} while (0)
+
+#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)		  \
+do {									  \
+	int __d0;							  \
+	asm volatile(							  \
+		/* Clear the direction flag, so copying goes forward.  */ \
+		"cld\n"							  \
+		/* Copy longwords.  */					  \
+		"rep\n"							  \
+		"movsl" :						  \
+		"=D" (dst_bp), "=S" (src_bp), "=c" (__d0) :		  \
+		"0" (dst_bp), "1" (src_bp), "2" ((nbytes) / 4) :	  \
+		"memory");						  \
+	(nbytes_left) = (nbytes) % 4;					  \
+} while (0)
+
+void *memcpy(void *dstpp, const void *srcpp, size_t len)
+{
+	unsigned long int dstp = (long int)dstpp;
+	unsigned long int srcp = (long int)srcpp;
+
+	/* Copy from the beginning to the end.  */
+
+	/* If there not too few bytes to copy, use word copy.  */
+	if (len >= OP_T_THRES) {
+		/* Copy just a few bytes to make DSTP aligned.  */
+		len -= (-dstp) % OPSIZ;
+		BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ);
+
+		/* Copy from SRCP to DSTP taking advantage of the known
+		 * alignment of DSTP.  Number of bytes remaining is put
+		 * in the third argument, i.e. in LEN.  This number may
+		 * vary from machine to machine.
+		 */
+		WORD_COPY_FWD(dstp, srcp, len, len);
+
+		/* Fall out and copy the tail.  */
+	}
+
+	/* There are just a few bytes to copy. Use byte memory operations. */
+	BYTE_COPY_FWD(dstp, srcp, len);
+
+	return dstpp;
+}