arm64: Kconfig: Enable usage of optimized memset/memcpy/memmove

This patch enables the use of the optimized memset(), memmove() &
memcpy() versions recently added on ARM64.

Please note that on ARM64 these optimized functions can only be
selected with recent GCC versions (>= 9.4), as builds with earlier
GCC versions produce messages like this one:

aarch64-linux-ar: warning: arch/arm/lib/memset-arm64.o: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0000000
...

Signed-off-by: Stefan Roese <sr@denx.de>
[trini: Make this default to off on ARM64, as it still causes problems on some platforms]
Signed-off-by: Tom Rini <trini@konsulko.com>
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 95102d3..9c4787f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -455,8 +455,8 @@
 
 config USE_ARCH_MEMCPY
 	bool "Use an assembly optimized implementation of memcpy"
-	default y
-	depends on !ARM64
+	default y if !ARM64
+	depends on !ARM64 || (ARM64 && (GCC_VERSION >= 90400))
 	help
 	  Enable the generation of an optimized version of memcpy.
 	  Such an implementation may be faster under some conditions
@@ -465,7 +465,7 @@
 config SPL_USE_ARCH_MEMCPY
 	bool "Use an assembly optimized implementation of memcpy for SPL"
 	default y if USE_ARCH_MEMCPY
-	depends on !ARM64 && SPL
+	depends on SPL
 	help
 	  Enable the generation of an optimized version of memcpy.
 	  Such an implementation may be faster under some conditions
@@ -474,16 +474,43 @@
 config TPL_USE_ARCH_MEMCPY
 	bool "Use an assembly optimized implementation of memcpy for TPL"
 	default y if USE_ARCH_MEMCPY
-	depends on !ARM64 && TPL
+	depends on TPL
 	help
 	  Enable the generation of an optimized version of memcpy.
 	  Such an implementation may be faster under some conditions
 	  but may increase the binary size.
 
+config USE_ARCH_MEMMOVE
+	bool "Use an assembly optimized implementation of memmove" if !ARM64
+	default USE_ARCH_MEMCPY if ARM64
+	depends on ARM64 # prompt needs !ARM64, so it is never shown; the value comes from the default
+	help
+	  Enable the generation of an optimized version of memmove.
+	  Such an implementation may be faster under some conditions
+	  but may increase the binary size.
+
+config SPL_USE_ARCH_MEMMOVE
+	bool "Use an assembly optimized implementation of memmove for SPL" if !ARM64
+	default SPL_USE_ARCH_MEMCPY if ARM64
+	depends on SPL && ARM64 # prompt needs !ARM64, so it is never shown; the value comes from the default
+	help
+	  Enable the generation of an optimized version of memmove.
+	  Such an implementation may be faster under some conditions
+	  but may increase the binary size.
+
+config TPL_USE_ARCH_MEMMOVE
+	bool "Use an assembly optimized implementation of memmove for TPL" if !ARM64
+	default TPL_USE_ARCH_MEMCPY if ARM64
+	depends on TPL && ARM64 # prompt needs !ARM64, so it is never shown; the value comes from the default
+	help
+	  Enable the generation of an optimized version of memmove.
+	  Such an implementation may be faster under some conditions
+	  but may increase the binary size.
+
 config USE_ARCH_MEMSET
 	bool "Use an assembly optimized implementation of memset"
-	default y
-	depends on !ARM64
+	default y if !ARM64
+	depends on !ARM64 || (ARM64 && (GCC_VERSION >= 90400))
 	help
 	  Enable the generation of an optimized version of memset.
 	  Such an implementation may be faster under some conditions
@@ -492,7 +519,7 @@
 config SPL_USE_ARCH_MEMSET
 	bool "Use an assembly optimized implementation of memset for SPL"
 	default y if USE_ARCH_MEMSET
-	depends on !ARM64 && SPL
+	depends on SPL
 	help
 	  Enable the generation of an optimized version of memset.
 	  Such an implementation may be faster under some conditions
@@ -501,7 +528,7 @@
 config TPL_USE_ARCH_MEMSET
 	bool "Use an assembly optimized implementation of memset for TPL"
 	default y if USE_ARCH_MEMSET
-	depends on !ARM64 && TPL
+	depends on TPL
 	help
 	  Enable the generation of an optimized version of memset.
 	  Such an implementation may be faster under some conditions
diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h
index 11eaa34..ead3f2c 100644
--- a/arch/arm/include/asm/string.h
+++ b/arch/arm/include/asm/string.h
@@ -19,7 +19,11 @@
 #endif
 extern void * memcpy(void *, const void *, __kernel_size_t);
 
+#if CONFIG_IS_ENABLED(USE_ARCH_MEMMOVE)
+#define __HAVE_ARCH_MEMMOVE
+#else /* !CONFIG_IS_ENABLED(USE_ARCH_MEMMOVE) */
 #undef __HAVE_ARCH_MEMMOVE
+#endif /* CONFIG_IS_ENABLED(USE_ARCH_MEMMOVE) */
 extern void * memmove(void *, const void *, __kernel_size_t);
 
 #undef __HAVE_ARCH_MEMCHR