riscv: Add Zbb support for building U-Boot
This patch adds ISA string to the -march to generate zbb instructions
for U-Boot binaries, along with optimized string functions introduced
from Linux kernel.
Signed-off-by: Yu Chien Peter Lin <peterlin@andestech.com>
Reviewed-by: Leo Yu-Chi Liang <ycliang@andestech.com>
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 49b6e1a..e291456 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -187,6 +187,97 @@
riscv32 ABI from ilp32 to ilp32d and the riscv64 ABI from lp64 to
lp64d.
+config RISCV_ISA_ZBB
+ bool "Zbb extension support for bit manipulation instructions"
+ help
+ Adds ZBB extension (basic bit manipulation) to the ISA subsets
+ that the toolchain is allowed to emit when building U-Boot.
+ The Zbb extension provides instructions to accelerate a number
+ of bit-specific operations (count bit population, sign extending,
+ bitrotation, etc) and enables optimized string routines.
+
+menu "Use assembly optimized implementation of string routines"
+
+config USE_ARCH_STRLEN
+ bool "Use an assembly optimized implementation of strlen"
+ default y
+ depends on RISCV_ISA_ZBB
+ help
+ Enable the generation of an optimized version of strlen using
+ Zbb extension.
+
+config SPL_USE_ARCH_STRLEN
+ bool "Use an assembly optimized implementation of strlen for SPL"
+ default y if USE_ARCH_STRLEN
+ depends on RISCV_ISA_ZBB
+ depends on SPL
+ help
+ Enable the generation of an optimized version of strlen using
+ Zbb extension.
+
+config TPL_USE_ARCH_STRLEN
+ bool "Use an assembly optimized implementation of strlen for TPL"
+ default y if USE_ARCH_STRLEN
+ depends on RISCV_ISA_ZBB
+ depends on TPL
+ help
+ Enable the generation of an optimized version of strlen using
+ Zbb extension.
+
+config USE_ARCH_STRCMP
+ bool "Use an assembly optimized implementation of strcmp"
+ default y
+ depends on RISCV_ISA_ZBB
+ help
+ Enable the generation of an optimized version of strcmp using
+ Zbb extension.
+
+config SPL_USE_ARCH_STRCMP
+ bool "Use an assembly optimized implementation of strcmp for SPL"
+ default y if USE_ARCH_STRCMP
+ depends on RISCV_ISA_ZBB
+ depends on SPL
+ help
+ Enable the generation of an optimized version of strcmp using
+ Zbb extension.
+
+config TPL_USE_ARCH_STRCMP
+ bool "Use an assembly optimized implementation of strcmp for TPL"
+ default y if USE_ARCH_STRCMP
+ depends on RISCV_ISA_ZBB
+ depends on TPL
+ help
+ Enable the generation of an optimized version of strcmp using
+ Zbb extension.
+
+config USE_ARCH_STRNCMP
+ bool "Use an assembly optimized implementation of strncmp"
+ default y
+ depends on RISCV_ISA_ZBB
+ help
+ Enable the generation of an optimized version of strncmp using
+ Zbb extension.
+
+config SPL_USE_ARCH_STRNCMP
+ bool "Use an assembly optimized implementation of strncmp for SPL"
+ default y if USE_ARCH_STRNCMP
+ depends on RISCV_ISA_ZBB
+ depends on SPL
+ help
+ Enable the generation of an optimized version of strncmp using
+ Zbb extension.
+
+config TPL_USE_ARCH_STRNCMP
+ bool "Use an assembly optimized implementation of strncmp for TPL"
+ default y if USE_ARCH_STRNCMP
+ depends on RISCV_ISA_ZBB
+ depends on TPL
+ help
+ Enable the generation of an optimized version of strncmp using
+ Zbb extension.
+
+endmenu
+
config RISCV_ISA_A
def_bool y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 4963b51..b3ef870 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -24,6 +24,9 @@
ifeq ($(CONFIG_RISCV_ISA_C),y)
ARCH_C = c
endif
+ifeq ($(CONFIG_RISCV_ISA_ZBB),y)
+ ARCH_ZBB = _zbb
+endif
ifeq ($(CONFIG_CMODEL_MEDLOW),y)
CMODEL = medlow
endif
@@ -32,7 +35,7 @@
endif
-RISCV_MARCH = $(ARCH_BASE)$(ARCH_A)$(ARCH_F)$(ARCH_D)$(ARCH_C)
+RISCV_MARCH = $(ARCH_BASE)$(ARCH_A)$(ARCH_F)$(ARCH_D)$(ARCH_C)$(ARCH_ZBB)
ABI = $(ABI_BASE)$(ABI_D)
# Newer binutils versions default to ISA spec version 20191213 which moves some
diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 7dee3e4..38ad85f 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -40,4 +40,22 @@
#endif
extern void *memset(void *, int, __kernel_size_t);
+#undef __HAVE_ARCH_STRLEN
+#if CONFIG_IS_ENABLED(USE_ARCH_STRLEN)
+#define __HAVE_ARCH_STRLEN
+#endif
+extern __kernel_size_t strlen(const char *);
+
+#undef __HAVE_ARCH_STRCMP
+#if CONFIG_IS_ENABLED(USE_ARCH_STRCMP)
+#define __HAVE_ARCH_STRCMP
+#endif
+extern int strcmp(const char *, const char *);
+
+#undef __HAVE_ARCH_STRNCMP
+#if CONFIG_IS_ENABLED(USE_ARCH_STRNCMP)
+#define __HAVE_ARCH_STRNCMP
+#endif
+extern int strncmp(const char *, const char *, size_t __kernel_size_t);
+
#endif /* __ASM_RISCV_STRING_H */
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 02c4d8f..9a05b66 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -42,5 +42,8 @@
obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_MEMSET) += memset.o
obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_MEMMOVE) += memmove.o
obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_MEMCPY) += memcpy.o
+obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_STRLEN) += strlen_zbb.o
+obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_STRCMP) += strcmp_zbb.o
+obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_STRNCMP) += strncmp_zbb.o
obj-$(CONFIG_$(SPL_TPL_)SEMIHOSTING) += semihosting.o
diff --git a/arch/riscv/lib/strcmp_zbb.S b/arch/riscv/lib/strcmp_zbb.S
new file mode 100644
index 0000000..e5a367c
--- /dev/null
+++ b/arch/riscv/lib/strcmp_zbb.S
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Taken from Linux arch/riscv/lib/strcmp.S
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+ENTRY(__strcmp)
+WEAK(strcmp)
+.option push
+.option arch,+zbb
+ /*
+ * Returns
+ * a0 - comparison result, value like strcmp
+ *
+ * Parameters
+ * a0 - string1
+ * a1 - string2
+ *
+ * Clobbers
+ * t0, t1, t2, t3, t4
+ */
+
+ or t2, a0, a1
+ li t4, -1
+ and t2, t2, SZREG-1
+ bnez t2, 3f
+
+ /* Main loop for aligned string. */
+ .p2align 3
+1:
+ REG_L t0, 0(a0)
+ REG_L t1, 0(a1)
+ orc.b t3, t0
+ bne t3, t4, 2f
+ addi a0, a0, SZREG
+ addi a1, a1, SZREG
+ beq t0, t1, 1b
+
+ /*
+ * Words don't match, and no null byte in the first
+ * word. Get bytes in big-endian order and compare.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ rev8 t0, t0
+ rev8 t1, t1
+#endif
+
+ /* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */
+ sltu a0, t0, t1
+ neg a0, a0
+ ori a0, a0, 1
+ ret
+
+2:
+ /*
+ * Found a null byte.
+ * If words don't match, fall back to simple loop.
+ */
+ bne t0, t1, 3f
+
+ /* Otherwise, strings are equal. */
+ li a0, 0
+ ret
+
+ /* Simple loop for misaligned strings. */
+ .p2align 3
+3:
+ lbu t0, 0(a0)
+ lbu t1, 0(a1)
+ addi a0, a0, 1
+ addi a1, a1, 1
+ bne t0, t1, 4f
+ bnez t0, 3b
+
+4:
+ sub a0, t0, t1
+ ret
+.option pop
+END(__strcmp)
diff --git a/arch/riscv/lib/strlen_zbb.S b/arch/riscv/lib/strlen_zbb.S
new file mode 100644
index 0000000..bd8fa4c
--- /dev/null
+++ b/arch/riscv/lib/strlen_zbb.S
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Taken from Linux arch/riscv/lib/strlen.S
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define CZ ctz
+# define SHIFT srl
+#else
+# define CZ clz
+# define SHIFT sll
+#endif
+
+ENTRY(__strlen)
+WEAK(strlen)
+.option push
+.option arch,+zbb
+ /*
+ * Returns
+ * a0 - string length
+ *
+ * Parameters
+ * a0 - String to measure
+ *
+ * Clobbers
+ * t0, t1, t2, t3
+ */
+
+ /* Number of irrelevant bytes in the first word. */
+ andi t2, a0, SZREG-1
+
+ /* Align pointer. */
+ andi t0, a0, -SZREG
+
+ li t3, SZREG
+ sub t3, t3, t2
+ slli t2, t2, 3
+
+ /* Get the first word. */
+ REG_L t1, 0(t0)
+
+ /*
+ * Shift away the partial data we loaded to remove the irrelevant bytes
+ * preceding the string with the effect of adding NUL bytes at the
+ * end of the string's first word.
+ */
+ SHIFT t1, t1, t2
+
+ /* Convert non-NUL into 0xff and NUL into 0x00. */
+ orc.b t1, t1
+
+ /* Convert non-NUL into 0x00 and NUL into 0xff. */
+ not t1, t1
+
+ /*
+ * Search for the first set bit (corresponding to a NUL byte in the
+ * original chunk).
+ */
+ CZ t1, t1
+
+ /*
+ * The first chunk is special: compare against the number
+ * of valid bytes in this chunk.
+ */
+ srli a0, t1, 3
+ bgtu t3, a0, 2f
+
+ /* Prepare for the word comparison loop. */
+ addi t2, t0, SZREG
+ li t3, -1
+
+ /*
+ * Our critical loop is 4 instructions and processes data in
+ * 4 byte or 8 byte chunks.
+ */
+ .p2align 3
+1:
+ REG_L t1, SZREG(t0)
+ addi t0, t0, SZREG
+ orc.b t1, t1
+ beq t1, t3, 1b
+
+ not t1, t1
+ CZ t1, t1
+ srli t1, t1, 3
+
+ /* Get number of processed bytes. */
+ sub t2, t0, t2
+
+ /* Add number of characters in the first word. */
+ add a0, a0, t2
+
+ /* Add number of characters in the last word. */
+ add a0, a0, t1
+2:
+ ret
+.option pop
+END(__strlen)
diff --git a/arch/riscv/lib/strncmp_zbb.S b/arch/riscv/lib/strncmp_zbb.S
new file mode 100644
index 0000000..00e0fec
--- /dev/null
+++ b/arch/riscv/lib/strncmp_zbb.S
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Taken from Linux arch/riscv/lib/strncmp.S
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+ENTRY(__strncmp)
+WEAK(strncmp)
+.option push
+.option arch,+zbb
+ /*
+ * Returns
+ * a0 - comparison result, like strncmp
+ *
+ * Parameters
+ * a0 - string1
+ * a1 - string2
+ * a2 - number of characters to compare
+ *
+ * Clobbers
+ * t0, t1, t2, t3, t4, t5, t6
+ */
+
+ or t2, a0, a1
+ li t5, -1
+ and t2, t2, SZREG-1
+ add t4, a0, a2
+ bnez t2, 3f
+
+ /* Adjust limit for fast-path. */
+ andi t6, t4, -SZREG
+
+ /* Main loop for aligned string. */
+ .p2align 3
+1:
+ bge a0, t6, 3f
+ REG_L t0, 0(a0)
+ REG_L t1, 0(a1)
+ orc.b t3, t0
+ bne t3, t5, 2f
+ orc.b t3, t1
+ bne t3, t5, 2f
+ addi a0, a0, SZREG
+ addi a1, a1, SZREG
+ beq t0, t1, 1b
+
+ /*
+ * Words don't match, and no null byte in the first
+ * word. Get bytes in big-endian order and compare.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ rev8 t0, t0
+ rev8 t1, t1
+#endif
+
+ /* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */
+ sltu a0, t0, t1
+ neg a0, a0
+ ori a0, a0, 1
+ ret
+
+2:
+ /*
+ * Found a null byte.
+ * If words don't match, fall back to simple loop.
+ */
+ bne t0, t1, 3f
+
+ /* Otherwise, strings are equal. */
+ li a0, 0
+ ret
+
+ /* Simple loop for misaligned strings. */
+ .p2align 3
+3:
+ bge a0, t4, 5f
+ lbu t0, 0(a0)
+ lbu t1, 0(a1)
+ addi a0, a0, 1
+ addi a1, a1, 1
+ bne t0, t1, 4f
+ bnez t0, 3b
+
+4:
+ sub a0, t0, t1
+ ret
+
+5:
+ li a0, 0
+ ret
+.option pop
+END(__strncmp)