efi_loader: carve out utf_to_cp()
Carve out a function to translate a Unicode code point to an 8bit codepage.
Provide a unit test for the new function.
Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
diff --git a/include/charset.h b/include/charset.h
index 64ba91f..52e7d14 100644
--- a/include/charset.h
+++ b/include/charset.h
@@ -275,4 +275,15 @@
*/
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size);
+/**
+ * utf_to_cp() - translate Unicode code point to 8bit codepage
+ *
+ * Code points >= 0x80 missing in the codepage are rendered as question mark.
+ *
+ * @c: pointer to Unicode code point, replaced by the translated character
+ * @codepage: Unicode to codepage translation table for characters 0x80-0xff
+ * Return: 0 on success, -ENOENT if codepoint cannot be translated
+ */
+int utf_to_cp(s32 *c, const u16 *codepage);
+
#endif /* __CHARSET_H_ */
diff --git a/lib/charset.c b/lib/charset.c
index 814847d..1345c8f 100644
--- a/lib/charset.c
+++ b/lib/charset.c
@@ -10,6 +10,7 @@
#include <capitalization.h>
#include <cp437.h>
#include <efi_loader.h>
+#include <errno.h>
#include <malloc.h>
/**
@@ -472,3 +473,30 @@
return dest;
}
+
+/**
+ * utf_to_cp() - convert a Unicode code point to an 8bit codepage character
+ *
+ * Values below 0x80 are kept as they are. Any other value is searched in
+ * @codepage; if it is not found, a question mark is stored instead.
+ * @c:		code point to convert, overwritten with the result
+ * @codepage:	table of the Unicode code points of characters 0x80 to 0xff
+ * Return:	0 on success, -ENOENT if the code point has no translation
+ */
+int utf_to_cp(s32 *c, const u16 *codepage)
+{
+	int pos;
+
+	/* Nothing to translate below 0x80 */
+	if (*c < 0x80)
+		return 0;
+
+	for (pos = 0; pos < 0x80; ++pos) {
+		if (codepage[pos] != *c)
+			continue;
+		*c = pos + 0x80;
+		return 0;
+	}
+	*c = '?';
+	return -ENOENT;
+}
diff --git a/lib/efi_loader/efi_unicode_collation.c b/lib/efi_loader/efi_unicode_collation.c
index bf5314c..36be798 100644
--- a/lib/efi_loader/efi_unicode_collation.c
+++ b/lib/efi_loader/efi_unicode_collation.c
@@ -300,23 +300,10 @@
break;
}
c = utf_to_upper(c);
- if (c >= 0x80) {
- int j;
-
- /* Look for codepage translation */
- for (j = 0; j < 0x80; ++j) {
- if (c == codepage[j]) {
- c = j + 0x80;
- break;
- }
- }
- if (j >= 0x80) {
- c = '_';
- ret = true;
- }
- } else if (c && (c < 0x20 || strchr(illegal, c))) {
- c = '_';
+ if (utf_to_cp(&c, codepage) ||
+ (c && (c < 0x20 || strchr(illegal, c)))) {
ret = true;
+ c = '_';
}
fat[i] = c;
diff --git a/test/unicode_ut.c b/test/unicode_ut.c
index 6130ef0..2cc6b5f 100644
--- a/test/unicode_ut.c
+++ b/test/unicode_ut.c
@@ -595,6 +595,35 @@
}
UNICODE_TEST(unicode_test_u16_strsize);
+static int unicode_test_utf_to_cp(struct unit_test_state *uts)
+{
+ int ret; /* return code of utf_to_cp() */
+ s32 c; /* code point passed in, translated value on return */
+
+ c = '\n'; /* below 0x80: expected to be left unchanged */
+ ret = utf_to_cp(&c, codepage_437);
+ ut_asserteq(0, ret);
+ ut_asserteq('\n', c);
+
+ c = 'a'; /* below 0x80: expected to be left unchanged */
+ ret = utf_to_cp(&c, codepage_437);
+ ut_asserteq(0, ret);
+ ut_asserteq('a', c);
+
+ c = 0x03c4; /* Greek small letter tau: expected at 0xe7 */
+ ret = utf_to_cp(&c, codepage_437);
+ ut_asserteq(0, ret);
+ ut_asserteq(0xe7, c);
+
+ c = 0x03a4; /* Greek capital letter tau: expected to be missing */
+ ret = utf_to_cp(&c, codepage_437);
+ ut_asserteq(-ENOENT, ret);
+ ut_asserteq('?', c);
+
+ return 0;
+}
+UNICODE_TEST(unicode_test_utf_to_cp);
+
#ifdef CONFIG_EFI_LOADER
static int unicode_test_efi_create_indexed_name(struct unit_test_state *uts)
{