// SPDX-License-Identifier: GPL-2.0+
/*
 *  charset conversion utils
 *
 *  Copyright (c) 2017 Rob Clark
 */

#include <common.h>
#include <charset.h>
#include <capitalization.h>
#include <efi_loader.h>
#include <malloc.h>

Heinrich Schuchardtb5130a82018-09-04 19:34:56 +020014static struct capitalization_table capitalization_table[] =
15#ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
16 UNICODE_CAPITALIZATION_TABLE;
17#elif CONFIG_FAT_DEFAULT_CODEPAGE == 1250
18 CP1250_CAPITALIZATION_TABLE;
19#else
20 CP437_CAPITALIZATION_TABLE;
21#endif
22
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +020023/**
24 * get_code() - read Unicode code point from UTF-8 stream
25 *
26 * @read_u8: - stream reader
27 * @src: - string buffer passed to stream reader, optional
28 * Return: - Unicode code point
29 */
30static int get_code(u8 (*read_u8)(void *data), void *data)
Heinrich Schuchardtd8c28232018-08-31 21:31:27 +020031{
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +020032 s32 ch = 0;
Heinrich Schuchardtd8c28232018-08-31 21:31:27 +020033
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +020034 ch = read_u8(data);
35 if (!ch)
Heinrich Schuchardtd8c28232018-08-31 21:31:27 +020036 return 0;
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +020037 if (ch >= 0xc2 && ch <= 0xf4) {
38 int code = 0;
39
40 if (ch >= 0xe0) {
41 if (ch >= 0xf0) {
Heinrich Schuchardtd8c28232018-08-31 21:31:27 +020042 /* 0xf0 - 0xf4 */
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +020043 ch &= 0x07;
44 code = ch << 18;
45 ch = read_u8(data);
46 if (ch < 0x80 || ch > 0xbf)
47 goto error;
48 ch &= 0x3f;
Heinrich Schuchardtd8c28232018-08-31 21:31:27 +020049 } else {
50 /* 0xe0 - 0xef */
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +020051 ch &= 0x0f;
Heinrich Schuchardtd8c28232018-08-31 21:31:27 +020052 }
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +020053 code += ch << 12;
Heinrich Schuchardtd8c28232018-08-31 21:31:27 +020054 if ((code >= 0xD800 && code <= 0xDFFF) ||
55 code >= 0x110000)
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +020056 goto error;
57 ch = read_u8(data);
58 if (ch < 0x80 || ch > 0xbf)
59 goto error;
Heinrich Schuchardtd8c28232018-08-31 21:31:27 +020060 }
61 /* 0xc0 - 0xdf or continuation byte (0x80 - 0xbf) */
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +020062 ch &= 0x3f;
63 code += ch << 6;
64 ch = read_u8(data);
65 if (ch < 0x80 || ch > 0xbf)
66 goto error;
67 ch &= 0x3f;
68 ch += code;
69 } else if (ch >= 0x80) {
70 goto error;
Heinrich Schuchardtd8c28232018-08-31 21:31:27 +020071 }
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +020072 return ch;
73error:
74 return '?';
75}
76
77/**
78 * read_string() - read byte from character string
79 *
80 * @data: - pointer to string
81 * Return: - byte read
82 *
83 * The string pointer is incremented if it does not point to '\0'.
84 */
85static u8 read_string(void *data)
86
87{
88 const char **src = (const char **)data;
89 u8 c;
90
91 if (!src || !*src || !**src)
92 return 0;
93 c = **src;
Heinrich Schuchardtd8c28232018-08-31 21:31:27 +020094 ++*src;
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +020095 return c;
96}
97
98/**
99 * read_console() - read byte from console
100 *
Heinrich Schuchardt60d79872018-10-02 06:43:38 +0200101 * @data - not used, needed to match interface
102 * Return: - byte read or 0 on error
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +0200103 */
104static u8 read_console(void *data)
105{
Heinrich Schuchardt60d79872018-10-02 06:43:38 +0200106 int ch;
107
Heinrich Schuchardtc670aee2020-10-07 18:11:48 +0200108 ch = getchar();
Heinrich Schuchardt60d79872018-10-02 06:43:38 +0200109 if (ch < 0)
110 ch = 0;
111 return ch;
Heinrich Schuchardt35cbb792018-09-12 00:05:32 +0200112}
113
114int console_read_unicode(s32 *code)
115{
116 if (!tstc()) {
117 /* No input available */
118 return 1;
119 }
120
121 /* Read Unicode code */
122 *code = get_code(read_console, NULL);
123 return 0;
124}
125
126s32 utf8_get(const char **src)
127{
128 return get_code(read_string, src);
Heinrich Schuchardtd8c28232018-08-31 21:31:27 +0200129}
130
131int utf8_put(s32 code, char **dst)
132{
133 if (!dst || !*dst)
134 return -1;
135 if ((code >= 0xD800 && code <= 0xDFFF) || code >= 0x110000)
136 return -1;
137 if (code <= 0x007F) {
138 **dst = code;
139 } else {
140 if (code <= 0x07FF) {
141 **dst = code >> 6 | 0xC0;
142 } else {
143 if (code < 0x10000) {
144 **dst = code >> 12 | 0xE0;
145 } else {
146 **dst = code >> 18 | 0xF0;
147 ++*dst;
148 **dst = (code >> 12 & 0x3F) | 0x80;
149 }
150 ++*dst;
151 **dst = (code >> 6 & 0x3F) | 0x80;
152 }
153 ++*dst;
154 **dst = (code & 0x3F) | 0x80;
155 }
156 ++*dst;
157 return 0;
158}
159
160size_t utf8_utf16_strnlen(const char *src, size_t count)
161{
162 size_t len = 0;
163
164 for (; *src && count; --count) {
165 s32 code = utf8_get(&src);
166
167 if (!code)
168 break;
169 if (code < 0) {
170 /* Reserve space for a replacement character */
171 len += 1;
172 } else if (code < 0x10000) {
173 len += 1;
174 } else {
175 len += 2;
176 }
177 }
178 return len;
179}
180
181int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count)
182{
183 if (!src || !dst || !*dst)
184 return -1;
185
186 for (; count && *src; --count) {
187 s32 code = utf8_get(&src);
188
189 if (code < 0)
190 code = '?';
191 utf16_put(code, dst);
192 }
193 **dst = 0;
194 return 0;
195}
196
197s32 utf16_get(const u16 **src)
198{
199 s32 code, code2;
200
201 if (!src || !*src)
202 return -1;
203 if (!**src)
204 return 0;
205 code = **src;
206 ++*src;
207 if (code >= 0xDC00 && code <= 0xDFFF)
208 return -1;
209 if (code >= 0xD800 && code <= 0xDBFF) {
210 if (!**src)
211 return -1;
212 code &= 0x3ff;
213 code <<= 10;
214 code += 0x10000;
215 code2 = **src;
216 ++*src;
217 if (code2 <= 0xDC00 || code2 >= 0xDFFF)
218 return -1;
219 code2 &= 0x3ff;
220 code += code2;
221 }
222 return code;
223}
224
225int utf16_put(s32 code, u16 **dst)
226{
227 if (!dst || !*dst)
228 return -1;
229 if ((code >= 0xD800 && code <= 0xDFFF) || code >= 0x110000)
230 return -1;
231 if (code < 0x10000) {
232 **dst = code;
233 } else {
234 code -= 0x10000;
235 **dst = code >> 10 | 0xD800;
236 ++*dst;
237 **dst = (code & 0x3ff) | 0xDC00;
238 }
239 ++*dst;
240 return 0;
241}
242
243size_t utf16_strnlen(const u16 *src, size_t count)
244{
245 size_t len = 0;
246
247 for (; *src && count; --count) {
248 s32 code = utf16_get(&src);
249
250 if (!code)
251 break;
252 /*
253 * In case of an illegal sequence still reserve space for a
254 * replacement character.
255 */
256 ++len;
257 }
258 return len;
259}
260
261size_t utf16_utf8_strnlen(const u16 *src, size_t count)
262{
263 size_t len = 0;
264
265 for (; *src && count; --count) {
266 s32 code = utf16_get(&src);
267
268 if (!code)
269 break;
270 if (code < 0)
271 /* Reserve space for a replacement character */
272 len += 1;
273 else if (code < 0x80)
274 len += 1;
275 else if (code < 0x800)
276 len += 2;
277 else if (code < 0x10000)
278 len += 3;
279 else
280 len += 4;
281 }
282 return len;
283}
284
285int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count)
286{
287 if (!src || !dst || !*dst)
288 return -1;
289
290 for (; count && *src; --count) {
291 s32 code = utf16_get(&src);
292
293 if (code < 0)
294 code = '?';
295 utf8_put(code, dst);
296 }
297 **dst = 0;
298 return 0;
299}
300
Heinrich Schuchardtb5130a82018-09-04 19:34:56 +0200301s32 utf_to_lower(const s32 code)
302{
303 struct capitalization_table *pos = capitalization_table;
304 s32 ret = code;
305
306 if (code <= 0x7f) {
307 if (code >= 'A' && code <= 'Z')
308 ret += 0x20;
309 return ret;
310 }
311 for (; pos->upper; ++pos) {
312 if (pos->upper == code) {
313 ret = pos->lower;
314 break;
315 }
316 }
317 return ret;
318}
319
320s32 utf_to_upper(const s32 code)
321{
322 struct capitalization_table *pos = capitalization_table;
323 s32 ret = code;
324
325 if (code <= 0x7f) {
326 if (code >= 'a' && code <= 'z')
327 ret -= 0x20;
328 return ret;
329 }
330 for (; pos->lower; ++pos) {
331 if (pos->lower == code) {
332 ret = pos->upper;
333 break;
334 }
335 }
336 return ret;
337}
Rob Clark78178bb2017-09-09 06:47:40 -0400338
AKASHI Takahirof8062c92019-09-18 10:26:29 +0900339/*
340 * u16_strncmp() - compare two u16 string
341 *
342 * @s1: first string to compare
343 * @s2: second string to compare
344 * @n: maximum number of u16 to compare
345 * Return: 0 if the first n u16 are the same in s1 and s2
346 * < 0 if the first different u16 in s1 is less than the
347 * corresponding u16 in s2
348 * > 0 if the first different u16 in s1 is greater than the
349 * corresponding u16 in s2
350 */
351int u16_strncmp(const u16 *s1, const u16 *s2, size_t n)
352{
353 int ret = 0;
354
355 for (; n; --n, ++s1, ++s2) {
356 ret = *s1 - *s2;
357 if (ret || !*s1)
358 break;
359 }
360
361 return ret;
362}
363
/**
 * u16_strlen() - count the u16 in a 0-terminated u16 string
 *
 * The string is read byte by byte, so @in does not have to be aligned to
 * a 16-bit boundary.
 *
 * @in:		- 0-terminated string of 16-bit units
 * Return:	- number of 16-bit units before the terminating 0
 */
size_t u16_strlen(const void *in)
{
	const char *start = in;
	const char *pos = start;
	size_t ret;

	/* The terminator is a unit with both bytes equal to zero */
	for (; pos[0] || pos[1]; pos += 2)
		;
	ret = pos - start;
	ret >>= 1;
	return ret;
}

Ilias Apalodimas6974a4a2020-11-22 15:10:26 +0200376size_t __efi_runtime u16_strnlen(const u16 *in, size_t count)
Rob Clark78178bb2017-09-09 06:47:40 -0400377{
378 size_t i;
379 for (i = 0; count-- && in[i]; i++);
380 return i;
381}
382
Sughosh Ganu4835d352020-05-06 22:12:41 +0300383size_t u16_strsize(const void *in)
384{
385 return (u16_strlen(in) + 1) * sizeof(u16);
386}
387
Akashi, Takahiro2a3537a2018-12-14 19:10:38 +0900388u16 *u16_strcpy(u16 *dest, const u16 *src)
389{
390 u16 *tmp = dest;
391
392 for (;; dest++, src++) {
393 *dest = *src;
394 if (!*src)
395 break;
396 }
397
398 return tmp;
399}
400
Heinrich Schuchardt317068b2019-07-14 17:28:49 +0200401u16 *u16_strdup(const void *src)
Akashi, Takahiro2a3537a2018-12-14 19:10:38 +0900402{
403 u16 *new;
Heinrich Schuchardt317068b2019-07-14 17:28:49 +0200404 size_t len;
Akashi, Takahiro2a3537a2018-12-14 19:10:38 +0900405
406 if (!src)
407 return NULL;
Heinrich Schuchardt317068b2019-07-14 17:28:49 +0200408 len = (u16_strlen(src) + 1) * sizeof(u16);
409 new = malloc(len);
Akashi, Takahiro2a3537a2018-12-14 19:10:38 +0900410 if (!new)
411 return NULL;
Heinrich Schuchardt317068b2019-07-14 17:28:49 +0200412 memcpy(new, src, len);
Akashi, Takahiro2a3537a2018-12-14 19:10:38 +0900413
414 return new;
415}
416
/**
 * utf16_to_utf8() - convert UTF-16 to UTF-8
 *
 * Exactly @size u16 are converted; a 0 in the input is not treated as a
 * terminator and no terminator is appended to the output. Every unpaired
 * surrogate is replaced by '?'. A unit that follows an unpaired high
 * surrogate is converted on its own. At most three bytes are written per
 * u16 of input.
 *
 * @dest:	- destination buffer
 * @src:	- UTF-16 input
 * @size:	- number of u16 in @src to convert
 * Return:	- pointer to the byte after the last byte written to @dest
 */
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
{
	uint32_t code_high = 0;	/* pending high surrogate, 0 if none */

	while (size--) {
		uint32_t code = *src++;

		if (code_high) {
			if (code >= 0xDC00 && code <= 0xDFFF) {
				/* Surrogate pair. */
				code = ((code_high - 0xD800) << 10) +
				       (code - 0xDC00) + 0x10000;

				*dest++ = (code >> 18) | 0xF0;
				*dest++ = ((code >> 12) & 0x3F) | 0x80;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;
			} else {
				/* Error: unpaired high surrogate */
				*dest++ = '?';
				/*
				 * The current unit may be valid. Push it back
				 * by restoring both the pointer and the
				 * remaining count, else the last unit of the
				 * input would never be converted.
				 */
				src--;
				size++;
			}

			code_high = 0;
		} else {
			if (code <= 0x007F) {
				*dest++ = code;
			} else if (code <= 0x07FF) {
				*dest++ = (code >> 6) | 0xC0;
				*dest++ = (code & 0x3F) | 0x80;
			} else if (code >= 0xD800 && code <= 0xDBFF) {
				/* Wait for the low surrogate */
				code_high = code;
				continue;
			} else if (code >= 0xDC00 && code <= 0xDFFF) {
				/* Error: low surrogate without high surrogate */
				*dest++ = '?';
			} else {
				/* Remaining values fit into three bytes */
				*dest++ = (code >> 12) | 0xE0;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;
			}
		}
	}

	/* Error: input ended after a high surrogate */
	if (code_high)
		*dest++ = '?';

	return dest;
}
468}