blob: 337ab8ce5ddd94a551a851b764705fd8dec339c4 [file] [log] [blame]
Roland Gaudig9571f1a2021-07-23 12:29:19 +00001/* vi: set sw=4 ts=4: */
2/*
3 * printf - format and print data
4 *
5 * Copyright 1999 Dave Cinege
6 * Portions copyright (C) 1990-1996 Free Software Foundation, Inc.
7 *
8 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
9 */
10/* Usage: printf format [argument...]
11 *
12 * A front end to the printf function that lets it be used from the shell.
13 *
14 * Backslash escapes:
15 *
16 * \" = double quote
17 * \\ = backslash
18 * \a = alert (bell)
19 * \b = backspace
20 * \c = produce no further output
21 * \f = form feed
22 * \n = new line
23 * \r = carriage return
24 * \t = horizontal tab
25 * \v = vertical tab
26 * \0ooo = octal number (ooo is 0 to 3 digits)
27 * \xhhh = hexadecimal number (hhh is 1 to 3 digits)
28 *
29 * Additional directive:
30 *
31 * %b = print an argument string, interpreting backslash escapes
32 *
33 * The 'format' argument is re-used as many times as necessary
34 * to convert all of the given arguments.
35 *
36 * David MacKenzie <djm@gnu.ai.mit.edu>
37 */
38/* 19990508 Busy Boxed! Dave Cinege */
39
40//config:config PRINTF
41//config: bool "printf (3.8 kb)"
42//config: default y
43//config: help
44//config: printf is used to format and print specified strings.
45//config: It's similar to 'echo' except it has more options.
46
47//applet:IF_PRINTF(APPLET_NOFORK(printf, printf, BB_DIR_USR_BIN, BB_SUID_DROP, printf))
48
49//kbuild:lib-$(CONFIG_PRINTF) += printf.o
50//kbuild:lib-$(CONFIG_ASH_PRINTF) += printf.o
51//kbuild:lib-$(CONFIG_HUSH_PRINTF) += printf.o
52
53//usage:#define printf_trivial_usage
54//usage: "FORMAT [ARG]..."
55//usage:#define printf_full_usage "\n\n"
56//usage: "Format and print ARG(s) according to FORMAT (a-la C printf)"
57//usage:
58//usage:#define printf_example_usage
59//usage: "$ printf \"Val=%d\\n\" 5\n"
60//usage: "Val=5\n"
61
62#include "libbb.h"
63
64/* A note on bad input: neither bash 3.2 nor coreutils 6.10 stop on it.
65 * They report it:
66 * bash: printf: XXX: invalid number
67 * printf: XXX: expected a numeric value
68 * bash: printf: 123XXX: invalid number
69 * printf: 123XXX: value not completely converted
70 * but then they use 0 (or partially converted numeric prefix) as a value
71 * and continue. They exit with 1 in this case.
72 * Both accept insane field width/precision (e.g. %9999999999.9999999999d).
73 * Both print error message and assume 0 if %*.*f width/precision is "bad"
74 * (but negative numbers are not "bad").
75 * Both accept negative numbers for %u specifier.
76 *
77 * We try to be compatible.
78 */
79
80typedef void FAST_FUNC (*converter)(const char *arg, void *result);
81
Roland Gaudig6244cda2021-07-23 12:29:20 +000082#define WANT_HEX_ESCAPES 0
83
84/* Usual "this only works for ascii compatible encodings" disclaimer. */
85#undef _tolower
86#define _tolower(X) ((X)|((char) 0x20))
87
88char FAST_FUNC bb_process_escape_sequence(const char **ptr)
89{
90 const char *q;
91 unsigned num_digits;
92 unsigned n;
93 unsigned base;
94
95 num_digits = n = 0;
96 base = 8;
97 q = *ptr;
98
99 if (WANT_HEX_ESCAPES && *q == 'x') {
100 ++q;
101 base = 16;
102 ++num_digits;
103 }
104
105 /* bash requires leading 0 in octal escapes:
106 * \02 works, \2 does not (prints \ and 2).
107 * We treat \2 as a valid octal escape sequence. */
108 do {
109 unsigned r;
110 unsigned d = (unsigned char)(*q) - '0';
111#if WANT_HEX_ESCAPES
112 if (d >= 10) {
113 d = (unsigned char)_tolower(*q) - 'a';
114 //d += 10;
115 /* The above would map 'A'-'F' and 'a'-'f' to 10-15,
116 * however, some chars like '@' would map to 9 < base.
117 * Do not allow that, map invalid chars to N > base:
118 */
119 if ((int)d >= 0)
120 d += 10;
121 }
122#endif
123 if (d >= base) {
124 if (WANT_HEX_ESCAPES && base == 16) {
125 --num_digits;
126 if (num_digits == 0) {
127 /* \x<bad_char>: return '\',
128 * leave ptr pointing to x */
129 return '\\';
130 }
131 }
132 break;
133 }
134
135 r = n * base + d;
136 if (r > UCHAR_MAX) {
137 break;
138 }
139
140 n = r;
141 ++q;
142 } while (++num_digits < 3);
143
144 if (num_digits == 0) {
145 /* Not octal or hex escape sequence.
146 * Is it one-letter one? */
147
148 /* bash builtin "echo -e '\ec'" interprets \e as ESC,
149 * but coreutils "/bin/echo -e '\ec'" does not.
150 * Manpages tend to support coreutils way.
151 * Update: coreutils added support for \e on 28 Oct 2009. */
152 static const char charmap[] ALIGN1 = {
153 'a', 'b', 'e', 'f', 'n', 'r', 't', 'v', '\\', '\0',
154 '\a', '\b', 27, '\f', '\n', '\r', '\t', '\v', '\\', '\\',
155 };
156 const char *p = charmap;
157 do {
158 if (*p == *q) {
159 q++;
160 break;
161 }
162 } while (*++p != '\0');
163 /* p points to found escape char or NUL,
164 * advance it and find what it translates to.
165 * Note that \NUL and unrecognized sequence \z return '\'
166 * and leave ptr pointing to NUL or z. */
167 n = p[sizeof(charmap) / 2];
168 }
169
170 *ptr = q;
171
172 return (char) n;
173}
174
175char* FAST_FUNC skip_whitespace(const char *s)
176{
177 /* In POSIX/C locale (the only locale we care about: do we REALLY want
178 * to allow Unicode whitespace in, say, .conf files? nuts!)
179 * isspace is only these chars: "\t\n\v\f\r" and space.
180 * "\t\n\v\f\r" happen to have ASCII codes 9,10,11,12,13.
181 * Use that.
182 */
183 while (*s == ' ' || (unsigned char)(*s - 9) <= (13 - 9))
184 s++;
185
186 return (char *) s;
187}
188
/* Like strcpy, but the source and destination strings may overlap
 * in either direction.
 *
 * dst must have room for strlen(src) + 1 bytes.
 *
 * A plain forward byte copy is only safe when dst lies below src;
 * with dst inside the source string it would overwrite the terminator
 * before reaching it. memmove() handles both layouts.
 */
void FAST_FUNC overlapping_strcpy(char *dst, const char *src)
{
	/* Cheap optimization for dst == src case -
	 * better to have it here than in many callers.
	 */
	if (dst != src)
		memmove(dst, src, strlen(src) + 1);
}
202
Roland Gaudig9571f1a2021-07-23 12:29:19 +0000203static int multiconvert(const char *arg, void *result, converter convert)
204{
205 if (*arg == '"' || *arg == '\'') {
206 arg = utoa((unsigned char)arg[1]);
207 }
208 errno = 0;
209 convert(arg, result);
210 if (errno) {
211 bb_error_msg("invalid number '%s'", arg);
212 return 1;
213 }
214 return 0;
215}
216
217static void FAST_FUNC conv_strtoull(const char *arg, void *result)
218{
219 /* Allow leading '+' - bb_strtoull() by itself does not allow it,
220 * and probably shouldn't (other callers might require purely numeric
221 * inputs to be allowed.
222 */
223 if (arg[0] == '+')
224 arg++;
225 *(unsigned long long*)result = bb_strtoull(arg, NULL, 0);
226 /* both coreutils 6.10 and bash 3.2:
227 * $ printf '%x\n' -2
228 * fffffffffffffffe
229 * Mimic that:
230 */
231 if (errno) {
232 *(unsigned long long*)result = bb_strtoll(arg, NULL, 0);
233 }
234}
235static void FAST_FUNC conv_strtoll(const char *arg, void *result)
236{
237 if (arg[0] == '+')
238 arg++;
239 *(long long*)result = bb_strtoll(arg, NULL, 0);
240}
241static void FAST_FUNC conv_strtod(const char *arg, void *result)
242{
243 char *end;
244 /* Well, this one allows leading whitespace... so what? */
245 /* What I like much less is that "-" accepted too! :( */
246 *(double*)result = strtod(arg, &end);
247 if (end[0]) {
248 errno = ERANGE;
249 *(double*)result = 0;
250 }
251}
252
253/* Callers should check errno to detect errors */
254static unsigned long long my_xstrtoull(const char *arg)
255{
256 unsigned long long result;
257 if (multiconvert(arg, &result, conv_strtoull))
258 result = 0;
259 return result;
260}
261static long long my_xstrtoll(const char *arg)
262{
263 long long result;
264 if (multiconvert(arg, &result, conv_strtoll))
265 result = 0;
266 return result;
267}
268static double my_xstrtod(const char *arg)
269{
270 double result;
271 multiconvert(arg, &result, conv_strtod);
272 return result;
273}
274
/* Handles %b: write STR to stdout, expanding backslash escapes.
 * Returns 1 if a \c escape was met (output is to be short-circuited),
 * 0 once the whole string has been written. */
static int print_esc_string(const char *str)
{
	const char *cursor = str;

	while (*cursor != '\0') {
		char ch = *cursor++;

		if (ch != '\\') {
			putchar(ch);
			continue;
		}

		/* cursor now addresses the char after the backslash */
		if (*cursor == 'c')
			return 1;

		/* %b also accepts 4-digit octals of the form \0###:
		 * when a '0' is followed by another octal digit, drop
		 * the leading '0' and let the decoder see the rest. */
		if (*cursor == '0' && (unsigned char)(cursor[1] - '0') < 8)
			cursor++;

		putchar(bb_process_escape_sequence(&cursor));
	}

	return 0;
}
305
/* Emit one printf() call for the current directive, passing the '*'
 * field width and/or precision (when the directive uses them) ahead of
 * VALUE, so the variadic arguments always match the format string.
 * Relies on the locals format, have_width, have_prec, field_width and
 * precision of print_direc(). */
#define PRINT_DIREC_ARG(value) do { \
	if (have_width && have_prec) \
		printf(format, field_width, precision, (value)); \
	else if (have_width) \
		printf(format, field_width, (value)); \
	else if (have_prec) \
		printf(format, precision, (value)); \
	else \
		printf(format, (value)); \
} while (0)

/*
 * Print one conversion directive.
 *
 * format      - start of the directive ('%' ... conversion char); it is
 *               temporarily NUL-terminated at format[fmt_length] and
 *               restored before returning, so format[fmt_length] must be
 *               a writable byte.
 * fmt_length  - number of characters in the directive.
 * field_width - value for a '*' field width, if the directive has one.
 * precision   - value for a ".*" precision, if the directive has one.
 * argument    - text of the argument to convert and print.
 *
 * errno is cleared on entry; numeric conversion failures leave it set
 * (see multiconvert()), which the caller inspects.
 */
static void print_direc(char *format, unsigned fmt_length,
		int field_width, int precision,
		const char *argument)
{
	char saved;
	char *have_prec, *have_width;

	saved = format[fmt_length];
	format[fmt_length] = '\0';

	have_prec = strstr(format, ".*");
	have_width = strchr(format, '*');
	/* If the first '*' is the one belonging to ".*", there is no
	 * '*' field width. (Checked without doing arithmetic on NULL.) */
	if (have_prec && have_width == have_prec + 1)
		have_width = NULL;

	/* multiconvert sets errno = 0, but %c and %s need it cleared too */
	errno = 0;

	switch (format[fmt_length - 1]) {
	case 'c':
		/* char is promoted to int, as %c expects */
		PRINT_DIREC_ARG(*argument);
		break;
	case 'd':
	case 'i': {
		long long llv = my_xstrtoll(skip_whitespace(argument));
		PRINT_DIREC_ARG(llv);
		break;
	}
	case 'o':
	case 'u':
	case 'x':
	case 'X': {
		unsigned long long ullv = my_xstrtoull(skip_whitespace(argument));
		PRINT_DIREC_ARG(ullv);
		break;
	}
	case 's':
		/* Pass the pointer as a pointer; no integer round-trip */
		PRINT_DIREC_ARG(argument);
		break;
	case 'f':
	case 'e':
	case 'E':
	case 'g':
	case 'G': {
		double dv = my_xstrtod(argument);
		PRINT_DIREC_ARG(dv);
		break;
	}
	} /* switch */

	format[fmt_length] = saved;
}
#undef PRINT_DIREC_ARG
396
/* Parse the argument supplied for a '*' width or precision, as in
 * "%*.*f". Negative numbers are ok (compat). If bb_strtoi() leaves
 * errno set, a message is printed and 0 is used instead. */
static int get_width_prec(const char *str)
{
	int value = bb_strtoi(str, NULL, 10);

	if (errno == 0)
		return value;

	bb_error_msg("invalid number '%s'", str);
	return 0;
}
407
408/* Print the text in FORMAT, using ARGV for arguments to any '%' directives.
409 Return advanced ARGV. */
410static char **print_formatted(char *f, char **argv, int *conv_err)
411{
412 char *direc_start; /* Start of % directive. */
413 unsigned direc_length; /* Length of % directive. */
414 int field_width; /* Arg to first '*' */
415 int precision; /* Arg to second '*' */
416 char **saved_argv = argv;
417
418 for (; *f; ++f) {
419 switch (*f) {
420 case '%':
421 direc_start = f++;
422 direc_length = 1;
423 field_width = precision = 0;
424 if (*f == '%') {
425 bb_putchar('%');
426 break;
427 }
428 if (*f == 'b') {
429 if (*argv) {
430 if (print_esc_string(*argv))
431 return saved_argv; /* causes main() to exit */
432 ++argv;
433 }
434 break;
435 }
436 if (*f && strchr("-+ #", *f)) {
437 ++f;
438 ++direc_length;
439 }
440 if (*f == '*') {
441 ++f;
442 ++direc_length;
443 if (*argv)
444 field_width = get_width_prec(*argv++);
445 } else {
446 while (isdigit(*f)) {
447 ++f;
448 ++direc_length;
449 }
450 }
451 if (*f == '.') {
452 ++f;
453 ++direc_length;
454 if (*f == '*') {
455 ++f;
456 ++direc_length;
457 if (*argv)
458 precision = get_width_prec(*argv++);
459 } else {
460 while (isdigit(*f)) {
461 ++f;
462 ++direc_length;
463 }
464 }
465 }
466
467 /* Remove "lLhz" size modifiers, repeatedly.
468 * bash does not like "%lld", but coreutils
469 * happily takes even "%Llllhhzhhzd"!
470 * We are permissive like coreutils */
471 while ((*f | 0x20) == 'l' || *f == 'h' || *f == 'z') {
472 overlapping_strcpy(f, f + 1);
473 }
474 /* Add "ll" if integer modifier, then print */
475 {
476 static const char format_chars[] ALIGN1 = "diouxXfeEgGcs";
477 char *p = strchr(format_chars, *f);
478 /* needed - try "printf %" without it */
479 if (p == NULL || *f == '\0') {
480 bb_error_msg("%s: invalid format", direc_start);
481 /* causes main() to exit with error */
482 return saved_argv - 1;
483 }
484 ++direc_length;
485 if (p - format_chars <= 5) {
486 /* it is one of "diouxX" */
487 p = xmalloc(direc_length + 3);
488 memcpy(p, direc_start, direc_length);
489 p[direc_length + 1] = p[direc_length - 1];
490 p[direc_length - 1] = 'l';
491 p[direc_length] = 'l';
492 //bb_error_msg("<%s>", p);
493 direc_length += 2;
494 direc_start = p;
495 } else {
496 p = NULL;
497 }
498 if (*argv) {
499 print_direc(direc_start, direc_length, field_width,
500 precision, *argv++);
501 } else {
502 print_direc(direc_start, direc_length, field_width,
503 precision, "");
504 }
505 *conv_err |= errno;
506 free(p);
507 }
508 break;
509 case '\\':
510 if (*++f == 'c') {
511 return saved_argv; /* causes main() to exit */
512 }
513 bb_putchar(bb_process_escape_sequence((const char **)&f));
514 f--;
515 break;
516 default:
517 putchar(*f);
518 }
519 }
520
521 return argv;
522}
523
524int printf_main(int argc UNUSED_PARAM, char **argv)
525{
526 int conv_err;
527 char *format;
528 char **argv2;
529
530 /* We must check that stdout is not closed.
531 * The reason for this is highly non-obvious.
532 * printf_main is used from shell.
533 * Shell must correctly handle 'printf "%s" foo'
534 * if stdout is closed. With stdio, output gets shoveled into
535 * stdout buffer, and even fflush cannot clear it out. It seems that
536 * even if libc receives EBADF on write attempts, it feels determined
537 * to output data no matter what. So it will try later,
538 * and possibly will clobber future output. Not good. */
539// TODO: check fcntl() & O_ACCMODE == O_WRONLY or O_RDWR?
540 if (fcntl(1, F_GETFL) == -1)
541 return 1; /* match coreutils 6.10 (sans error msg to stderr) */
542 //if (dup2(1, 1) != 1) - old way
543 // return 1;
544
545 /* bash builtin errors out on "printf '-%s-\n' foo",
546 * coreutils-6.9 works. Both work with "printf -- '-%s-\n' foo".
547 * We will mimic coreutils. */
548 if (argv[1] && argv[1][0] == '-' && argv[1][1] == '-' && !argv[1][2])
549 argv++;
550 if (!argv[1]) {
551 if (ENABLE_ASH_PRINTF
552 && applet_name[0] != 'p'
553 ) {
554 bb_simple_error_msg("usage: printf FORMAT [ARGUMENT...]");
555 return 2; /* bash compat */
556 }
557 bb_show_usage();
558 }
559
560 format = argv[1];
561 argv2 = argv + 2;
562
563 conv_err = 0;
564 do {
565 argv = argv2;
566 argv2 = print_formatted(format, argv, &conv_err);
567 } while (argv2 > argv && *argv2);
568
569 /* coreutils compat (bash doesn't do this):
570 if (*argv)
571 fprintf(stderr, "excess args ignored");
572 */
573
574 return (argv2 < argv) /* if true, print_formatted errored out */
575 || conv_err; /* print_formatted saw invalid number */
576}