console: Fix pre-console flushing via cfb_console being very slow

On my A10 OlinuxIno Lime I noticed a huge (5+ seconds) delay coming from
console_init_r. This turns out to be caused by the preconsole buffer flushing
to the cfb_console. The Lime only has a 16 bit memory bus and that is already
heavy used to scan out the 1920x1080 framebuffer.

The problem is that print_pre_console_buffer() was printing the buffer once
character at a time and the cfb_console code then ends up doing a cache-flush
for touched display lines for each character.

This commit fixes this by first building a 0 terminated buffer and then
printing it in one puts() call, avoiding unnecessary cache flushes.

This changes the time for the flush from 5+ seconds to not noticable.

The downside of this approach is that the pre-console buffer needs to fit
on the stack, this is not that much to ask since we are talking about plain
text here. This commit also adjusts the sunxi CONFIG_PRE_CON_BUF_SZ to
actually fit on the stack. Sunxi currently is the only user of the pre-console
code so no other boards need to be adjusted.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Tom Rini <trini@konsulko.com>
diff --git a/README b/README
index 1ea397a..2e81ccc 100644
--- a/README
+++ b/README
@@ -948,6 +948,9 @@
 		bytes are output before the console is initialised, the
 		earlier bytes are discarded.
 
+		Note that when printing the buffer a copy is made on the
+		stack so CONFIG_PRE_CON_BUF_SZ must fit on the stack.
+
 		'Sane' compilers will generate smaller code if
 		CONFIG_PRE_CON_BUF_SZ is a power of 2
 
diff --git a/common/console.c b/common/console.c
index 3f25e76..0058222 100644
--- a/common/console.c
+++ b/common/console.c
@@ -200,15 +200,15 @@
 }
 
 #ifdef CONFIG_PRE_CONSOLE_BUFFER
-static void console_putc_noserial(int file, const char c)
+static void console_puts_noserial(int file, const char *s)
 {
 	int i;
 	struct stdio_dev *dev;
 
 	for (i = 0; i < cd_count[file]; i++) {
 		dev = console_devices[file][i];
-		if (dev->putc != NULL && strcmp(dev->name, "serial") != 0)
-			dev->putc(dev, c);
+		if (dev->puts != NULL && strcmp(dev->name, "serial") != 0)
+			dev->puts(dev, s);
 	}
 }
 #endif
@@ -251,10 +251,10 @@
 }
 
 #ifdef CONFIG_PRE_CONSOLE_BUFFER
-static inline void console_putc_noserial(int file, const char c)
+static inline void console_puts_noserial(int file, const char *s)
 {
 	if (strcmp(stdio_devices[file]->name, "serial") != 0)
-		stdio_devices[file]->putc(stdio_devices[file], c);
+		stdio_devices[file]->puts(stdio_devices[file], s);
 }
 #endif
 
@@ -425,22 +425,26 @@
 
 static void print_pre_console_buffer(int flushpoint)
 {
-	unsigned long i = 0;
-	char *buffer = (char *)CONFIG_PRE_CON_BUF_ADDR;
+	unsigned long in = 0, out = 0;
+	char *buf_in = (char *)CONFIG_PRE_CON_BUF_ADDR;
+	char buf_out[CONFIG_PRE_CON_BUF_SZ + 1];
 
 	if (gd->precon_buf_idx > CONFIG_PRE_CON_BUF_SZ)
-		i = gd->precon_buf_idx - CONFIG_PRE_CON_BUF_SZ;
+		in = gd->precon_buf_idx - CONFIG_PRE_CON_BUF_SZ;
 
-	while (i < gd->precon_buf_idx)
-		switch (flushpoint) {
-		case PRE_CONSOLE_FLUSHPOINT1_SERIAL:
-			putc(buffer[CIRC_BUF_IDX(i++)]);
-			break;
-		case PRE_CONSOLE_FLUSHPOINT2_EVERYTHING_BUT_SERIAL:
-			console_putc_noserial(stdout,
-					      buffer[CIRC_BUF_IDX(i++)]);
-			break;
-		}
+	while (in < gd->precon_buf_idx)
+		buf_out[out++] = buf_in[CIRC_BUF_IDX(in++)];
+
+	buf_out[out] = 0;
+
+	switch (flushpoint) {
+	case PRE_CONSOLE_FLUSHPOINT1_SERIAL:
+		puts(buf_out);
+		break;
+	case PRE_CONSOLE_FLUSHPOINT2_EVERYTHING_BUT_SERIAL:
+		console_puts_noserial(stdout, buf_out);
+		break;
+	}
 }
 #else
 static inline void pre_console_putc(const char c) {}
diff --git a/include/configs/sunxi-common.h b/include/configs/sunxi-common.h
index 723067e..0495dc3 100644
--- a/include/configs/sunxi-common.h
+++ b/include/configs/sunxi-common.h
@@ -336,7 +336,7 @@
 
 /* Enable pre-console buffer to get complete log on the VGA console */
 #define CONFIG_PRE_CONSOLE_BUFFER
-#define CONFIG_PRE_CON_BUF_SZ		(1024 * 1024)
+#define CONFIG_PRE_CON_BUF_SZ		4096 /* Aprox 2 80*25 screens */
 /* Use the room between the end of bootm_size and the framebuffer */
 #define CONFIG_PRE_CON_BUF_ADDR		0x4f000000