video: Add support for copying to a hardware framebuffer

Some architectures use a cached framebuffer and flush the cache as needed
so that changes are visible. This is supported by U-Boot.

However x86 uses an uncached framebuffer with a 'write-combining' feature
to speed up writes.  Reads are permitted but they are extremely expensive.

Unfortunately, reading from the frame buffer is quite common, e.g. to
scroll it. This makes scrolling very slow.

Add a new feature which supports copying modified parts of the frame
buffer to the uncached hardware buffer. This speeds up scrolling by at
least 10x on x86 so the extra complexity cost seems worth it.

As a starting point, add the Kconfig, update the video structures to keep
track of the buffer and add a function to do the copy.

Signed-off-by: Simon Glass <sjg@chromium.org>
Reviewed-by: Anatolij Gustschin <agust@denx.de>
Tested-by: Bin Meng <bmeng.cn@gmail.com>
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 0cf13ad..366e9ab 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -22,6 +22,18 @@
 	  This provides backlight uclass driver that enables basic panel
 	  backlight support.
 
+config VIDEO_COPY
+	bool "Enable copying the frame buffer to a hardware copy"
+	depends on DM_VIDEO
+	help
+	  On some machines (e.g. x86), reading from the frame buffer is very
+	  slow because it is uncached. To improve performance, this feature
+	  allows the frame buffer to be kept in cached memory (allocated by
+	  U-Boot) and then copied to the hardware frame-buffer as needed.
+
+	  To use this, your video driver must set @copy_base in
+	  struct video_uc_platdata.
+
 config BACKLIGHT_PWM
 	bool "Generic PWM based Backlight Driver"
 	depends on BACKLIGHT && DM_PWM
diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c
index 1f28745..341db68 100644
--- a/drivers/video/video-uclass.c
+++ b/drivers/video/video-uclass.c
@@ -4,6 +4,7 @@
  */
 
 #include <common.h>
+#include <console.h>
 #include <cpu_func.h>
 #include <dm.h>
 #include <log.h>
@@ -201,6 +202,59 @@
 	return priv->ysize;
 }
 
+#ifdef CONFIG_VIDEO_COPY
+int video_sync_copy(struct udevice *dev, void *from, void *to)
+{
+	struct video_priv *priv = dev_get_uclass_priv(dev);
+
+	if (priv->copy_fb) {
+		long offset, size;
+
+		/* Find the offset of the first byte to copy */
+		if ((ulong)to > (ulong)from) {
+			size = to - from;
+			offset = from - priv->fb;
+		} else {
+			size = from - to;
+			offset = to - priv->fb;
+		}
+
+		/*
+		 * Allow a bit of leeway for valid requests somewhere near the
+		 * frame buffer
+		 */
+		if (offset < -priv->fb_size || offset > 2 * priv->fb_size) {
+#ifdef DEBUG
+			char str[80];
+
+			snprintf(str, sizeof(str),
+				 "[sync_copy fb=%p, from=%p, to=%p, offset=%lx]",
+				 priv->fb, from, to, offset);
+			console_puts_select_stderr(true, str);
+#endif
+			return -EFAULT;
+		}
+
+		/*
+		 * Silently crop the memcpy. This allows callers to avoid doing
+		 * this themselves. It is common for the end pointer to go a
+		 * few lines after the end of the frame buffer, since most of
+		 * the update algorithms terminate a line after their last write
+		 */
+		if (offset + size > priv->fb_size) {
+			size = priv->fb_size - offset;
+		} else if (offset < 0) {
+			size += offset;
+			offset = 0;
+		}
+
+		memcpy(priv->copy_fb + offset, priv->fb + offset, size);
+	}
+
+	return 0;
+}
+#endif
+
 /* Set up the colour map */
 static int video_pre_probe(struct udevice *dev)
 {