mmc: tmio: sdhi: Use bounce buffer to avoid DMA limitations

The R-Car SDHI DMA controller has various restrictions. To work around
those restrictions without falling back to PIO, use a bounce buffer with
a custom alignment check function which tests for those limitations.

Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Cc: Daniel Schwierzeck <daniel.schwierzeck@gmail.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Peng Fan <peng.fan@nxp.com>
Cc: Simon Glass <sjg@chromium.org>
Cc: Tom Rini <trini@konsulko.com>
diff --git a/drivers/mmc/renesas-sdhi.c b/drivers/mmc/renesas-sdhi.c
index c3b1313..231a781 100644
--- a/drivers/mmc/renesas-sdhi.c
+++ b/drivers/mmc/renesas-sdhi.c
@@ -4,6 +4,7 @@
  */
 
 #include <common.h>
+#include <bouncebuf.h>
 #include <clk.h>
 #include <fdtdec.h>
 #include <malloc.h>
@@ -689,12 +690,88 @@
 }
 #endif
 
+#define RENESAS_SDHI_DMA_ALIGNMENT	128	/* buffer address alignment, bytes */
+
+static int renesas_sdhi_addr_aligned(struct bounce_buffer *state)
+{
+	uintptr_t ubuf = (uintptr_t)state->user_buffer;
+
+	/* DMA needs the start on a RENESAS_SDHI_DMA_ALIGNMENT boundary */
+	if (!IS_ALIGNED(ubuf, RENESAS_SDHI_DMA_ALIGNMENT)) {
+		debug("Unaligned buffer address %p\n", state->user_buffer);
+		return 0;
+	}
+
+	/* Report unaligned (0) if len differs from the len_aligned field */
+	if (state->len != state->len_aligned) {
+		debug("Unaligned buffer length %zu\n", state->len);
+		return 0;
+	}
+
+#ifdef CONFIG_PHYS_64BIT
+	/* Start and one-past-end address must both fit in 32 bits */
+	if ((ubuf >> 32) || (ubuf + state->len_aligned) >> 32) {
+		debug("Buffer above 32bit boundary %p-%p\n",
+			state->user_buffer,
+			state->user_buffer + state->len_aligned);
+		return 0;
+	}
+#endif
+
+	/* All checks passed: report the user buffer as aligned (1) */
+	return 1;
+}
+
 static int renesas_sdhi_send_cmd(struct udevice *dev, struct mmc_cmd *cmd,
 				 struct mmc_data *data)
 {
+	struct bounce_buffer bbstate;
+	unsigned int bbflags;
+	bool bbok = false;
+	size_t len;
+	void *buf;
 	int ret;
 
+	if (data) {
+		if (data->flags & MMC_DATA_READ) {
+			buf = data->dest;
+			bbflags = GEN_BB_WRITE;
+		} else {
+			buf = (void *)data->src;
+			bbflags = GEN_BB_READ;
+		}
+		len = data->blocks * data->blocksize;
+
+		ret = bounce_buffer_start_extalign(&bbstate, buf, len, bbflags,
+						   RENESAS_SDHI_DMA_ALIGNMENT,
+						   renesas_sdhi_addr_aligned);
+		/*
+		 * If the amount of data to transfer is too large, we can get
+		 * -ENOMEM when starting the bounce buffer. If that happens,
+		 *  fall back to PIO as it was before, otherwise use the BB.
+		 */
+		if (!ret) {
+			bbok = true;
+			if (data->flags & MMC_DATA_READ)
+				data->dest = bbstate.bounce_buffer;
+			else
+				data->src = bbstate.bounce_buffer;
+		}
+	}
+
 	ret = tmio_sd_send_cmd(dev, cmd, data);
+
+	if (data && bbok) {
+		buf = bbstate.user_buffer;
+
+		bounce_buffer_stop(&bbstate);
+
+		if (data->flags & MMC_DATA_READ)
+			data->dest = buf;
+		else
+			data->src = buf;
+	}
+
 	if (ret)
 		return ret;