sunxi: improve throughput in the sunxi_mmc driver

Throughput tests have shown the sunxi_mmc driver to take over 10s to
read 10MB from a fast eMMC device due to excessive delays in polling
loops.

This commit restructures the main polling loops to use get_timer(...)
to determine whether a (millisecond) timeout has expired.  We choose
not to use the wait_bit function, as we don't need interruptibility
with ctrl-c and have at least one case where two bits (one for an
error condition and another one for completion) need to be read, so
using wait_bit would not have added to the clarity.

The observed speedup in testing on an A31 is greater than 10x (e.g. a
10MB write decreases from 9.302s to 0.884s).

Signed-off-by: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Tested-by: Mylène Josserand <mylene.josserand@bootlin.com>
Acked-by: Jagan Teki <jagan@openedev.com>
Reviewed-by: Tom Rini <trini@konsulko.com>
diff --git a/drivers/mmc/sunxi_mmc.c b/drivers/mmc/sunxi_mmc.c
index f4c245c..5292f2d 100644
--- a/drivers/mmc/sunxi_mmc.c
+++ b/drivers/mmc/sunxi_mmc.c
@@ -188,15 +188,16 @@
 {
 	unsigned int cmd;
 	unsigned timeout_msecs = 2000;
+	unsigned long start = get_timer(0);
 
 	cmd = SUNXI_MMC_CMD_START |
 	      SUNXI_MMC_CMD_UPCLK_ONLY |
 	      SUNXI_MMC_CMD_WAIT_PRE_OVER;
+
 	writel(cmd, &priv->reg->cmd);
 	while (readl(&priv->reg->cmd) & SUNXI_MMC_CMD_START) {
-		if (!timeout_msecs--)
+		if (get_timer(start) > timeout_msecs)
 			return -1;
-		udelay(1000);
 	}
 
 	/* clock update sets various irq status bits, clear these */
@@ -277,18 +278,21 @@
 	unsigned i;
 	unsigned *buff = (unsigned int *)(reading ? data->dest : data->src);
 	unsigned byte_cnt = data->blocksize * data->blocks;
-	unsigned timeout_usecs = (byte_cnt >> 8) * 1000;
-	if (timeout_usecs < 2000000)
-		timeout_usecs = 2000000;
+	unsigned timeout_msecs = byte_cnt >> 8;
+	unsigned long  start;
+
+	if (timeout_msecs < 2000)
+		timeout_msecs = 2000;
 
 	/* Always read / write data through the CPU */
 	setbits_le32(&priv->reg->gctrl, SUNXI_MMC_GCTRL_ACCESS_BY_AHB);
 
+	start = get_timer(0);
+
 	for (i = 0; i < (byte_cnt >> 2); i++) {
 		while (readl(&priv->reg->status) & status_bit) {
-			if (!timeout_usecs--)
+			if (get_timer(start) > timeout_msecs)
 				return -1;
-			udelay(1);
 		}
 
 		if (reading)
@@ -304,16 +308,16 @@
 			 uint timeout_msecs, uint done_bit, const char *what)
 {
 	unsigned int status;
+	unsigned long start = get_timer(0);
 
 	do {
 		status = readl(&priv->reg->rint);
-		if (!timeout_msecs-- ||
+		if ((get_timer(start) > timeout_msecs) ||
 		    (status & SUNXI_MMC_RINT_INTERRUPT_ERROR_BIT)) {
 			debug("%s timeout %x\n", what,
 			      status & SUNXI_MMC_RINT_INTERRUPT_ERROR_BIT);
 			return -ETIMEDOUT;
 		}
-		udelay(1000);
 	} while (!(status & done_bit));
 
 	return 0;
@@ -405,15 +409,16 @@
 	}
 
 	if (cmd->resp_type & MMC_RSP_BUSY) {
+		unsigned long start = get_timer(0);
 		timeout_msecs = 2000;
+
 		do {
 			status = readl(&priv->reg->status);
-			if (!timeout_msecs--) {
+			if (get_timer(start) > timeout_msecs) {
 				debug("busy timeout\n");
 				error = -ETIMEDOUT;
 				goto out;
 			}
-			udelay(1000);
 		} while (status & SUNXI_MMC_STATUS_CARD_DATA_BUSY);
 	}