MXC: Fix byte-ordering in SPI driver for i.MX31/i.MX51

The current SPI driver for the i.MX31 and i.MX51 controllers
uses the wrong byte ordering, because it was written to
work only with Freescale's own devices, such as the Power
Controllers (PMIC). The driver is not suitable for
general-purpose use, because the buffers passed to spi_xfer
must be 32-bit aligned, as it is used mainly to send
integers to PMIC devices.

This patch drops these limitations and makes the
driver usable with devices that are controlled by sending commands
composed of single bytes (or of an odd number of bytes), such as
SPI flashes, sensors, etc.

Because the byte ordering changes,
every existing driver that uses this controller must be adapted, too.

Signed-off-by: Stefano Babic <sbabic@denx.de>
diff --git a/drivers/misc/fsl_pmic.c b/drivers/misc/fsl_pmic.c
index dca0a1d..5ee1de1 100644
--- a/drivers/misc/fsl_pmic.c
+++ b/drivers/misc/fsl_pmic.c
@@ -46,6 +46,7 @@
 u32 pmic_reg(u32 reg, u32 val, u32 write)
 {
 	u32 pmic_tx, pmic_rx;
+	u32 tmp;
 
 	if (!slave) {
 		slave = pmic_spi_probe();
@@ -65,7 +66,9 @@
 
 	pmic_tx = (write << 31) | (reg << 25) | (val & 0x00FFFFFF);
 
-	if (spi_xfer(slave, 4 << 3, &pmic_tx, &pmic_rx,
+	tmp = cpu_to_be32(pmic_tx);
+
+	if (spi_xfer(slave, 4 << 3, &tmp, &pmic_rx,
 			SPI_XFER_BEGIN | SPI_XFER_END)) {
 		spi_release_bus(slave);
 		return -1;
@@ -73,7 +76,8 @@
 
 	if (write) {
 		pmic_tx &= ~(1 << 31);
-		if (spi_xfer(slave, 4 << 3, &pmic_tx, &pmic_rx,
+		tmp = cpu_to_be32(pmic_tx);
+		if (spi_xfer(slave, 4 << 3, &tmp, &pmic_rx,
 			SPI_XFER_BEGIN | SPI_XFER_END)) {
 			spi_release_bus(slave);
 			return -1;
@@ -81,7 +85,7 @@
 	}
 
 	spi_release_bus(slave);
-	return pmic_rx;
+	return cpu_to_be32(pmic_rx);
 }
 
 void pmic_reg_write(u32 reg, u32 value)
diff --git a/drivers/spi/mxc_spi.c b/drivers/spi/mxc_spi.c
index c6c8f60..d558137 100644
--- a/drivers/spi/mxc_spi.c
+++ b/drivers/spi/mxc_spi.c
@@ -62,6 +62,7 @@
 #define MXC_CSPICTRL_MAXBITS	0x1f
 
 #define MXC_CSPIPERIOD_32KHZ	(1 << 15)
+#define MAX_SPI_BYTES	4
 
 static unsigned long spi_bases[] = {
 	0x43fa4000,
@@ -95,6 +96,7 @@
 #define MXC_CSPICTRL_RXOVF	(1 << 6)
 
 #define MXC_CSPIPERIOD_32KHZ	(1 << 15)
+#define MAX_SPI_BYTES	32
 
 /* Bit position inside CTRL register to be associated with SS */
 #define MXC_CSPICTRL_CHAN	18
@@ -252,13 +254,15 @@
 }
 #endif
 
-static u32 spi_xchg_single(struct spi_slave *slave, u32 data, int bitlen,
-			   unsigned long flags)
+int spi_xchg_single(struct spi_slave *slave, unsigned int bitlen,
+	const u8 *dout, u8 *din, unsigned long flags)
 {
 	struct mxc_spi_slave *mxcs = to_mxc_spi_slave(slave);
+	int nbytes = (bitlen + 7) / 8;
+	u32 data, cnt, i;
 
-	if (flags & SPI_XFER_BEGIN)
-		spi_cs_activate(slave);
+	debug("%s: bitlen %d dout 0x%x din 0x%x\n",
+		__func__, bitlen, (u32)dout, (u32)din);
 
 	mxcs->ctrl_reg = (mxcs->ctrl_reg &
 		~MXC_CSPICTRL_BITCOUNT(MXC_CSPICTRL_MAXBITS)) |
@@ -273,8 +277,46 @@
 	reg_write(mxcs->base + MXC_CSPISTAT,
 		MXC_CSPICTRL_TC | MXC_CSPICTRL_RXOVF);
 
-	debug("Sending SPI 0x%x\n", data);
-	reg_write(mxcs->base + MXC_CSPITXDATA, data);
+	/*
+	 * The SPI controller works only with words,
+	 * check if less than a word is sent.
+	 * Access to the FIFO is only 32 bit
+	 */
+	if (bitlen % 32) {
+		data = 0;
+		cnt = (bitlen % 32) / 8;
+		if (dout) {
+			for (i = 0; i < cnt; i++) {
+				data = (data << 8) | (*dout++ & 0xFF);
+			}
+		}
+		debug("Sending SPI 0x%x\n", data);
+
+		reg_write(mxcs->base + MXC_CSPITXDATA, data);
+		nbytes -= cnt;
+	}
+
+	data = 0;
+
+	while (nbytes > 0) {
+		data = 0;
+		if (dout) {
+			/* Buffer is not 32-bit aligned */
+			if ((unsigned long)dout & 0x03) {
+				data = 0;
+				for (i = 0; i < 4; i++, data <<= 8) {
+					data = (data << 8) | (*dout++ & 0xFF);
+				}
+			} else {
+				data = *(u32 *)dout;
+				data = cpu_to_be32(data);
+			}
+			dout += 4;
+		}
+		debug("Sending SPI 0x%x\n", data);
+		reg_write(mxcs->base + MXC_CSPITXDATA, data);
+		nbytes -= 4;
+	}
 
 	/* FIFO is written, now starts the transfer setting the XCH bit */
 	reg_write(mxcs->base + MXC_CSPICTRL, mxcs->ctrl_reg |
@@ -288,49 +330,78 @@
 	reg_write(mxcs->base + MXC_CSPISTAT,
 		MXC_CSPICTRL_TC | MXC_CSPICTRL_RXOVF);
 
-	data = reg_read(mxcs->base + MXC_CSPIRXDATA);
-	debug("SPI Rx: 0x%x\n", data);
+	nbytes = (bitlen + 7) / 8;
 
-	if (flags & SPI_XFER_END)
-		spi_cs_deactivate(slave);
+	cnt = nbytes % 32;
 
-	return data;
+	if (bitlen % 32) {
+		data = reg_read(mxcs->base + MXC_CSPIRXDATA);
+		cnt = (bitlen % 32) / 8;
+		debug("SPI Rx unaligned: 0x%x\n", data);
+		if (din) {
+			for (i = 0; i < cnt; i++, data >>= 8) {
+				*din++ = data & 0xFF;
+			}
+		}
+		nbytes -= cnt;
+	}
+
+	while (nbytes > 0) {
+		u32 tmp;
+		tmp = reg_read(mxcs->base + MXC_CSPIRXDATA);
+		data = cpu_to_be32(tmp);
+		debug("SPI Rx: 0x%x 0x%x\n", tmp, data);
+		cnt = min(nbytes, sizeof(data));
+		if (din) {
+			memcpy(din, &data, cnt);
+			din += cnt;
+		}
+		nbytes -= cnt;
+	}
+
+	return 0;
 
 }
 
+
 int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout,
 		void *din, unsigned long flags)
 {
-	int n_blks = (bitlen + 31) / 32;
-	u32 *out_l, *in_l;
-	int i;
+	int n_bytes = (bitlen + 7) / 8;
+	int n_bits;
+	int ret;
+	u32 blk_size;
+	u8 *p_outbuf = (u8 *)dout;
+	u8 *p_inbuf = (u8 *)din;
 
-	if ((int)dout & 3 || (int)din & 3) {
-		printf("Error: unaligned buffers in: %p, out: %p\n", din, dout);
-		return 1;
+	if (!slave)
+		return -1;
+
+	if (flags & SPI_XFER_BEGIN)
+		spi_cs_activate(slave);
+
+	while (n_bytes > 0) {
+
+		if (n_bytes < MAX_SPI_BYTES)
+			blk_size = n_bytes;
+		else
+			blk_size = MAX_SPI_BYTES;
+
+		n_bits = blk_size * 8;
+
+		ret = spi_xchg_single(slave, n_bits, p_outbuf, p_inbuf, 0);
+
+		if (ret)
+			return ret;
+		if (dout)
+			p_outbuf += blk_size;
+		if (din)
+			p_inbuf += blk_size;
+		n_bytes -= blk_size;
 	}
 
-	/* This driver is currently partly broken, alert the user */
-	if (bitlen > 16 && (bitlen % 32)) {
-		printf("Error: SPI transfer with bitlen=%d is broken.\n",
-		       bitlen);
-		return 1;
-	}
-
-	for (i = 0, in_l = (u32 *)din, out_l = (u32 *)dout;
-	     i < n_blks;
-	     i++, in_l++, out_l++, bitlen -= 32) {
-		u32 data = spi_xchg_single(slave, *out_l, bitlen, flags);
-
-		/* Check if we're only transfering 8 or 16 bits */
-		if (!i) {
-			if (bitlen < 9)
-				*(u8 *)din = data;
-			else if (bitlen < 17)
-				*(u16 *)din = data;
-			else
-				*in_l = data;
-		}
+	if (flags & SPI_XFER_END) {
+		spi_cs_deactivate(slave);
 	}
 
 	return 0;
@@ -378,8 +449,10 @@
 		return NULL;
 
 	mxcs = malloc(sizeof(struct mxc_spi_slave));
-	if (!mxcs)
+	if (!mxcs) {
+		puts("mxc_spi: SPI Slave not allocated !\n");
 		return NULL;
+	}
 
 	ret = decode_cs(mxcs, cs);
 	if (ret < 0) {