Merge branch 'master' of git://git.denx.de/u-boot-mmc

* 'master' of git://git.denx.de/u-boot-mmc:
  tegra2: Move MMC clock initialization into MMC driver
  mmc: sdhci: fix sdma bug for large file transfer
  mmc: sdhci: add timeout for data transfer
  mmc: sdhci: add mmc structure for host
  mmc: sdhci: fix build warning
  mmc: sdhci: fix cache flush
  mmc: CMD7:MMC_CMD_SELECT_CARD response fix
  mmc: test mmc bus width on startup
  mmc: change magic number to macro define
  mmc: mv_sdhci: fix 8bus width access for 88SV331xV5
  mmc: retry the cmd8 to meet 74 clocks requirement in the spec
  PXA: Add MMC driver using the generic MMC framework
diff --git a/arch/arm/include/asm/arch-pxa/regs-mmc.h b/arch/arm/include/asm/arch-pxa/regs-mmc.h
new file mode 100644
index 0000000..b5c9b3b
--- /dev/null
+++ b/arch/arm/include/asm/arch-pxa/regs-mmc.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2011 Marek Vasut <marek.vasut@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef __REGS_MMC_H__
+#define __REGS_MMC_H__
+
+#define MMC0_BASE	0x41100000
+#define MMC1_BASE	0x42000000
+
+int pxa_mmc_register(int card_index);
+
+struct pxa_mmc_regs {
+	uint32_t	strpcl;
+	uint32_t	stat;
+	uint32_t	clkrt;
+	uint32_t	spi;
+	uint32_t	cmdat;
+	uint32_t	resto;
+	uint32_t	rdto;
+	uint32_t	blklen;
+	uint32_t	nob;
+	uint32_t	prtbuf;
+	uint32_t	i_mask;
+	uint32_t	i_reg;
+	uint32_t	cmd;
+	uint32_t	argh;
+	uint32_t	argl;
+	uint32_t	res;
+	uint32_t	rxfifo;
+	uint32_t	txfifo;
+};
+
+/* MMC_STRPCL */
+#define MMC_STRPCL_STOP_CLK		(1 << 0)
+#define MMC_STRPCL_START_CLK		(1 << 1)
+
+/* MMC_STAT */
+#define MMC_STAT_END_CMD_RES		(1 << 13)
+#define MMC_STAT_PRG_DONE		(1 << 12)
+#define MMC_STAT_DATA_TRAN_DONE		(1 << 11)
+#define MMC_STAT_CLK_EN			(1 << 8)
+#define MMC_STAT_RECV_FIFO_FULL		(1 << 7)
+#define MMC_STAT_XMIT_FIFO_EMPTY	(1 << 6)
+#define MMC_STAT_RES_CRC_ERROR		(1 << 5)
+#define MMC_STAT_SPI_READ_ERROR_TOKEN	(1 << 4)
+#define MMC_STAT_CRC_READ_ERROR		(1 << 3)
+#define MMC_STAT_CRC_WRITE_ERROR	(1 << 2)
+#define MMC_STAT_TIME_OUT_RESPONSE	(1 << 1)
+#define MMC_STAT_READ_TIME_OUT		(1 << 0)
+
+/* MMC_CLKRT */
+#define MMC_CLKRT_20MHZ			0
+#define MMC_CLKRT_10MHZ			1
+#define MMC_CLKRT_5MHZ			2
+#define MMC_CLKRT_2_5MHZ		3
+#define MMC_CLKRT_1_25MHZ		4
+#define MMC_CLKRT_0_625MHZ		5
+#define MMC_CLKRT_0_3125MHZ		6
+
+/* MMC_SPI */
+#define MMC_SPI_EN			(1 << 0)
+#define MMC_SPI_CS_EN			(1 << 2)
+#define MMC_SPI_CS_ADDRESS		(1 << 3)
+#define MMC_SPI_CRC_ON			(1 << 1)
+
+/* MMC_CMDAT */
+#define MMC_CMDAT_SD_4DAT		(1 << 8)
+#define MMC_CMDAT_MMC_DMA_EN		(1 << 7)
+#define MMC_CMDAT_INIT			(1 << 6)
+#define MMC_CMDAT_BUSY			(1 << 5)
+#define MMC_CMDAT_BCR			(MMC_CMDAT_BUSY | MMC_CMDAT_INIT)
+#define MMC_CMDAT_STREAM		(1 << 4)
+#define MMC_CMDAT_WRITE			(1 << 3)
+#define MMC_CMDAT_DATA_EN		(1 << 2)
+#define MMC_CMDAT_R0			0
+#define MMC_CMDAT_R1			1
+#define MMC_CMDAT_R2			2
+#define MMC_CMDAT_R3			3
+
+/* MMC_RESTO */
+#define MMC_RES_TO_MAX_MASK		0x7f
+
+/* MMC_RDTO */
+#define MMC_READ_TO_MAX_MASK		0xffff
+
+/* MMC_BLKLEN */
+#define MMC_BLK_LEN_MAX_MASK		0x3ff
+
+/* MMC_PRTBUF */
+#define MMC_PRTBUF_BUF_PART_FULL	(1 << 0)
+
+/* MMC_I_MASK */
+#define MMC_I_MASK_TXFIFO_WR_REQ	(1 << 6)
+#define MMC_I_MASK_RXFIFO_RD_REQ	(1 << 5)
+#define MMC_I_MASK_CLK_IS_OFF		(1 << 4)
+#define MMC_I_MASK_STOP_CMD		(1 << 3)
+#define MMC_I_MASK_END_CMD_RES		(1 << 2)
+#define MMC_I_MASK_PRG_DONE		(1 << 1)
+#define MMC_I_MASK_DATA_TRAN_DONE	(1 << 0)
+#define MMC_I_MASK_ALL			0x7f
+
+
+/* MMC_I_REG */
+#define MMC_I_REG_TXFIFO_WR_REQ		(1 << 6)
+#define MMC_I_REG_RXFIFO_RD_REQ		(1 << 5)
+#define MMC_I_REG_CLK_IS_OFF		(1 << 4)
+#define MMC_I_REG_STOP_CMD		(1 << 3)
+#define MMC_I_REG_END_CMD_RES		(1 << 2)
+#define MMC_I_REG_PRG_DONE		(1 << 1)
+#define MMC_I_REG_DATA_TRAN_DONE	(1 << 0)
+
+/* MMC_CMD */
+#define MMC_CMD_INDEX_MAX		0x6f
+
+#define MMC_R1_IDLE_STATE		0x01
+#define MMC_R1_ERASE_STATE		0x02
+#define MMC_R1_ILLEGAL_CMD		0x04
+#define MMC_R1_COM_CRC_ERR		0x08
+#define MMC_R1_ERASE_SEQ_ERR		0x01
+#define MMC_R1_ADDR_ERR			0x02
+#define MMC_R1_PARAM_ERR		0x04
+
+#define MMC_R1B_WP_ERASE_SKIP		0x0002
+#define MMC_R1B_ERR			0x0004
+#define MMC_R1B_CC_ERR			0x0008
+#define MMC_R1B_CARD_ECC_ERR		0x0010
+#define MMC_R1B_WP_VIOLATION		0x0020
+#define MMC_R1B_ERASE_PARAM		0x0040
+#define MMC_R1B_OOR			0x0080
+#define MMC_R1B_IDLE_STATE		0x0100
+#define MMC_R1B_ERASE_RESET		0x0200
+#define MMC_R1B_ILLEGAL_CMD		0x0400
+#define MMC_R1B_COM_CRC_ERR		0x0800
+#define MMC_R1B_ERASE_SEQ_ERR		0x1000
+#define MMC_R1B_ADDR_ERR		0x2000
+#define MMC_R1B_PARAM_ERR		0x4000
+
+#endif	/* __REGS_MMC_H__ */
diff --git a/board/nvidia/common/board.c b/board/nvidia/common/board.c
index d13537d..370a259 100644
--- a/board/nvidia/common/board.c
+++ b/board/nvidia/common/board.c
@@ -102,16 +102,6 @@
 
 #ifdef CONFIG_TEGRA2_MMC
 /*
- * Routine: clock_init_mmc
- * Description: init the PLL and clocks for the SDMMC controllers
- */
-static void clock_init_mmc(void)
-{
-	clock_start_periph_pll(PERIPH_ID_SDMMC4, CLOCK_ID_PERIPH, 20000000);
-	clock_start_periph_pll(PERIPH_ID_SDMMC3, CLOCK_ID_PERIPH, 20000000);
-}
-
-/*
  * Routine: pin_mux_mmc
  * Description: setup the pin muxes/tristate values for the SDMMC(s)
  */
@@ -157,8 +147,7 @@
 int board_mmc_init(bd_t *bd)
 {
 	debug("board_mmc_init called\n");
-	/* Enable clocks, muxes, etc. for SDMMC controllers */
-	clock_init_mmc();
+	/* Enable muxes, etc. for SDMMC controllers */
 	pin_mux_mmc();
 	gpio_config_mmc();
 
diff --git a/drivers/mmc/Makefile b/drivers/mmc/Makefile
index 6e94860..9f9db75 100644
--- a/drivers/mmc/Makefile
+++ b/drivers/mmc/Makefile
@@ -38,6 +38,7 @@
 COBJS-$(CONFIG_MXC_MMC) += mxcmmc.o
 COBJS-$(CONFIG_OMAP_HSMMC) += omap_hsmmc.o
 COBJS-$(CONFIG_PXA_MMC) += pxa_mmc.o
+COBJS-$(CONFIG_PXA_MMC_GENERIC) += pxa_mmc_gen.o
 COBJS-$(CONFIG_S5P_MMC) += s5p_mmc.o
 COBJS-$(CONFIG_SDHCI) += sdhci.o
 COBJS-$(CONFIG_SH_MMCIF) += sh_mmcif.o
diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c
index e5fedb3..37ce6e8 100644
--- a/drivers/mmc/mmc.c
+++ b/drivers/mmc/mmc.c
@@ -631,14 +631,12 @@
 	if (mmc->version < MMC_VERSION_4)
 		return 0;
 
-	mmc->card_caps |= MMC_MODE_4BIT;
-
 	err = mmc_send_ext_csd(mmc, ext_csd);
 
 	if (err)
 		return err;
 
-	cardtype = ext_csd[196] & 0xf;
+	cardtype = ext_csd[EXT_CSD_CARD_TYPE] & 0xf;
 
 	err = mmc_switch(mmc, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_HS_TIMING, 1);
 
@@ -652,7 +650,7 @@
 		return err;
 
 	/* No high-speed support */
-	if (!ext_csd[185])
+	if (!ext_csd[EXT_CSD_HS_TIMING])
 		return 0;
 
 	/* High Speed is set, there are two types: 52MHz and 26MHz */
@@ -856,11 +854,12 @@
 
 int mmc_startup(struct mmc *mmc)
 {
-	int err;
+	int err, width;
 	uint mult, freq;
 	u64 cmult, csize, capacity;
 	struct mmc_cmd cmd;
 	ALLOC_CACHE_ALIGN_BUFFER(char, ext_csd, 512);
+	ALLOC_CACHE_ALIGN_BUFFER(char, test_csd, 512);
 	int timeout = 1000;
 
 #ifdef CONFIG_MMC_SPI_CRC_ON
@@ -989,7 +988,7 @@
 	/* Select the card, and put it into Transfer Mode */
 	if (!mmc_host_is_spi(mmc)) { /* cmd not supported in spi */
 		cmd.cmdidx = MMC_CMD_SELECT_CARD;
-		cmd.resp_type = MMC_RSP_R1b;
+		cmd.resp_type = MMC_RSP_R1;
 		cmd.cmdarg = mmc->rca << 16;
 		cmd.flags = 0;
 		err = mmc_send_cmd(mmc, &cmd, NULL);
@@ -1006,14 +1005,16 @@
 	if (!IS_SD(mmc) && (mmc->version >= MMC_VERSION_4)) {
 		/* check  ext_csd version and capacity */
 		err = mmc_send_ext_csd(mmc, ext_csd);
-		if (!err & (ext_csd[192] >= 2)) {
+		if (!err & (ext_csd[EXT_CSD_REV] >= 2)) {
 			/*
 			 * According to the JEDEC Standard, the value of
 			 * ext_csd's capacity is valid if the value is more
 			 * than 2GB
 			 */
-			capacity = ext_csd[212] << 0 | ext_csd[213] << 8 |
-				   ext_csd[214] << 16 | ext_csd[215] << 24;
+			capacity = ext_csd[EXT_CSD_SEC_CNT] << 0
+					| ext_csd[EXT_CSD_SEC_CNT + 1] << 8
+					| ext_csd[EXT_CSD_SEC_CNT + 2] << 16
+					| ext_csd[EXT_CSD_SEC_CNT + 3] << 24;
 			capacity *= 512;
 			if ((capacity >> 20) > 2 * 1024)
 				mmc->capacity = capacity;
@@ -1024,8 +1025,9 @@
 		 * group size from ext_csd directly, or calculate
 		 * the group size from the csd value.
 		 */
-		if (ext_csd[175])
-			mmc->erase_grp_size = ext_csd[224] * 512 * 1024;
+		if (ext_csd[EXT_CSD_ERASE_GROUP_DEF])
+			mmc->erase_grp_size =
+			      ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE] * 512 * 1024;
 		else {
 			int erase_gsz, erase_gmul;
 			erase_gsz = (mmc->csd[2] & 0x00007c00) >> 10;
@@ -1035,8 +1037,8 @@
 		}
 
 		/* store the partition info of emmc */
-		if (ext_csd[160] & PART_SUPPORT)
-			mmc->part_config = ext_csd[179];
+		if (ext_csd[EXT_CSD_PARTITIONING_SUPPORT] & PART_SUPPORT)
+			mmc->part_config = ext_csd[EXT_CSD_PART_CONF];
 	}
 
 	if (IS_SD(mmc))
@@ -1077,26 +1079,35 @@
 		else
 			mmc_set_clock(mmc, 25000000);
 	} else {
-		if (mmc->card_caps & MMC_MODE_4BIT) {
+		for (width = EXT_CSD_BUS_WIDTH_8; width >= 0; width--) {
 			/* Set the card to use 4 bit*/
 			err = mmc_switch(mmc, EXT_CSD_CMD_SET_NORMAL,
-					EXT_CSD_BUS_WIDTH,
-					EXT_CSD_BUS_WIDTH_4);
+					EXT_CSD_BUS_WIDTH, width);
 
 			if (err)
-				return err;
+				continue;
 
-			mmc_set_bus_width(mmc, 4);
-		} else if (mmc->card_caps & MMC_MODE_8BIT) {
-			/* Set the card to use 8 bit*/
-			err = mmc_switch(mmc, EXT_CSD_CMD_SET_NORMAL,
-					EXT_CSD_BUS_WIDTH,
-					EXT_CSD_BUS_WIDTH_8);
+			if (!width) {
+				mmc_set_bus_width(mmc, 1);
+				break;
+			} else
+				mmc_set_bus_width(mmc, 4 * width);
 
-			if (err)
-				return err;
+			err = mmc_send_ext_csd(mmc, test_csd);
+			if (!err && ext_csd[EXT_CSD_PARTITIONING_SUPPORT] \
+				    == test_csd[EXT_CSD_PARTITIONING_SUPPORT]
+				 && ext_csd[EXT_CSD_ERASE_GROUP_DEF] \
+				    == test_csd[EXT_CSD_ERASE_GROUP_DEF] \
+				 && ext_csd[EXT_CSD_REV] \
+				    == test_csd[EXT_CSD_REV]
+				 && ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE] \
+				    == test_csd[EXT_CSD_HC_ERASE_GRP_SIZE]
+				 && memcmp(&ext_csd[EXT_CSD_SEC_CNT], \
+					&test_csd[EXT_CSD_SEC_CNT], 4) == 0) {
 
-			mmc_set_bus_width(mmc, 8);
+				mmc->card_caps |= width;
+				break;
+			}
 		}
 
 		if (mmc->card_caps & MMC_MODE_HS) {
@@ -1179,7 +1190,7 @@
 
 int mmc_init(struct mmc *mmc)
 {
-	int err;
+	int err, retry = 3;
 
 	if (mmc->has_init)
 		return 0;
@@ -1202,7 +1213,19 @@
 	mmc->part_num = 0;
 
 	/* Test for SD version 2 */
-	err = mmc_send_if_cond(mmc);
+	/*
+	 * Retry up to 3 times: some controllers use dynamic clock gating
+	 * and only start toggling the clock when the first cmd0 is sent,
+	 * while some cards strictly obey the 74-clock rule. The interval
+	 * between cmd0 and this cmd8 may then be too short, so retry to
+	 * fulfil the clock requirement.
+	 */
+	do {
+		err = mmc_send_if_cond(mmc);
+	} while (--retry > 0 && err);
+
+	if (err)
+		return err;
 
 	/* Now try to get the SD card's operating condition */
 	err = sd_send_op_cond(mmc);
diff --git a/drivers/mmc/mv_sdhci.c b/drivers/mmc/mv_sdhci.c
index 9e59951..f92caeb 100644
--- a/drivers/mmc/mv_sdhci.c
+++ b/drivers/mmc/mv_sdhci.c
@@ -2,6 +2,33 @@
 #include <malloc.h>
 #include <sdhci.h>
 
+#ifdef CONFIG_MMC_SDHCI_IO_ACCESSORS
+static struct sdhci_ops mv_ops;
+
+#if defined(CONFIG_SHEEVA_88SV331xV5)
+#define SD_CE_ATA_2	0xEA
+#define  MMC_CARD	0x1000
+#define  MMC_WIDTH	0x0100
+static inline void mv_sdhci_writeb(struct sdhci_host *host, u8 val, int reg)
+{
+	struct mmc *mmc = host->mmc;
+	u32 ata = (u32)host->ioaddr + SD_CE_ATA_2;
+
+	if (!IS_SD(mmc) && reg == SDHCI_HOST_CONTROL) {
+		if (mmc->bus_width == 8)
+			writew(readw(ata) | (MMC_CARD | MMC_WIDTH), ata);
+		else
+			writew(readw(ata) & ~(MMC_CARD | MMC_WIDTH), ata);
+	}
+
+	writeb(val, host->ioaddr + reg);
+}
+
+#else
+#define mv_sdhci_writeb	NULL
+#endif /* CONFIG_SHEEVA_88SV331xV5 */
+#endif /* CONFIG_MMC_SDHCI_IO_ACCESSORS */
+
 static char *MVSDH_NAME = "mv_sdh";
 int mv_sdh_init(u32 regbase, u32 max_clk, u32 min_clk, u32 quirks)
 {
@@ -15,6 +42,12 @@
 	host->name = MVSDH_NAME;
 	host->ioaddr = (void *)regbase;
 	host->quirks = quirks;
+#ifdef CONFIG_MMC_SDHCI_IO_ACCESSORS
+	memset(&mv_ops, 0, sizeof(struct sdhci_ops));
+	if (mv_sdhci_writeb != NULL)
+		mv_ops.write_b = mv_sdhci_writeb;
+	host->ops = &mv_ops;
+#endif
 	host->version = sdhci_readw(host, SDHCI_HOST_VERSION);
 	add_sdhci(host, max_clk, min_clk);
 	return 0;
diff --git a/drivers/mmc/pxa_mmc_gen.c b/drivers/mmc/pxa_mmc_gen.c
new file mode 100644
index 0000000..28e37b4
--- /dev/null
+++ b/drivers/mmc/pxa_mmc_gen.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (C) 2010 Marek Vasut <marek.vasut@gmail.com>
+ *
+ * Loosely based on the old code and Linux's PXA MMC driver
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <config.h>
+#include <common.h>
+#include <malloc.h>
+
+#include <mmc.h>
+#include <asm/errno.h>
+#include <asm/arch/hardware.h>
+#include <asm/arch/regs-mmc.h>
+#include <asm/io.h>
+
+/* PXAMMC Generic default config for various CPUs */
+#if defined(CONFIG_PXA250)
+#define PXAMMC_FIFO_SIZE	1
+#define PXAMMC_MIN_SPEED	312500
+#define PXAMMC_MAX_SPEED	20000000
+#define PXAMMC_HOST_CAPS	(0)
+#elif defined(CONFIG_PXA27X)
+#define PXAMMC_CRC_SKIP
+#define PXAMMC_FIFO_SIZE	32
+#define PXAMMC_MIN_SPEED	304000
+#define PXAMMC_MAX_SPEED	19500000
+#define PXAMMC_HOST_CAPS	(MMC_MODE_4BIT)
+#elif defined(CONFIG_CPU_MONAHANS)
+#define PXAMMC_FIFO_SIZE	32
+#define PXAMMC_MIN_SPEED	304000
+#define PXAMMC_MAX_SPEED	26000000
+#define PXAMMC_HOST_CAPS	(MMC_MODE_4BIT | MMC_MODE_HS)
+#else
+#error "This CPU isn't supported by PXA MMC!"
+#endif
+
+#define MMC_STAT_ERRORS							\
+	(MMC_STAT_RES_CRC_ERROR | MMC_STAT_SPI_READ_ERROR_TOKEN |	\
+	MMC_STAT_CRC_READ_ERROR | MMC_STAT_TIME_OUT_RESPONSE |		\
+	MMC_STAT_READ_TIME_OUT | MMC_STAT_CRC_WRITE_ERROR)
+
+/* 1 millisecond (in wait cycles below it's 100 x 10uS waits) */
+#define PXA_MMC_TIMEOUT	100
+
+struct pxa_mmc_priv {
+	struct pxa_mmc_regs *regs;
+};
+
+/* Wait for bit to be set */
+static int pxa_mmc_wait(struct mmc *mmc, uint32_t mask)
+{
+	struct pxa_mmc_priv *priv = (struct pxa_mmc_priv *)mmc->priv;
+	struct pxa_mmc_regs *regs = priv->regs;
+	unsigned int timeout = PXA_MMC_TIMEOUT;
+
+	/* Wait for bit to be set */
+	while (--timeout) {
+		if (readl(&regs->stat) & mask)
+			break;
+		udelay(10);
+	}
+
+	if (!timeout)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int pxa_mmc_stop_clock(struct mmc *mmc)
+{
+	struct pxa_mmc_priv *priv = (struct pxa_mmc_priv *)mmc->priv;
+	struct pxa_mmc_regs *regs = priv->regs;
+	unsigned int timeout = PXA_MMC_TIMEOUT;
+
+	/* If the clock isn't running, exit */
+	if (!(readl(&regs->stat) & MMC_STAT_CLK_EN))
+		return 0;
+
+	/* Tell the controller to turn off the clock */
+	writel(MMC_STRPCL_STOP_CLK, &regs->strpcl);
+
+	/* Wait until the clock is off */
+	while (--timeout) {
+		if (!(readl(&regs->stat) & MMC_STAT_CLK_EN))
+			break;
+		udelay(10);
+	}
+
+	/* The clock refused to stop, scream and die a painful death */
+	if (!timeout)
+		return -ETIMEDOUT;
+
+	/* The clock stopped correctly */
+	return 0;
+}
+
+static int pxa_mmc_start_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
+				uint32_t cmdat)
+{
+	struct pxa_mmc_priv *priv = (struct pxa_mmc_priv *)mmc->priv;
+	struct pxa_mmc_regs *regs = priv->regs;
+	int ret;
+
+	/* The card can send a "busy" response */
+	if (cmd->flags & MMC_RSP_BUSY)
+		cmdat |= MMC_CMDAT_BUSY;
+
+	/* Inform the controller about response type */
+	switch (cmd->resp_type) {
+	case MMC_RSP_R1:
+	case MMC_RSP_R1b:
+		cmdat |= MMC_CMDAT_R1;
+		break;
+	case MMC_RSP_R2:
+		cmdat |= MMC_CMDAT_R2;
+		break;
+	case MMC_RSP_R3:
+		cmdat |= MMC_CMDAT_R3;
+		break;
+	default:
+		break;
+	}
+
+	/* Load command and its arguments into the controller */
+	writel(cmd->cmdidx, &regs->cmd);
+	writel(cmd->cmdarg >> 16, &regs->argh);
+	writel(cmd->cmdarg & 0xffff, &regs->argl);
+	writel(cmdat, &regs->cmdat);
+
+	/* Start the controller clock and wait until it is running */
+	writel(MMC_STRPCL_START_CLK, &regs->strpcl);
+
+	ret = pxa_mmc_wait(mmc, MMC_STAT_CLK_EN);
+	if (ret)
+		return ret;
+
+	/* Correct and happy end */
+	return 0;
+}
+
+static int pxa_mmc_cmd_done(struct mmc *mmc, struct mmc_cmd *cmd)
+{
+	struct pxa_mmc_priv *priv = (struct pxa_mmc_priv *)mmc->priv;
+	struct pxa_mmc_regs *regs = priv->regs;
+	uint32_t a, b, c;
+	int i;
+	int stat;
+
+	/* Read the controller status */
+	stat = readl(&regs->stat);
+
+	/*
+	 * Linux says:
+	 * Did I mention this is Sick.  We always need to
+	 * discard the upper 8 bits of the first 16-bit word.
+	 */
+	a = readl(&regs->res) & 0xffff;
+	for (i = 0; i < 4; i++) {
+		b = readl(&regs->res) & 0xffff;
+		c = readl(&regs->res) & 0xffff;
+		cmd->response[i] = (a << 24) | (b << 8) | (c >> 8);
+		a = c;
+	}
+
+	/* The command response didn't arrive */
+	if (stat & MMC_STAT_TIME_OUT_RESPONSE)
+		return -ETIMEDOUT;
+	else if (stat & MMC_STAT_RES_CRC_ERROR && cmd->flags & MMC_RSP_CRC) {
+#ifdef	PXAMMC_CRC_SKIP
+		if (cmd->flags & MMC_RSP_136 && cmd->response[0] & (1 << 31))
+			printf("Ignoring CRC, this may be dangerous!\n");
+		else
+#endif
+		return -EILSEQ;
+	}
+
+	/* The command response was successfully read */
+	return 0;
+}
+
+static int pxa_mmc_do_read_xfer(struct mmc *mmc, struct mmc_data *data)
+{
+	struct pxa_mmc_priv *priv = (struct pxa_mmc_priv *)mmc->priv;
+	struct pxa_mmc_regs *regs = priv->regs;
+	uint32_t len;
+	uint32_t *buf = (uint32_t *)data->dest;
+	int size;
+	int ret;
+
+	len = data->blocks * data->blocksize;
+
+	while (len) {
+		/* The controller has data ready */
+		if (readl(&regs->i_reg) & MMC_I_REG_RXFIFO_RD_REQ) {
+			size = min(len, PXAMMC_FIFO_SIZE);
+			len -= size;
+			size /= 4;
+
+			/* Read data into the buffer */
+			while (size--)
+				*buf++ = readl(&regs->rxfifo);
+
+		}
+
+		if (readl(&regs->stat) & MMC_STAT_ERRORS)
+			return -EIO;
+	}
+
+	/* Wait for the transmission-done interrupt */
+	ret = pxa_mmc_wait(mmc, MMC_STAT_DATA_TRAN_DONE);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int pxa_mmc_do_write_xfer(struct mmc *mmc, struct mmc_data *data)
+{
+	struct pxa_mmc_priv *priv = (struct pxa_mmc_priv *)mmc->priv;
+	struct pxa_mmc_regs *regs = priv->regs;
+	uint32_t len;
+	uint32_t *buf = (uint32_t *)data->src;
+	int size;
+	int ret;
+
+	len = data->blocks * data->blocksize;
+
+	while (len) {
+		/* The controller is ready to receive data */
+		if (readl(&regs->i_reg) & MMC_I_REG_TXFIFO_WR_REQ) {
+			size = min(len, PXAMMC_FIFO_SIZE);
+			len -= size;
+			size /= 4;
+
+			while (size--)
+				writel(*buf++, &regs->txfifo);
+
+			if (min(len, PXAMMC_FIFO_SIZE) < 32)
+				writel(MMC_PRTBUF_BUF_PART_FULL, &regs->prtbuf);
+		}
+
+		if (readl(&regs->stat) & MMC_STAT_ERRORS)
+			return -EIO;
+	}
+
+	/* Wait for the transmission-done interrupt */
+	ret = pxa_mmc_wait(mmc, MMC_STAT_DATA_TRAN_DONE);
+	if (ret)
+		return ret;
+
+	/* Wait until the data are really written to the card */
+	ret = pxa_mmc_wait(mmc, MMC_STAT_PRG_DONE);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int pxa_mmc_request(struct mmc *mmc, struct mmc_cmd *cmd,
+				struct mmc_data *data)
+{
+	struct pxa_mmc_priv *priv = (struct pxa_mmc_priv *)mmc->priv;
+	struct pxa_mmc_regs *regs = priv->regs;
+	uint32_t cmdat = 0;
+	int ret;
+
+	/* Stop the controller */
+	ret = pxa_mmc_stop_clock(mmc);
+	if (ret)
+		return ret;
+
+	/* If we're doing data transfer, configure the controller accordingly */
+	if (data) {
+		writel(data->blocks, &regs->nob);
+		writel(data->blocksize, &regs->blklen);
+		/* This delay can be optimized, but stick with max value */
+		writel(0xffff, &regs->rdto);
+		cmdat |= MMC_CMDAT_DATA_EN;
+		if (data->flags & MMC_DATA_WRITE)
+			cmdat |= MMC_CMDAT_WRITE;
+	}
+
+	/* Run in 4bit mode if the card can do it */
+	if (mmc->bus_width == 4)
+		cmdat |= MMC_CMDAT_SD_4DAT;
+
+	/* Execute the command */
+	ret = pxa_mmc_start_cmd(mmc, cmd, cmdat);
+	if (ret)
+		return ret;
+
+	/* Wait until the command completes */
+	ret = pxa_mmc_wait(mmc, MMC_STAT_END_CMD_RES);
+	if (ret)
+		return ret;
+
+	/* Read back the result */
+	ret = pxa_mmc_cmd_done(mmc, cmd);
+	if (ret)
+		return ret;
+
+	/* In case there was a data transfer scheduled, do it */
+	if (data) {
+		if (data->flags & MMC_DATA_WRITE)
+			pxa_mmc_do_write_xfer(mmc, data);
+		else
+			pxa_mmc_do_read_xfer(mmc, data);
+	}
+
+	return 0;
+}
+
+static void pxa_mmc_set_ios(struct mmc *mmc)
+{
+	struct pxa_mmc_priv *priv = (struct pxa_mmc_priv *)mmc->priv;
+	struct pxa_mmc_regs *regs = priv->regs;
+	uint32_t tmp;
+	uint32_t pxa_mmc_clock;
+
+	if (!mmc->clock) {
+		pxa_mmc_stop_clock(mmc);
+		return;
+	}
+
+	/* PXA3xx can do 26MHz with special settings. */
+	if (mmc->clock == 26000000) {
+		writel(0x7, &regs->clkrt);
+		return;
+	}
+
+	/* Set clock to the card the usual way. */
+	pxa_mmc_clock = 0;
+	tmp = mmc->f_max / mmc->clock;
+	tmp += tmp % 2;
+
+	while (tmp > 1) {
+		pxa_mmc_clock++;
+		tmp >>= 1;
+	}
+
+	writel(pxa_mmc_clock, &regs->clkrt);
+}
+
+static int pxa_mmc_init(struct mmc *mmc)
+{
+	struct pxa_mmc_priv *priv = (struct pxa_mmc_priv *)mmc->priv;
+	struct pxa_mmc_regs *regs = priv->regs;
+
+	/* Make sure the clock is stopped */
+	pxa_mmc_stop_clock(mmc);
+
+	/* Turn off SPI mode */
+	writel(0, &regs->spi);
+
+	/* Set up maximum timeout to wait for command response */
+	writel(MMC_RES_TO_MAX_MASK, &regs->resto);
+
+	/* Set all interrupt mask bits except TXFIFO_WR_REQ and RXFIFO_RD_REQ */
+	writel(~(MMC_I_MASK_TXFIFO_WR_REQ | MMC_I_MASK_RXFIFO_RD_REQ),
+		&regs->i_mask);
+	return 0;
+}
+
+int pxa_mmc_register(int card_index)
+{
+	struct mmc *mmc;
+	struct pxa_mmc_priv *priv;
+	uint32_t reg;
+	int ret = -ENOMEM;
+
+	mmc = malloc(sizeof(struct mmc));
+	if (!mmc)
+		goto err0;
+
+	priv = malloc(sizeof(struct pxa_mmc_priv));
+	if (!priv)
+		goto err1;
+
+	switch (card_index) {
+	case 0:
+		priv->regs = (struct pxa_mmc_regs *)MMC0_BASE;
+		break;
+	case 1:
+		priv->regs = (struct pxa_mmc_regs *)MMC1_BASE;
+		break;
+	default:
+		printf("PXA MMC: Invalid MMC controller ID (card_index = %d)\n",
+			card_index);
+		goto err2;
+	}
+
+	mmc->priv = priv;
+
+	sprintf(mmc->name, "PXA MMC");
+	mmc->send_cmd	= pxa_mmc_request;
+	mmc->set_ios	= pxa_mmc_set_ios;
+	mmc->init	= pxa_mmc_init;
+
+	mmc->voltages	= MMC_VDD_32_33 | MMC_VDD_33_34;
+	mmc->f_max	= PXAMMC_MAX_SPEED;
+	mmc->f_min	= PXAMMC_MIN_SPEED;
+	mmc->host_caps	= PXAMMC_HOST_CAPS;
+
+	mmc->b_max = 0;
+
+#ifndef	CONFIG_CPU_MONAHANS	/* PXA2xx */
+	reg = readl(CKEN);
+	reg |= CKEN12_MMC;
+	writel(reg, CKEN);
+#else				/* PXA3xx */
+	reg = readl(CKENA);
+	reg |= CKENA_12_MMC0 | CKENA_13_MMC1;
+	writel(reg, CKENA);
+#endif
+
+	mmc_register(mmc);
+
+	return 0;
+
+err2:
+	free(priv);
+err1:
+	free(mmc);
+err0:
+	return ret;
+}
diff --git a/drivers/mmc/sdhci.c b/drivers/mmc/sdhci.c
index 9ebd33d..fce0ef0 100644
--- a/drivers/mmc/sdhci.c
+++ b/drivers/mmc/sdhci.c
@@ -81,8 +81,9 @@
 static int sdhci_transfer_data(struct sdhci_host *host, struct mmc_data *data,
 				unsigned int start_addr)
 {
-	unsigned int stat, rdy, mask, block = 0;
+	unsigned int stat, rdy, mask, timeout, block = 0;
 
+	timeout = 10000;
 	rdy = SDHCI_INT_SPACE_AVAIL | SDHCI_INT_DATA_AVAIL;
 	mask = SDHCI_DATA_AVAILABLE | SDHCI_SPACE_AVAILABLE;
 	do {
@@ -103,11 +104,17 @@
 #ifdef CONFIG_MMC_SDMA
 		if (stat & SDHCI_INT_DMA_END) {
 			sdhci_writel(host, SDHCI_INT_DMA_END, SDHCI_INT_STATUS);
-			start_addr &= SDHCI_DEFAULT_BOUNDARY_SIZE - 1;
+			start_addr &= ~(SDHCI_DEFAULT_BOUNDARY_SIZE - 1);
 			start_addr += SDHCI_DEFAULT_BOUNDARY_SIZE;
 			sdhci_writel(host, start_addr, SDHCI_DMA_ADDRESS);
 		}
 #endif
+		if (timeout-- > 0)
+			udelay(10);
+		else {
+			printf("Transfer data timeout\n");
+			return -1;
+		}
 	} while (!(stat & SDHCI_INT_DATA_END));
 	return 0;
 }
@@ -196,7 +203,7 @@
 
 	sdhci_writel(host, cmd->cmdarg, SDHCI_ARGUMENT);
 #ifdef CONFIG_MMC_SDMA
-	flush_cache(0, ~0);
+	flush_cache(start_addr, trans_bytes);
 #endif
 	sdhci_writew(host, SDHCI_MAKE_CMD(cmd->cmdidx, flags), SDHCI_COMMAND);
 	do {
@@ -377,6 +384,7 @@
 	}
 
 	mmc->priv = host;
+	host->mmc = mmc;
 
 	sprintf(mmc->name, "%s", host->name);
 	mmc->send_cmd = sdhci_send_command;
diff --git a/drivers/mmc/tegra2_mmc.c b/drivers/mmc/tegra2_mmc.c
index 9e741f2..78b1190 100644
--- a/drivers/mmc/tegra2_mmc.c
+++ b/drivers/mmc/tegra2_mmc.c
@@ -435,14 +435,22 @@
 
 static int tegra2_mmc_initialize(int dev_index, int bus_width)
 {
+	struct mmc_host *host;
 	struct mmc *mmc;
 
 	debug(" mmc_initialize called\n");
 
+	host = &mmc_host[dev_index];
+
+	host->clock = 0;
+	tegra2_get_setup(host, dev_index);
+
+	clock_start_periph_pll(host->mmc_id, CLOCK_ID_PERIPH, 20000000);
+
 	mmc = &mmc_dev[dev_index];
 
 	sprintf(mmc->name, "Tegra2 SD/MMC");
-	mmc->priv = &mmc_host[dev_index];
+	mmc->priv = host;
 	mmc->send_cmd = mmc_send_cmd;
 	mmc->set_ios = mmc_set_ios;
 	mmc->init = mmc_core_init;
@@ -465,8 +473,6 @@
 	mmc->f_min = 375000;
 	mmc->f_max = 48000000;
 
-	mmc_host[dev_index].clock = 0;
-	tegra2_get_setup(&mmc_host[dev_index], dev_index);
 	mmc_register(mmc);
 
 	return 0;
diff --git a/include/mmc.h b/include/mmc.h
index 53aff9b..015a7f3 100644
--- a/include/mmc.h
+++ b/include/mmc.h
@@ -145,13 +145,15 @@
 /*
  * EXT_CSD fields
  */
-
-#define EXT_CSD_PART_CONF	179	/* R/W */
-#define EXT_CSD_BUS_WIDTH	183	/* R/W */
-#define EXT_CSD_HS_TIMING	185	/* R/W */
-#define EXT_CSD_CARD_TYPE	196	/* RO */
-#define EXT_CSD_REV		192	/* RO */
-#define EXT_CSD_SEC_CNT		212	/* RO, 4 bytes */
+#define EXT_CSD_PARTITIONING_SUPPORT	160	/* RO */
+#define EXT_CSD_ERASE_GROUP_DEF		175	/* R/W */
+#define EXT_CSD_PART_CONF		179	/* R/W */
+#define EXT_CSD_BUS_WIDTH		183	/* R/W */
+#define EXT_CSD_HS_TIMING		185	/* R/W */
+#define EXT_CSD_REV			192	/* RO */
+#define EXT_CSD_CARD_TYPE		196	/* RO */
+#define EXT_CSD_SEC_CNT			212	/* RO, 4 bytes */
+#define EXT_CSD_HC_ERASE_GRP_SIZE	224	/* RO */
 
 /*
  * EXT_CSD field definitions
diff --git a/include/sdhci.h b/include/sdhci.h
index 6d52ce9..0690938 100644
--- a/include/sdhci.h
+++ b/include/sdhci.h
@@ -27,6 +27,8 @@
 #define __SDHCI_HW_H
 
 #include <asm/io.h>
+#include <mmc.h>
+
 /*
  * Controller registers
  */
@@ -214,6 +216,9 @@
  */
 #define SDHCI_QUIRK_32BIT_DMA_ADDR	(1 << 0)
 
+/* Forward declaration to make gcc happy */
+struct sdhci_host;
+
 /*
  * Host SDMA buffer boundary. Valid values from 4K to 512K in powers of 2.
  */
@@ -236,6 +241,7 @@
 	unsigned int quirks;
 	unsigned int version;
 	unsigned int clock;
+	struct mmc *mmc;
 	const struct sdhci_ops *ops;
 };