arm: mvebu: Add complete SDRAM ECC scrubbing

This patch introduces SDRAM scrubbing for ECC-enabled boards
to fill/initialize the ECC bytes. This is done via the XOR engine
to speed up the process. The scrubbing is a 2-stage process:

1) SPL scrubs the area 0 - 0x1000000 (first 16 MiB) for the main U-Boot
2) U-Boot scrubs the remaining SDRAM area(s), skipping those 16 MiB

Signed-off-by: Stefan Roese <sr@denx.de>
Cc: Luka Perkov <luka.perkov@sartura.hr>
diff --git a/arch/arm/mach-mvebu/Makefile b/arch/arm/mach-mvebu/Makefile
index 446ce04..21c56a4 100644
--- a/arch/arm/mach-mvebu/Makefile
+++ b/arch/arm/mach-mvebu/Makefile
@@ -14,6 +14,10 @@
 
 obj-y	= cpu.o
 obj-y	+= dram.o
+ifndef CONFIG_SPL_BUILD
+obj-$(CONFIG_SYS_MVEBU_DDR_A38X) += ../../../drivers/ddr/marvell/a38x/xor.o
+obj-$(CONFIG_SYS_MVEBU_DDR_AXP) += ../../../drivers/ddr/marvell/axp/xor.o
+endif
 obj-y	+= gpio.o
 obj-y	+= mbus.o
 obj-y	+= timer.o
diff --git a/arch/arm/mach-mvebu/dram.c b/arch/arm/mach-mvebu/dram.c
index a75ada3..ddc5b7e 100644
--- a/arch/arm/mach-mvebu/dram.c
+++ b/arch/arm/mach-mvebu/dram.c
@@ -13,10 +13,12 @@
 #include <asm/arch/soc.h>
 
 #ifdef CONFIG_SYS_MVEBU_DDR_A38X
-#include "../../../drivers/ddr/marvell/a38x/ddr3_init.h"
+#include "../../../drivers/ddr/marvell/axp/xor.h"
+#include "../../../drivers/ddr/marvell/axp/xor_regs.h"
 #endif
 #ifdef CONFIG_SYS_MVEBU_DDR_AXP
-#include "../../../drivers/ddr/marvell/axp/ddr3_init.h"
+#include "../../../drivers/ddr/marvell/axp/xor.h"
+#include "../../../drivers/ddr/marvell/axp/xor_regs.h"
 #endif
 
 DECLARE_GLOBAL_DATA_PTR;
@@ -37,6 +39,14 @@
 
 #define SDRAM_SIZE_MAX			0xc0000000
 
+#define SCRUB_MAGIC		0xbeefdead
+
+#define SCRB_XOR_UNIT		0
+#define SCRB_XOR_CHAN		1
+#define SCRB_XOR_WIN		0
+
+#define XEBARX_BASE_OFFS	16
+
 /*
  * mvebu_sdram_bar - reads SDRAM Base Address Register
  */
@@ -102,6 +112,124 @@
 	mvebu_sdram_bs_set(bank, size);
 }
 
+#if defined(CONFIG_SYS_MVEBU_DDR_A38X) || defined(CONFIG_SYS_MVEBU_DDR_AXP)
+static u32 xor_ctrl_save;
+static u32 xor_base_save;
+static u32 xor_mask_save;
+
+/* Save the XOR window registers, then map window SCRB_XOR_WIN to bank @cs */
+static void mv_xor_init2(u32 cs)
+{
+	u32 reg, base, size, base2;
+	static const u32 bank_attr[4] = { 0xe00, 0xd00, 0xb00, 0x700 };
+
+	xor_ctrl_save = reg_read(XOR_WINDOW_CTRL_REG(SCRB_XOR_UNIT,
+						     SCRB_XOR_CHAN));
+	xor_base_save = reg_read(XOR_BASE_ADDR_REG(SCRB_XOR_UNIT,
+						   SCRB_XOR_WIN));
+	xor_mask_save = reg_read(XOR_SIZE_MASK_REG(SCRB_XOR_UNIT,
+						   SCRB_XOR_WIN));
+
+	/* Enable Window x for each CS */
+	reg = 0x1;
+	reg |= (0x3 << 16);
+	reg_write(XOR_WINDOW_CTRL_REG(SCRB_XOR_UNIT, SCRB_XOR_CHAN), reg);
+
+	base = 0;
+	size = mvebu_sdram_bs(cs);
+	/* Test before subtracting: a bank size of 0 would wrap to 0xffffffff */
+	if (size) {
+		size--;
+		base2 = ((base / (64 << 10)) << XEBARX_BASE_OFFS) |
+			bank_attr[cs];
+		reg_write(XOR_BASE_ADDR_REG(SCRB_XOR_UNIT, SCRB_XOR_WIN),
+			  base2);
+
+		/* Size field: last byte offset in 64 KiB units, bits 31:16 */
+		size = (size / (64 << 10)) << 16;
+		reg_write(XOR_SIZE_MASK_REG(SCRB_XOR_UNIT, SCRB_XOR_WIN), size);
+	}
+
+	mv_xor_hal_init(0);
+}
+/* Write back the XOR window registers that mv_xor_init2() saved */
+static void mv_xor_finish2(void)
+{
+	reg_write(XOR_WINDOW_CTRL_REG(SCRB_XOR_UNIT, SCRB_XOR_CHAN),
+		  xor_ctrl_save);
+	reg_write(XOR_BASE_ADDR_REG(SCRB_XOR_UNIT, SCRB_XOR_WIN),
+		  xor_base_save);
+	reg_write(XOR_SIZE_MASK_REG(SCRB_XOR_UNIT, SCRB_XOR_WIN),
+		  xor_mask_save);
+}
+
+/*
+ * Scrub every non-empty SDRAM bank, passing SCRUB_MAGIC as the init pattern.
+ */
+static void dram_ecc_scrubbing(void)
+{
+	int cs;
+	u32 size, temp;
+	u32 start_addr;
+
+	/* Set the IERR bit in the SDRAM config register while scrubbing */
+	temp = reg_read(REG_SDRAM_CONFIG_ADDR);
+	temp |= (1 << REG_SDRAM_CONFIG_IERR_OFFS);
+	reg_write(REG_SDRAM_CONFIG_ADDR, temp);
+
+	for (cs = 0; cs < CONFIG_NR_DRAM_BANKS; cs++) {
+		/* Test the raw size: "size - 1" wraps for a bank size of 0 */
+		size = mvebu_sdram_bs(cs);
+		if (size == 0)
+			continue;
+
+		start_addr = 0;
+		mv_xor_init2(cs);
+
+		/*
+		 * The DDR training code from the bin_hdr / SPL already
+		 * scrubbed the DDR till 0x1000000. And the main U-Boot
+		 * is loaded to an address < 0x1000000. So we need to
+		 * skip this range to not re-scrub this area again.
+		 */
+		if (0 == cs) {
+			start_addr = 0x1000000;
+			size -= start_addr;
+		}
+
+		mv_xor_mem_init(SCRB_XOR_CHAN, start_addr, size - 1,
+				SCRUB_MAGIC, SCRUB_MAGIC);
+
+		/* Wait for previous transfer completion */
+		while (mv_xor_state_get(SCRB_XOR_CHAN) != MV_IDLE)
+			;
+
+		mv_xor_finish2();
+	}
+
+	temp = reg_read(REG_SDRAM_CONFIG_ADDR);
+	temp &= ~(1 << REG_SDRAM_CONFIG_IERR_OFFS);
+	reg_write(REG_SDRAM_CONFIG_ADDR, temp);
+}
+
+/* Return 1 if the ECC bit is set in the SDRAM config register, else 0 */
+static int ecc_enabled(void)
+{
+	u32 cfg = reg_read(REG_SDRAM_CONFIG_ADDR);
+
+	return !!(cfg & (1 << REG_SDRAM_CONFIG_ECC_OFFS));
+}
+#else /* !CONFIG_SYS_MVEBU_DDR_A38X && !CONFIG_SYS_MVEBU_DDR_AXP: stubs */
+static void dram_ecc_scrubbing(void)
+{
+}
+
+static int ecc_enabled(void)
+{
+	return 0;
+}
+#endif /* CONFIG_SYS_MVEBU_DDR_A38X || CONFIG_SYS_MVEBU_DDR_AXP */
+
 int dram_init(void)
 {
 	u64 size = 0;
@@ -135,6 +263,10 @@
 		gd->bd->bi_dram[i].size = 0;
 	}
 
+
+	if (ecc_enabled())
+		dram_ecc_scrubbing();
+
 	gd->ram_size = size;
 
 	return 0;
@@ -162,10 +294,7 @@
 
 void board_add_ram_info(int use_default)
 {
-	u32 reg;
-
-	reg = reg_read(REG_SDRAM_CONFIG_ADDR);
-	if (reg & (1 << REG_SDRAM_CONFIG_ECC_OFFS))
+	if (ecc_enabled())
 		printf(" (ECC");
 	else
 		printf(" (ECC not");
diff --git a/drivers/ddr/marvell/axp/xor.c b/drivers/ddr/marvell/axp/xor.c
index 66c96ae..54924ca 100644
--- a/drivers/ddr/marvell/axp/xor.c
+++ b/drivers/ddr/marvell/axp/xor.c
@@ -18,7 +18,6 @@
 static u32 xor_regs_base_backup[MAX_CS];
 static u32 xor_regs_mask_backup[MAX_CS];
 
-static void mv_xor_hal_init(u32 chan_num);
 static int mv_xor_cmd_set(u32 chan, int command);
 static int mv_xor_ctrl_set(u32 chan, u32 xor_ctrl);
 
@@ -110,7 +109,7 @@
  * RETURN:
  *       MV_BAD_PARAM if parameters to function invalid, MV_OK otherwise.
  */
-static void mv_xor_hal_init(u32 chan_num)
+void mv_xor_hal_init(u32 chan_num)
 {
 	u32 i;
 
diff --git a/drivers/ddr/marvell/axp/xor.h b/drivers/ddr/marvell/axp/xor.h
index 3536487..3ff784d 100644
--- a/drivers/ddr/marvell/axp/xor.h
+++ b/drivers/ddr/marvell/axp/xor.h
@@ -60,6 +60,7 @@
 	u32 src_addr1;		/* Mode: Source Block address pointer */
 } __packed;
 
+void mv_xor_hal_init(u32 chan_num);
 int mv_xor_state_get(u32 chan);
 void mv_sys_xor_init(MV_DRAM_INFO *dram_info);
 void mv_sys_xor_finish(void);