sunxi: Add H616 DRAM support

Allwinner H616 supports many types of DRAM. Most notably it supports
LPDDR4. However, all commercially available boards at this time use
only DDR3, so this commit adds only DDR3 support.

Controller and MBUS are very similar to H6 but PHY is completely
unknown.

Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
Acked-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
diff --git a/arch/arm/include/asm/arch-sunxi/dram.h b/arch/arm/include/asm/arch-sunxi/dram.h
index 8002b7e..c3b3e1f 100644
--- a/arch/arm/include/asm/arch-sunxi/dram.h
+++ b/arch/arm/include/asm/arch-sunxi/dram.h
@@ -29,6 +29,8 @@
 #include <asm/arch/dram_sun9i.h>
 #elif defined(CONFIG_MACH_SUN50I_H6)
 #include <asm/arch/dram_sun50i_h6.h>
+#elif defined(CONFIG_MACH_SUN50I_H616)
+#include <asm/arch/dram_sun50i_h616.h>
 #else
 #include <asm/arch/dram_sun4i.h>
 #endif
diff --git a/arch/arm/include/asm/arch-sunxi/dram_sun50i_h616.h b/arch/arm/include/asm/arch-sunxi/dram_sun50i_h616.h
new file mode 100644
index 0000000..134679d
--- /dev/null
+++ b/arch/arm/include/asm/arch-sunxi/dram_sun50i_h616.h
@@ -0,0 +1,159 @@
+/*
+ * H616 dram controller register and constant defines
+ *
+ * (C) Copyright 2020  Jernej Skrabec <jernej.skrabec@siol.net>
+ *
+ * Based on H6 one, which is:
+ * (C) Copyright 2017  Icenowy Zheng <icenowy@aosc.io>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef _SUNXI_DRAM_SUN50I_H616_H
+#define _SUNXI_DRAM_SUN50I_H616_H
+
+#include <stdbool.h>
+#ifndef __ASSEMBLY__
+#include <linux/bitops.h>
+#endif
+
+enum sunxi_dram_type {
+	SUNXI_DRAM_TYPE_DDR3 = 3,
+	SUNXI_DRAM_TYPE_DDR4,
+	SUNXI_DRAM_TYPE_LPDDR3 = 7,
+	SUNXI_DRAM_TYPE_LPDDR4
+};
+
+/* MBUS part is largely the same as in H6, except for one special register */
+struct sunxi_mctl_com_reg {
+	u32 cr;			/* 0x000 control register */
+	u8 reserved_0x004[4];	/* 0x004 */
+	u32 unk_0x008;		/* 0x008 */
+	u32 tmr;		/* 0x00c timer register */
+	u8 reserved_0x010[4];	/* 0x010 */
+	u32 unk_0x014;		/* 0x014 */
+	u8 reserved_0x018[8];	/* 0x018 */
+	u32 maer0;		/* 0x020 master enable register 0 */
+	u32 maer1;		/* 0x024 master enable register 1 */
+	u32 maer2;		/* 0x028 master enable register 2 */
+	u8 reserved_0x02c[468];	/* 0x02c */
+	u32 bwcr;		/* 0x200 bandwidth control register */
+	u8 reserved_0x204[12];	/* 0x204 */
+	/*
+	 * The last master configured by BSP libdram is at 0x49x, so the
+	 * size of this struct array is set to 41 (0x29) now.
+	 */
+	struct {
+		u32 cfg0;		/* 0x0 */
+		u32 cfg1;		/* 0x4 */
+		u8 reserved_0x8[8];	/* 0x8 */
+	} master[41];		/* 0x210 + index * 0x10 */
+	u8 reserved_0x4a0[96];	/* 0x4a0 */
+	u32 unk_0x500;		/* 0x500 */
+};
+check_member(sunxi_mctl_com_reg, unk_0x500, 0x500);
+
+/*
+ * Controller registers seems to be the same or at least very similar
+ * to those in H6.
+ */
+struct sunxi_mctl_ctl_reg {
+	u32 mstr;		/* 0x000 */
+	u32 statr;		/* 0x004 unused */
+	u32 mstr1;		/* 0x008 unused */
+	u32 clken;		/* 0x00c */
+	u32 mrctrl0;		/* 0x010 unused */
+	u32 mrctrl1;		/* 0x014 unused */
+	u32 mrstatr;		/* 0x018 unused */
+	u32 mrctrl2;		/* 0x01c unused */
+	u32 derateen;		/* 0x020 unused */
+	u32 derateint;		/* 0x024 unused */
+	u8 reserved_0x028[8];	/* 0x028 */
+	u32 pwrctl;		/* 0x030 unused */
+	u32 pwrtmg;		/* 0x034 unused */
+	u32 hwlpctl;		/* 0x038 unused */
+	u8 reserved_0x03c[20];	/* 0x03c */
+	u32 rfshctl0;		/* 0x050 unused */
+	u32 rfshctl1;		/* 0x054 unused */
+	u8 reserved_0x058[8];	/* 0x05c */
+	u32 rfshctl3;		/* 0x060 */
+	u32 rfshtmg;		/* 0x064 */
+	u8 reserved_0x068[104];	/* 0x068 */
+	u32 init[8];		/* 0x0d0 */
+	u32 dimmctl;		/* 0x0f0 unused */
+	u32 rankctl;		/* 0x0f4 */
+	u8 reserved_0x0f8[8];	/* 0x0f8 */
+	u32 dramtmg[17];	/* 0x100 */
+	u8 reserved_0x144[60];	/* 0x144 */
+	u32 zqctl[3];		/* 0x180 */
+	u32 zqstat;		/* 0x18c unused */
+	u32 dfitmg0;		/* 0x190 */
+	u32 dfitmg1;		/* 0x194 */
+	u32 dfilpcfg[2];	/* 0x198 unused */
+	u32 dfiupd[3];		/* 0x1a0 */
+	u32 reserved_0x1ac;	/* 0x1ac */
+	u32 dfimisc;		/* 0x1b0 */
+	u32 dfitmg2;		/* 0x1b4 unused */
+	u32 dfitmg3;		/* 0x1b8 unused */
+	u32 dfistat;		/* 0x1bc */
+	u32 dbictl;		/* 0x1c0 */
+	u8 reserved_0x1c4[60];	/* 0x1c4 */
+	u32 addrmap[12];	/* 0x200 */
+	u8 reserved_0x230[16];	/* 0x230 */
+	u32 odtcfg;		/* 0x240 */
+	u32 odtmap;		/* 0x244 */
+	u8 reserved_0x248[8];	/* 0x248 */
+	u32 sched[2];		/* 0x250 */
+	u8 reserved_0x258[180];	/* 0x258 */
+	u32 dbgcmd;		/* 0x30c unused */
+	u32 dbgstat;		/* 0x310 unused */
+	u8 reserved_0x314[12];	/* 0x314 */
+	u32 swctl;		/* 0x320 */
+	u32 swstat;		/* 0x324 */
+	u8 reserved_0x328[7768];/* 0x328 */
+	u32 unk_0x2180;		/* 0x2180 */
+	u8 reserved_0x2184[188];/* 0x2184 */
+	u32 unk_0x2240;		/* 0x2240 */
+	u8 reserved_0x2244[3900];/* 0x2244 */
+	u32 unk_0x3180;		/* 0x3180 */
+	u8 reserved_0x3184[188];/* 0x3184 */
+	u32 unk_0x3240;		/* 0x3240 */
+	u8 reserved_0x3244[3900];/* 0x3244 */
+	u32 unk_0x4180;		/* 0x4180 */
+	u8 reserved_0x4184[188];/* 0x4184 */
+	u32 unk_0x4240;		/* 0x4240 */
+};
+check_member(sunxi_mctl_ctl_reg, swstat, 0x324);
+check_member(sunxi_mctl_ctl_reg, unk_0x4240, 0x4240);
+
+#define MSTR_DEVICETYPE_DDR3	BIT(0)
+#define MSTR_DEVICETYPE_LPDDR2	BIT(2)
+#define MSTR_DEVICETYPE_LPDDR3	BIT(3)
+#define MSTR_DEVICETYPE_DDR4	BIT(4)
+#define MSTR_DEVICETYPE_MASK	GENMASK(5, 0)
+#define MSTR_2TMODE		BIT(10)
+#define MSTR_BUSWIDTH_FULL	(0 << 12)
+#define MSTR_BUSWIDTH_HALF	(1 << 12)
+#define MSTR_ACTIVE_RANKS(x)	(((x == 2) ? 3 : 1) << 24)
+#define MSTR_BURST_LENGTH(x)	(((x) >> 1) << 16)
+
+struct dram_para {
+	u32 clk;
+	enum sunxi_dram_type type;
+	u8 cols;
+	u8 rows;
+	u8 ranks;
+	u8 bus_full_width;
+};
+
+
+static inline int ns_to_t(int nanoseconds)
+{
+	const unsigned int ctrl_freq = CONFIG_DRAM_CLK / 2;
+
+	return DIV_ROUND_UP(ctrl_freq * nanoseconds, 1000);
+}
+
+void mctl_set_timing_params(struct dram_para *para);
+
+#endif /* _SUNXI_DRAM_SUN50I_H616_H */
diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig
index edb9973..d9924f9 100644
--- a/arch/arm/mach-sunxi/Kconfig
+++ b/arch/arm/mach-sunxi/Kconfig
@@ -48,6 +48,46 @@
 	  Select this dram controller driver for some sun50i platforms,
 	  like H6.
 
+config DRAM_SUN50I_H616
+	bool
+	help
+	  Select this dram controller driver for some sun50i platforms,
+	  like H616.
+
+if DRAM_SUN50I_H616
+config DRAM_SUN50I_H616_WRITE_LEVELING
+	bool "H616 DRAM write leveling"
+	---help---
+	  Select this when DRAM on your H616 board needs write leveling.
+
+config DRAM_SUN50I_H616_READ_CALIBRATION
+	bool "H616 DRAM read calibration"
+	---help---
+	  Select this when DRAM on your H616 board needs read calibration.
+
+config DRAM_SUN50I_H616_READ_TRAINING
+	bool "H616 DRAM read training"
+	---help---
+	  Select this when DRAM on your H616 board needs read training.
+
+config DRAM_SUN50I_H616_WRITE_TRAINING
+	bool "H616 DRAM write training"
+	---help---
+	  Select this when DRAM on your H616 board needs write training.
+
+config DRAM_SUN50I_H616_BIT_DELAY_COMPENSATION
+	bool "H616 DRAM bit delay compensation"
+	---help---
+	  Select this when DRAM on your H616 board needs bit delay
+	  compensation.
+
+config DRAM_SUN50I_H616_UNKNOWN_FEATURE
+	bool "H616 DRAM unknown feature"
+	---help---
+	  Select this when DRAM on your H616 board needs this unknown
+	  feature.
+endif
+
 config SUN6I_P2WI
 	bool "Allwinner sun6i internal P2WI controller"
 	help
@@ -424,6 +464,7 @@
 		       MACH_SUN8I_V3S
 	default 672 if MACH_SUN50I
 	default 744 if MACH_SUN50I_H6
+	default 720 if MACH_SUN50I_H616
 	---help---
 	Set the dram clock speed, valid range 240 - 480 (prior to sun9i),
 	must be a multiple of 24. For the sun9i (A80), the tested values
@@ -440,6 +481,7 @@
 
 config DRAM_ZQ
 	int "sunxi dram zq value"
+	depends on !MACH_SUN50I_H616
 	default 123 if MACH_SUN4I || MACH_SUN5I || MACH_SUN6I || \
 		       MACH_SUN8I_A23 || MACH_SUN8I_A33 || MACH_SUN8I_A83T
 	default 127 if MACH_SUN7I
@@ -457,6 +499,7 @@
 	default y if MACH_SUN8I_R40
 	default y if MACH_SUN50I
 	default y if MACH_SUN50I_H6
+	default y if MACH_SUN50I_H616
 	---help---
 	Select this to enable dram odt (on die termination).
 
diff --git a/arch/arm/mach-sunxi/Makefile b/arch/arm/mach-sunxi/Makefile
index b8aca43..3f081d9 100644
--- a/arch/arm/mach-sunxi/Makefile
+++ b/arch/arm/mach-sunxi/Makefile
@@ -40,4 +40,6 @@
 obj-$(CONFIG_SUNXI_DRAM_DW)	+= dram_timings/
 obj-$(CONFIG_DRAM_SUN50I_H6)	+= dram_sun50i_h6.o
 obj-$(CONFIG_DRAM_SUN50I_H6)	+= dram_timings/
+obj-$(CONFIG_DRAM_SUN50I_H616)	+= dram_sun50i_h616.o
+obj-$(CONFIG_DRAM_SUN50I_H616)	+= dram_timings/
 endif
diff --git a/arch/arm/mach-sunxi/dram_sun50i_h616.c b/arch/arm/mach-sunxi/dram_sun50i_h616.c
new file mode 100644
index 0000000..ef58769
--- /dev/null
+++ b/arch/arm/mach-sunxi/dram_sun50i_h616.c
@@ -0,0 +1,1023 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * sun50i H616 platform dram controller driver
+ *
+ * While controller is very similar to that in H6, PHY is completely
+ * unknown. That's why this driver has plenty of magic numbers. Some
+ * meaning was nevertheless deduced from strings found in boot0 and
+ * known meaning of some dram parameters.
+ * This driver only supports DDR3 memory and omits logic for all
+ * other supported types supported by hardware.
+ *
+ * (C) Copyright 2020 Jernej Skrabec <jernej.skrabec@siol.net>
+ *
+ */
+#include <common.h>
+#include <init.h>
+#include <log.h>
+#include <asm/io.h>
+#include <asm/arch/clock.h>
+#include <asm/arch/dram.h>
+#include <asm/arch/cpu.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/kconfig.h>
+
+enum {
+	MBUS_QOS_LOWEST = 0,
+	MBUS_QOS_LOW,
+	MBUS_QOS_HIGH,
+	MBUS_QOS_HIGHEST
+};
+
+inline void mbus_configure_port(u8 port,
+				bool bwlimit,
+				bool priority,
+				u8 qos,
+				u8 waittime,
+				u8 acs,
+				u16 bwl0,
+				u16 bwl1,
+				u16 bwl2)
+{
+	struct sunxi_mctl_com_reg * const mctl_com =
+			(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
+
+	const u32 cfg0 = ( (bwlimit ? (1 << 0) : 0)
+			   | (priority ? (1 << 1) : 0)
+			   | ((qos & 0x3) << 2)
+			   | ((waittime & 0xf) << 4)
+			   | ((acs & 0xff) << 8)
+			   | (bwl0 << 16) );
+	const u32 cfg1 = ((u32)bwl2 << 16) | (bwl1 & 0xffff);
+
+	debug("MBUS port %d cfg0 %08x cfg1 %08x\n", port, cfg0, cfg1);
+	writel_relaxed(cfg0, &mctl_com->master[port].cfg0);
+	writel_relaxed(cfg1, &mctl_com->master[port].cfg1);
+}
+
+#define MBUS_CONF(port, bwlimit, qos, acs, bwl0, bwl1, bwl2)	\
+	mbus_configure_port(port, bwlimit, false, \
+			    MBUS_QOS_ ## qos, 0, acs, bwl0, bwl1, bwl2)
+
+static void mctl_set_master_priority(void)
+{
+	struct sunxi_mctl_com_reg * const mctl_com =
+			(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
+
+	/* enable bandwidth limit windows and set windows size 1us */
+	writel(399, &mctl_com->tmr);
+	writel(BIT(16), &mctl_com->bwcr);
+
+	MBUS_CONF( 0, true, HIGHEST, 0,  256,  128,  100);
+	MBUS_CONF( 1, true,    HIGH, 0, 1536, 1400,  256);
+	MBUS_CONF( 2, true, HIGHEST, 0,  512,  256,   96);
+	MBUS_CONF( 3, true,    HIGH, 0,  256,  100,   80);
+	MBUS_CONF( 4, true,    HIGH, 2, 8192, 5500, 5000);
+	MBUS_CONF( 5, true,    HIGH, 2,  100,   64,   32);
+	MBUS_CONF( 6, true,    HIGH, 2,  100,   64,   32);
+	MBUS_CONF( 8, true,    HIGH, 0,  256,  128,   64);
+	MBUS_CONF(11, true,    HIGH, 0,  256,  128,  100);
+	MBUS_CONF(14, true,    HIGH, 0, 1024,  256,   64);
+	MBUS_CONF(16, true, HIGHEST, 6, 8192, 2800, 2400);
+	MBUS_CONF(21, true, HIGHEST, 6, 2048,  768,  512);
+	MBUS_CONF(25, true, HIGHEST, 0,  100,   64,   32);
+	MBUS_CONF(26, true,    HIGH, 2, 8192, 5500, 5000);
+	MBUS_CONF(37, true,    HIGH, 0,  256,  128,   64);
+	MBUS_CONF(38, true,    HIGH, 2,  100,   64,   32);
+	MBUS_CONF(39, true,    HIGH, 2, 8192, 5500, 5000);
+	MBUS_CONF(40, true,    HIGH, 2,  100,   64,   32);
+
+	dmb();
+}
+
+static void mctl_sys_init(struct dram_para *para)
+{
+	struct sunxi_ccm_reg * const ccm =
+			(struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
+	struct sunxi_mctl_com_reg * const mctl_com =
+			(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
+	struct sunxi_mctl_ctl_reg * const mctl_ctl =
+			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
+
+	/* Put all DRAM-related blocks to reset state */
+	clrbits_le32(&ccm->mbus_cfg, MBUS_ENABLE);
+	clrbits_le32(&ccm->mbus_cfg, MBUS_RESET);
+	clrbits_le32(&ccm->dram_gate_reset, BIT(GATE_SHIFT));
+	udelay(5);
+	clrbits_le32(&ccm->dram_gate_reset, BIT(RESET_SHIFT));
+	clrbits_le32(&ccm->pll5_cfg, CCM_PLL5_CTRL_EN);
+	clrbits_le32(&ccm->dram_clk_cfg, DRAM_MOD_RESET);
+
+	udelay(5);
+
+	/* Set PLL5 rate to doubled DRAM clock rate */
+	writel(CCM_PLL5_CTRL_EN | CCM_PLL5_LOCK_EN | CCM_PLL5_OUT_EN |
+	       CCM_PLL5_CTRL_N(para->clk * 2 / 24 - 1), &ccm->pll5_cfg);
+	mctl_await_completion(&ccm->pll5_cfg, CCM_PLL5_LOCK, CCM_PLL5_LOCK);
+
+	/* Configure DRAM mod clock */
+	writel(DRAM_CLK_SRC_PLL5, &ccm->dram_clk_cfg);
+	writel(BIT(RESET_SHIFT), &ccm->dram_gate_reset);
+	udelay(5);
+	setbits_le32(&ccm->dram_gate_reset, BIT(GATE_SHIFT));
+
+	/* Disable all channels */
+	writel(0, &mctl_com->maer0);
+	writel(0, &mctl_com->maer1);
+	writel(0, &mctl_com->maer2);
+
+	/* Configure MBUS and enable DRAM mod reset */
+	setbits_le32(&ccm->mbus_cfg, MBUS_RESET);
+	setbits_le32(&ccm->mbus_cfg, MBUS_ENABLE);
+
+	clrbits_le32(&mctl_com->unk_0x500, BIT(25));
+
+	setbits_le32(&ccm->dram_clk_cfg, DRAM_MOD_RESET);
+	udelay(5);
+
+	/* Unknown hack, which enables access of mctl_ctl regs */
+	writel(0x8000, &mctl_ctl->clken);
+}
+
+static void mctl_set_addrmap(struct dram_para *para)
+{
+	struct sunxi_mctl_ctl_reg * const mctl_ctl =
+			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
+	u8 cols = para->cols;
+	u8 rows = para->rows;
+	u8 ranks = para->ranks;
+
+	if (!para->bus_full_width)
+		cols -= 1;
+
+	/* Ranks */
+	if (ranks == 2)
+		mctl_ctl->addrmap[0] = rows + cols - 3;
+	else
+		mctl_ctl->addrmap[0] = 0x1F;
+
+	/* Banks, hardcoded to 8 banks now */
+	mctl_ctl->addrmap[1] = (cols - 2) | (cols - 2) << 8 | (cols - 2) << 16;
+
+	/* Columns */
+	mctl_ctl->addrmap[2] = 0;
+	switch (cols) {
+	case 7:
+		mctl_ctl->addrmap[3] = 0x1F1F1F00;
+		mctl_ctl->addrmap[4] = 0x1F1F;
+		break;
+	case 8:
+		mctl_ctl->addrmap[3] = 0x1F1F0000;
+		mctl_ctl->addrmap[4] = 0x1F1F;
+		break;
+	case 9:
+		mctl_ctl->addrmap[3] = 0x1F000000;
+		mctl_ctl->addrmap[4] = 0x1F1F;
+		break;
+	case 10:
+		mctl_ctl->addrmap[3] = 0;
+		mctl_ctl->addrmap[4] = 0x1F1F;
+		break;
+	case 11:
+		mctl_ctl->addrmap[3] = 0;
+		mctl_ctl->addrmap[4] = 0x1F00;
+		break;
+	case 12:
+		mctl_ctl->addrmap[3] = 0;
+		mctl_ctl->addrmap[4] = 0;
+		break;
+	default:
+		panic("Unsupported DRAM configuration: column number invalid\n");
+	}
+
+	/* Rows */
+	mctl_ctl->addrmap[5] = (cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24);
+	switch (rows) {
+	case 13:
+		mctl_ctl->addrmap[6] = (cols - 3) | 0x0F0F0F00;
+		mctl_ctl->addrmap[7] = 0x0F0F;
+		break;
+	case 14:
+		mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) | 0x0F0F0000;
+		mctl_ctl->addrmap[7] = 0x0F0F;
+		break;
+	case 15:
+		mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | 0x0F000000;
+		mctl_ctl->addrmap[7] = 0x0F0F;
+		break;
+	case 16:
+		mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24);
+		mctl_ctl->addrmap[7] = 0x0F0F;
+		break;
+	case 17:
+		mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24);
+		mctl_ctl->addrmap[7] = (cols - 3) | 0x0F00;
+		break;
+	case 18:
+		mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24);
+		mctl_ctl->addrmap[7] = (cols - 3) | ((cols - 3) << 8);
+		break;
+	default:
+		panic("Unsupported DRAM configuration: row number invalid\n");
+	}
+
+	/* Bank groups, DDR4 only */
+	mctl_ctl->addrmap[8] = 0x3F3F;
+}
+
+static const u8 phy_init[] = {
+	0x07, 0x0b, 0x02, 0x16, 0x0d, 0x0e, 0x14, 0x19,
+	0x0a, 0x15, 0x03, 0x13, 0x04, 0x0c, 0x10, 0x06,
+	0x0f, 0x11, 0x1a, 0x01, 0x12, 0x17, 0x00, 0x08,
+	0x09, 0x05, 0x18
+};
+
+static void mctl_phy_configure_odt(void)
+{
+	writel_relaxed(0xe, SUNXI_DRAM_PHY0_BASE + 0x388);
+	writel_relaxed(0xe, SUNXI_DRAM_PHY0_BASE + 0x38c);
+
+	writel_relaxed(0xe, SUNXI_DRAM_PHY0_BASE + 0x3c8);
+	writel_relaxed(0xe, SUNXI_DRAM_PHY0_BASE + 0x3cc);
+
+	writel_relaxed(0xe, SUNXI_DRAM_PHY0_BASE + 0x408);
+	writel_relaxed(0xe, SUNXI_DRAM_PHY0_BASE + 0x40c);
+
+	writel_relaxed(0xe, SUNXI_DRAM_PHY0_BASE + 0x448);
+	writel_relaxed(0xe, SUNXI_DRAM_PHY0_BASE + 0x44c);
+
+	writel_relaxed(0xe, SUNXI_DRAM_PHY0_BASE + 0x340);
+	writel_relaxed(0xe, SUNXI_DRAM_PHY0_BASE + 0x344);
+
+	writel_relaxed(0xe, SUNXI_DRAM_PHY0_BASE + 0x348);
+	writel_relaxed(0xe, SUNXI_DRAM_PHY0_BASE + 0x34c);
+
+	writel_relaxed(0x8, SUNXI_DRAM_PHY0_BASE + 0x380);
+	writel_relaxed(0x8, SUNXI_DRAM_PHY0_BASE + 0x384);
+
+	writel_relaxed(0x8, SUNXI_DRAM_PHY0_BASE + 0x3c0);
+	writel_relaxed(0x8, SUNXI_DRAM_PHY0_BASE + 0x3c4);
+
+	writel_relaxed(0x8, SUNXI_DRAM_PHY0_BASE + 0x400);
+	writel_relaxed(0x8, SUNXI_DRAM_PHY0_BASE + 0x404);
+
+	writel_relaxed(0x8, SUNXI_DRAM_PHY0_BASE + 0x440);
+	writel_relaxed(0x8, SUNXI_DRAM_PHY0_BASE + 0x444);
+
+	dmb();
+}
+
+static bool mctl_phy_write_leveling(struct dram_para *para)
+{
+	bool result = true;
+	u32 val;
+
+	clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 0xc0, 0x80);
+	writel(4, SUNXI_DRAM_PHY0_BASE + 0xc);
+	writel(0x40, SUNXI_DRAM_PHY0_BASE + 0x10);
+
+	setbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 4);
+
+	if (para->bus_full_width)
+		val = 0xf;
+	else
+		val = 3;
+
+	mctl_await_completion((u32*)(SUNXI_DRAM_PHY0_BASE + 0x188), val, val);
+
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 4);
+
+	val = readl(SUNXI_DRAM_PHY0_BASE + 0x258);
+	if (val == 0 || val == 0x3f)
+		result = false;
+	val = readl(SUNXI_DRAM_PHY0_BASE + 0x25c);
+	if (val == 0 || val == 0x3f)
+		result = false;
+	val = readl(SUNXI_DRAM_PHY0_BASE + 0x318);
+	if (val == 0 || val == 0x3f)
+		result = false;
+	val = readl(SUNXI_DRAM_PHY0_BASE + 0x31c);
+	if (val == 0 || val == 0x3f)
+		result = false;
+
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 0xc0);
+
+	if (para->ranks == 2) {
+		clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 0xc0, 0x40);
+
+		setbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 4);
+
+		if (para->bus_full_width)
+			val = 0xf;
+		else
+			val = 3;
+
+		mctl_await_completion((u32*)(SUNXI_DRAM_PHY0_BASE + 0x188), val, val);
+
+		clrbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 4);
+	}
+
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 0xc0);
+
+	return result;
+}
+
+static bool mctl_phy_read_calibration(struct dram_para *para)
+{
+	bool result = true;
+	u32 val, tmp;
+
+	clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 0x30, 0x20);
+
+	setbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 1);
+
+	if (para->bus_full_width)
+		val = 0xf;
+	else
+		val = 3;
+
+	while ((readl(SUNXI_DRAM_PHY0_BASE + 0x184) & val) != val) {
+		if (readl(SUNXI_DRAM_PHY0_BASE + 0x184) & 0x20) {
+			result = false;
+			break;
+		}
+	}
+
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 1);
+
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 0x30);
+
+	if (para->ranks == 2) {
+		clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 0x30, 0x10);
+
+		setbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 1);
+
+		while ((readl(SUNXI_DRAM_PHY0_BASE + 0x184) & val) != val) {
+			if (readl(SUNXI_DRAM_PHY0_BASE + 0x184) & 0x20) {
+				result = false;
+				break;
+			}
+		}
+
+		setbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 1);
+	}
+
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 0x30);
+
+	val = readl(SUNXI_DRAM_PHY0_BASE + 0x274) & 7;
+	tmp = readl(SUNXI_DRAM_PHY0_BASE + 0x26c) & 7;
+	if (val < tmp)
+		val = tmp;
+	tmp = readl(SUNXI_DRAM_PHY0_BASE + 0x32c) & 7;
+	if (val < tmp)
+		val = tmp;
+	tmp = readl(SUNXI_DRAM_PHY0_BASE + 0x334) & 7;
+	if (val < tmp)
+		val = tmp;
+	clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 0x38, 0x7, (val + 2) & 7);
+
+	setbits_le32(SUNXI_DRAM_PHY0_BASE + 4, 0x20);
+
+	return result;
+}
+
+static bool mctl_phy_read_training(struct dram_para *para)
+{
+	u32 val1, val2, *ptr1, *ptr2;
+	bool result = true;
+	int i;
+
+	clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 0x198, 3, 2);
+	clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 0x804, 0x3f, 0xf);
+	clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 0x808, 0x3f, 0xf);
+	clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 0xa04, 0x3f, 0xf);
+	clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 0xa08, 0x3f, 0xf);
+
+	setbits_le32(SUNXI_DRAM_PHY0_BASE + 0x190, 6);
+	setbits_le32(SUNXI_DRAM_PHY0_BASE + 0x190, 1);
+
+	mctl_await_completion((u32*)(SUNXI_DRAM_PHY0_BASE + 0x840), 0xc, 0xc);
+	if (readl(SUNXI_DRAM_PHY0_BASE + 0x840) & 3)
+		result = false;
+
+	if (para->bus_full_width) {
+		mctl_await_completion((u32*)(SUNXI_DRAM_PHY0_BASE + 0xa40), 0xc, 0xc);
+		if (readl(SUNXI_DRAM_PHY0_BASE + 0xa40) & 3)
+			result = false;
+	}
+
+	ptr1 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x898);
+	ptr2 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x850);
+	for (i = 0; i < 9; i++) {
+		val1 = readl(&ptr1[i]);
+		val2 = readl(&ptr2[i]);
+		if (val1 - val2 <= 6)
+			result = false;
+	}
+	ptr1 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x8bc);
+	ptr2 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x874);
+	for (i = 0; i < 9; i++) {
+		val1 = readl(&ptr1[i]);
+		val2 = readl(&ptr2[i]);
+		if (val1 - val2 <= 6)
+			result = false;
+	}
+
+	if (para->bus_full_width) {
+		ptr1 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0xa98);
+		ptr2 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0xa50);
+		for (i = 0; i < 9; i++) {
+			val1 = readl(&ptr1[i]);
+			val2 = readl(&ptr2[i]);
+			if (val1 - val2 <= 6)
+				result = false;
+		}
+
+		ptr1 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0xabc);
+		ptr2 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0xa74);
+		for (i = 0; i < 9; i++) {
+			val1 = readl(&ptr1[i]);
+			val2 = readl(&ptr2[i]);
+			if (val1 - val2 <= 6)
+				result = false;
+		}
+	}
+
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 0x190, 3);
+
+	if (para->ranks == 2) {
+		/* maybe last parameter should be 1? */
+		clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 0x198, 3, 2);
+
+		setbits_le32(SUNXI_DRAM_PHY0_BASE + 0x190, 6);
+		setbits_le32(SUNXI_DRAM_PHY0_BASE + 0x190, 1);
+
+		mctl_await_completion((u32*)(SUNXI_DRAM_PHY0_BASE + 0x840), 0xc, 0xc);
+		if (readl(SUNXI_DRAM_PHY0_BASE + 0x840) & 3)
+			result = false;
+
+		if (para->bus_full_width) {
+			mctl_await_completion((u32*)(SUNXI_DRAM_PHY0_BASE + 0xa40), 0xc, 0xc);
+			if (readl(SUNXI_DRAM_PHY0_BASE + 0xa40) & 3)
+				result = false;
+		}
+
+		clrbits_le32(SUNXI_DRAM_PHY0_BASE + 0x190, 3);
+	}
+
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 0x198, 3);
+
+	return result;
+}
+
+static bool mctl_phy_write_training(struct dram_para *para)
+{
+	u32 val1, val2, *ptr1, *ptr2;
+	bool result = true;
+	int i;
+
+	writel(0, SUNXI_DRAM_PHY0_BASE + 0x134);
+	writel(0, SUNXI_DRAM_PHY0_BASE + 0x138);
+	writel(0, SUNXI_DRAM_PHY0_BASE + 0x19c);
+	writel(0, SUNXI_DRAM_PHY0_BASE + 0x1a0);
+
+	clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 0x198, 0xc, 8);
+
+	setbits_le32(SUNXI_DRAM_PHY0_BASE + 0x190, 0x10);
+	setbits_le32(SUNXI_DRAM_PHY0_BASE + 0x190, 0x20);
+
+	mctl_await_completion((u32*)(SUNXI_DRAM_PHY0_BASE + 0x8e0), 3, 3);
+	if (readl(SUNXI_DRAM_PHY0_BASE + 0x8e0) & 0xc)
+		result = false;
+
+	if (para->bus_full_width) {
+		mctl_await_completion((u32*)(SUNXI_DRAM_PHY0_BASE + 0xae0), 3, 3);
+		if (readl(SUNXI_DRAM_PHY0_BASE + 0xae0) & 0xc)
+			result = false;
+	}
+
+	ptr1 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x938);
+	ptr2 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x8f0);
+	for (i = 0; i < 9; i++) {
+		val1 = readl(&ptr1[i]);
+		val2 = readl(&ptr2[i]);
+		if (val1 - val2 <= 6)
+			result = false;
+	}
+	ptr1 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x95c);
+	ptr2 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x914);
+	for (i = 0; i < 9; i++) {
+		val1 = readl(&ptr1[i]);
+		val2 = readl(&ptr2[i]);
+		if (val1 - val2 <= 6)
+			result = false;
+	}
+
+	if (para->bus_full_width) {
+		ptr1 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0xb38);
+		ptr2 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0xaf0);
+		for (i = 0; i < 9; i++) {
+			val1 = readl(&ptr1[i]);
+			val2 = readl(&ptr2[i]);
+			if (val1 - val2 <= 6)
+				result = false;
+		}
+		ptr1 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0xb5c);
+		ptr2 = (u32*)(SUNXI_DRAM_PHY0_BASE + 0xb14);
+		for (i = 0; i < 9; i++) {
+			val1 = readl(&ptr1[i]);
+			val2 = readl(&ptr2[i]);
+			if (val1 - val2 <= 6)
+				result = false;
+		}
+	}
+
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 0x190, 0x60);
+
+	if (para->ranks == 2) {
+		clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 0x198, 0xc, 4);
+
+		setbits_le32(SUNXI_DRAM_PHY0_BASE + 0x190, 0x10);
+		setbits_le32(SUNXI_DRAM_PHY0_BASE + 0x190, 0x20);
+
+		mctl_await_completion((u32*)(SUNXI_DRAM_PHY0_BASE + 0x8e0), 3, 3);
+		if (readl(SUNXI_DRAM_PHY0_BASE + 0x8e0) & 0xc)
+			result = false;
+
+		if (para->bus_full_width) {
+			mctl_await_completion((u32*)(SUNXI_DRAM_PHY0_BASE + 0xae0), 3, 3);
+			if (readl(SUNXI_DRAM_PHY0_BASE + 0xae0) & 0xc)
+				result = false;
+		}
+
+		clrbits_le32(SUNXI_DRAM_PHY0_BASE + 0x190, 0x60);
+	}
+
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 0x198, 0xc);
+
+	return result;
+}
+
+static bool mctl_phy_bit_delay_compensation(struct dram_para *para)
+{
+	u32 *ptr;
+	int i;
+
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 0x60, 1);
+	setbits_le32(SUNXI_DRAM_PHY0_BASE + 8, 8);
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 0x190, 0x10);
+
+	ptr = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x484);
+	for (i = 0; i < 9; i++) {
+		writel_relaxed(0x16, ptr);
+		writel_relaxed(0x16, ptr + 0x30);
+		ptr += 2;
+	}
+	writel_relaxed(0x1c, SUNXI_DRAM_PHY0_BASE + 0x4d0);
+	writel_relaxed(0x1c, SUNXI_DRAM_PHY0_BASE + 0x590);
+	writel_relaxed(0x1c, SUNXI_DRAM_PHY0_BASE + 0x4cc);
+	writel_relaxed(0x1c, SUNXI_DRAM_PHY0_BASE + 0x58c);
+
+	ptr = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x4d8);
+	for (i = 0; i < 9; i++) {
+		writel_relaxed(0x1a, ptr);
+		writel_relaxed(0x1a, ptr + 0x30);
+		ptr += 2;
+	}
+	writel_relaxed(0x1e, SUNXI_DRAM_PHY0_BASE + 0x524);
+	writel_relaxed(0x1e, SUNXI_DRAM_PHY0_BASE + 0x5e4);
+	writel_relaxed(0x1e, SUNXI_DRAM_PHY0_BASE + 0x520);
+	writel_relaxed(0x1e, SUNXI_DRAM_PHY0_BASE + 0x5e0);
+
+	ptr = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x604);
+	for (i = 0; i < 9; i++) {
+		writel_relaxed(0x1a, ptr);
+		writel_relaxed(0x1a, ptr + 0x30);
+		ptr += 2;
+	}
+	writel_relaxed(0x1e, SUNXI_DRAM_PHY0_BASE + 0x650);
+	writel_relaxed(0x1e, SUNXI_DRAM_PHY0_BASE + 0x710);
+	writel_relaxed(0x1e, SUNXI_DRAM_PHY0_BASE + 0x64c);
+	writel_relaxed(0x1e, SUNXI_DRAM_PHY0_BASE + 0x70c);
+
+	ptr = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x658);
+	for (i = 0; i < 9; i++) {
+		writel_relaxed(0x1a, ptr);
+		writel_relaxed(0x1a, ptr + 0x30);
+		ptr += 2;
+	}
+	writel_relaxed(0x1e, SUNXI_DRAM_PHY0_BASE + 0x6a4);
+	writel_relaxed(0x1e, SUNXI_DRAM_PHY0_BASE + 0x764);
+	writel_relaxed(0x1e, SUNXI_DRAM_PHY0_BASE + 0x6a0);
+	writel_relaxed(0x1e, SUNXI_DRAM_PHY0_BASE + 0x760);
+
+	dmb();
+
+	setbits_le32(SUNXI_DRAM_PHY0_BASE + 0x60, 1);
+
+	/* second part */
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 0x54, 0x80);
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 0x190, 4);
+
+	ptr = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x480);
+	for (i = 0; i < 9; i++) {
+		writel_relaxed(0x10, ptr);
+		writel_relaxed(0x10, ptr + 0x30);
+		ptr += 2;
+	}
+	writel_relaxed(0x18, SUNXI_DRAM_PHY0_BASE + 0x528);
+	writel_relaxed(0x18, SUNXI_DRAM_PHY0_BASE + 0x5e8);
+	writel_relaxed(0x18, SUNXI_DRAM_PHY0_BASE + 0x4c8);
+	writel_relaxed(0x18, SUNXI_DRAM_PHY0_BASE + 0x588);
+
+	ptr = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x4d4);
+	for (i = 0; i < 9; i++) {
+		writel_relaxed(0x12, ptr);
+		writel_relaxed(0x12, ptr + 0x30);
+		ptr += 2;
+	}
+	writel_relaxed(0x1a, SUNXI_DRAM_PHY0_BASE + 0x52c);
+	writel_relaxed(0x1a, SUNXI_DRAM_PHY0_BASE + 0x5ec);
+	writel_relaxed(0x1a, SUNXI_DRAM_PHY0_BASE + 0x51c);
+	writel_relaxed(0x1a, SUNXI_DRAM_PHY0_BASE + 0x5dc);
+
+	ptr = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x600);
+	for (i = 0; i < 9; i++) {
+		writel_relaxed(0x12, ptr);
+		writel_relaxed(0x12, ptr + 0x30);
+		ptr += 2;
+	}
+	writel_relaxed(0x1a, SUNXI_DRAM_PHY0_BASE + 0x6a8);
+	writel_relaxed(0x1a, SUNXI_DRAM_PHY0_BASE + 0x768);
+	writel_relaxed(0x1a, SUNXI_DRAM_PHY0_BASE + 0x648);
+	writel_relaxed(0x1a, SUNXI_DRAM_PHY0_BASE + 0x708);
+
+	ptr = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x654);
+	for (i = 0; i < 9; i++) {
+		writel_relaxed(0x14, ptr);
+		writel_relaxed(0x14, ptr + 0x30);
+		ptr += 2;
+	}
+	writel_relaxed(0x1c, SUNXI_DRAM_PHY0_BASE + 0x6ac);
+	writel_relaxed(0x1c, SUNXI_DRAM_PHY0_BASE + 0x76c);
+	writel_relaxed(0x1c, SUNXI_DRAM_PHY0_BASE + 0x69c);
+	writel_relaxed(0x1c, SUNXI_DRAM_PHY0_BASE + 0x75c);
+
+	dmb();
+
+	setbits_le32(SUNXI_DRAM_PHY0_BASE + 0x54, 0x80);
+
+	return true;
+}
+
+static bool mctl_phy_init(struct dram_para *para)
+{
+	struct sunxi_mctl_com_reg * const mctl_com =
+			(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
+	struct sunxi_mctl_ctl_reg * const mctl_ctl =
+			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
+	u32 val, *ptr;
+	int i;
+
+	if (para->bus_full_width)
+		val = 0xf;
+	else
+		val = 3;
+	clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 0x3c, 0xf, val);
+
+	writel(0xd, SUNXI_DRAM_PHY0_BASE + 0x14);
+	writel(0xd, SUNXI_DRAM_PHY0_BASE + 0x35c);
+	writel(0xd, SUNXI_DRAM_PHY0_BASE + 0x368);
+	writel(0xd, SUNXI_DRAM_PHY0_BASE + 0x374);
+
+	writel(0, SUNXI_DRAM_PHY0_BASE + 0x18);
+	writel(0, SUNXI_DRAM_PHY0_BASE + 0x360);
+	writel(0, SUNXI_DRAM_PHY0_BASE + 0x36c);
+	writel(0, SUNXI_DRAM_PHY0_BASE + 0x378);
+
+	writel(9, SUNXI_DRAM_PHY0_BASE + 0x1c);
+	writel(9, SUNXI_DRAM_PHY0_BASE + 0x364);
+	writel(9, SUNXI_DRAM_PHY0_BASE + 0x370);
+	writel(9, SUNXI_DRAM_PHY0_BASE + 0x37c);
+
+	ptr = (u32*)(SUNXI_DRAM_PHY0_BASE + 0xc0);
+	for (i = 0; i < ARRAY_SIZE(phy_init); i++)
+		writel(phy_init[i], &ptr[i]);
+
+	if (IS_ENABLED(CONFIG_DRAM_SUN50I_H616_UNKNOWN_FEATURE)) {
+		ptr = (u32*)(SUNXI_DRAM_PHY0_BASE + 0x780);
+		for (i = 0; i < 32; i++)
+			writel(0x16, &ptr[i]);
+		writel(0xe, SUNXI_DRAM_PHY0_BASE + 0x78c);
+		writel(0xe, SUNXI_DRAM_PHY0_BASE + 0x7a4);
+		writel(0xe, SUNXI_DRAM_PHY0_BASE + 0x7b8);
+		writel(0x8, SUNXI_DRAM_PHY0_BASE + 0x7d4);
+		writel(0xe, SUNXI_DRAM_PHY0_BASE + 0x7dc);
+		writel(0xe, SUNXI_DRAM_PHY0_BASE + 0x7e0);
+	}
+
+	writel(0x80, SUNXI_DRAM_PHY0_BASE + 0x3dc);
+	writel(0x80, SUNXI_DRAM_PHY0_BASE + 0x45c);
+
+	if (IS_ENABLED(DRAM_ODT_EN))
+		mctl_phy_configure_odt();
+
+	clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 4, 7, 0xa);
+
+	if (para->clk <= 672)
+		writel(0xf, SUNXI_DRAM_PHY0_BASE + 0x20);
+	if (para->clk > 500) {
+		clrbits_le32(SUNXI_DRAM_PHY0_BASE + 0x144, BIT(7));
+		clrbits_le32(SUNXI_DRAM_PHY0_BASE + 0x14c, 0xe0);
+	} else {
+		setbits_le32(SUNXI_DRAM_PHY0_BASE + 0x144, BIT(7));
+		clrsetbits_le32(SUNXI_DRAM_PHY0_BASE + 0x14c, 0xe0, 0x20);
+	}
+
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 0x14c, 8);
+
+	mctl_await_completion((u32*)(SUNXI_DRAM_PHY0_BASE + 0x180), 4, 4);
+
+	writel(0x37, SUNXI_DRAM_PHY0_BASE + 0x58);
+	clrbits_le32(&mctl_com->unk_0x500, 0x200);
+
+	writel(0, &mctl_ctl->swctl);
+	setbits_le32(&mctl_ctl->dfimisc, 1);
+
+	/* start DFI init */
+	setbits_le32(&mctl_ctl->dfimisc, 0x20);
+	writel(1, &mctl_ctl->swctl);
+	mctl_await_completion(&mctl_ctl->swstat, 1, 1);
+	/* poll DFI init complete */
+	mctl_await_completion(&mctl_ctl->dfistat, 1, 1);
+	writel(0, &mctl_ctl->swctl);
+	clrbits_le32(&mctl_ctl->dfimisc, 0x20);
+
+	clrbits_le32(&mctl_ctl->pwrctl, 0x20);
+	writel(1, &mctl_ctl->swctl);
+	mctl_await_completion(&mctl_ctl->swstat, 1, 1);
+	mctl_await_completion(&mctl_ctl->statr, 3, 1);
+
+	writel(0, &mctl_ctl->swctl);
+	clrbits_le32(&mctl_ctl->dfimisc, 1);
+
+	writel(1, &mctl_ctl->swctl);
+	mctl_await_completion(&mctl_ctl->swstat, 1, 1);
+
+	writel(0x1f14, &mctl_ctl->mrctrl1);
+	writel(0x80000030, &mctl_ctl->mrctrl0);
+	mctl_await_completion(&mctl_ctl->mrctrl0, BIT(31), 0);
+
+	writel(4, &mctl_ctl->mrctrl1);
+	writel(0x80001030, &mctl_ctl->mrctrl0);
+	mctl_await_completion(&mctl_ctl->mrctrl0, BIT(31), 0);
+
+	writel(0x20, &mctl_ctl->mrctrl1);
+	writel(0x80002030, &mctl_ctl->mrctrl0);
+	mctl_await_completion(&mctl_ctl->mrctrl0, BIT(31), 0);
+
+	writel(0, &mctl_ctl->mrctrl1);
+	writel(0x80003030, &mctl_ctl->mrctrl0);
+	mctl_await_completion(&mctl_ctl->mrctrl0, BIT(31), 0);
+
+	writel(0, SUNXI_DRAM_PHY0_BASE + 0x54);
+
+	writel(0, &mctl_ctl->swctl);
+	clrbits_le32(&mctl_ctl->rfshctl3, 1);
+	writel(1, &mctl_ctl->swctl);
+
+	if (IS_ENABLED(CONFIG_DRAM_SUN50I_H616_WRITE_LEVELING)) {
+		for (i = 0; i < 5; i++)
+			if (mctl_phy_write_leveling(para))
+				break;
+		if (i == 5) {
+			debug("write leveling failed!\n");
+			return false;
+		}
+	}
+
+	if (IS_ENABLED(CONFIG_DRAM_SUN50I_H616_READ_CALIBRATION)) {
+		for (i = 0; i < 5; i++)
+			if (mctl_phy_read_calibration(para))
+				break;
+		if (i == 5) {
+			debug("read calibration failed!\n");
+			return false;
+		}
+	}
+
+	if (IS_ENABLED(CONFIG_DRAM_SUN50I_H616_READ_TRAINING)) {
+		for (i = 0; i < 5; i++)
+			if (mctl_phy_read_training(para))
+				break;
+		if (i == 5) {
+			debug("read training failed!\n");
+			return false;
+		}
+	}
+
+	if (IS_ENABLED(CONFIG_DRAM_SUN50I_H616_WRITE_TRAINING)) {
+		for (i = 0; i < 5; i++)
+			if (mctl_phy_write_training(para))
+				break;
+		if (i == 5) {
+			debug("write training failed!\n");
+			return false;
+		}
+	}
+
+	if (IS_ENABLED(CONFIG_DRAM_SUN50I_H616_BIT_DELAY_COMPENSATION))
+		mctl_phy_bit_delay_compensation(para);
+
+	clrbits_le32(SUNXI_DRAM_PHY0_BASE + 0x60, 4);
+
+	return true;
+}
+
+static bool mctl_ctrl_init(struct dram_para *para)
+{
+	struct sunxi_mctl_com_reg * const mctl_com =
+			(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
+	struct sunxi_mctl_ctl_reg * const mctl_ctl =
+			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
+	u32 reg_val;
+
+	clrsetbits_le32(&mctl_com->unk_0x500, BIT(24), 0x200);
+	writel(0x8000, &mctl_ctl->clken);
+
+	setbits_le32(&mctl_com->unk_0x008, 0xff00);
+
+	clrsetbits_le32(&mctl_ctl->sched[0], 0xff00, 0x3000);
+
+	writel(0, &mctl_ctl->hwlpctl);
+
+	setbits_le32(&mctl_com->unk_0x008, 0xff00);
+
+	reg_val = MSTR_BURST_LENGTH(8) | MSTR_ACTIVE_RANKS(para->ranks);
+	reg_val |= MSTR_DEVICETYPE_DDR3 | MSTR_2TMODE;
+	if (para->bus_full_width)
+		reg_val |= MSTR_BUSWIDTH_FULL;
+	else
+		reg_val |= MSTR_BUSWIDTH_HALF;
+	writel(BIT(31) | BIT(30) | reg_val, &mctl_ctl->mstr);
+
+	if (para->ranks == 2)
+		writel(0x0303, &mctl_ctl->odtmap);
+	else
+		writel(0x0201, &mctl_ctl->odtmap);
+
+	writel(0x06000400, &mctl_ctl->odtcfg);
+	writel(0x06000400, &mctl_ctl->unk_0x2240);
+	writel(0x06000400, &mctl_ctl->unk_0x3240);
+	writel(0x06000400, &mctl_ctl->unk_0x4240);
+
+	setbits_le32(&mctl_com->cr, BIT(31));
+
+	mctl_set_addrmap(para);
+
+	mctl_set_timing_params(para);
+
+	writel(0, &mctl_ctl->pwrctl);
+
+	setbits_le32(&mctl_ctl->dfiupd[0], BIT(31) | BIT(30));
+	setbits_le32(&mctl_ctl->zqctl[0], BIT(31) | BIT(30));
+	setbits_le32(&mctl_ctl->unk_0x2180, BIT(31) | BIT(30));
+	setbits_le32(&mctl_ctl->unk_0x3180, BIT(31) | BIT(30));
+	setbits_le32(&mctl_ctl->unk_0x4180, BIT(31) | BIT(30));
+
+	setbits_le32(&mctl_ctl->rfshctl3, BIT(0));
+	clrbits_le32(&mctl_ctl->dfimisc, BIT(0));
+
+	writel(0, &mctl_com->maer0);
+	writel(0, &mctl_com->maer1);
+	writel(0, &mctl_com->maer2);
+
+	writel(0x20, &mctl_ctl->pwrctl);
+	setbits_le32(&mctl_ctl->clken, BIT(8));
+
+	clrsetbits_le32(&mctl_com->unk_0x500, BIT(24), 0x300);
+	/* this write seems to enable PHY MMIO region */
+	setbits_le32(&mctl_com->unk_0x500, BIT(24));
+
+	if (!mctl_phy_init(para))
+		return false;
+
+	writel(0, &mctl_ctl->swctl);
+	clrbits_le32(&mctl_ctl->rfshctl3, BIT(0));
+
+	setbits_le32(&mctl_com->unk_0x014, BIT(31));
+	writel(0xffffffff, &mctl_com->maer0);
+	writel(0x7ff, &mctl_com->maer1);
+	writel(0xffff, &mctl_com->maer2);
+
+	writel(1, &mctl_ctl->swctl);
+	mctl_await_completion(&mctl_ctl->swstat, 1, 1);
+
+	return true;
+}
+
+static bool mctl_core_init(struct dram_para *para)
+{
+	mctl_sys_init(para);
+
+	return mctl_ctrl_init(para);
+}
+
+static void mctl_auto_detect_rank_width(struct dram_para *para)
+{
+	/* this is minimum size that it's supported */
+	para->cols = 8;
+	para->rows = 13;
+
+	/*
+	 * Strategy here is to test most demanding combination first and least
+	 * demanding last, otherwise HW might not be fully utilized. For
+	 * example, half bus width and rank = 1 combination would also work
+	 * on HW with full bus width and rank = 2, but only 1/4 RAM would be
+	 * visible.
+	 */
+
+	debug("testing 32-bit width, rank = 2\n");
+	para->bus_full_width = 1;
+	para->ranks = 2;
+	if (mctl_core_init(para))
+		return;
+
+	debug("testing 32-bit width, rank = 1\n");
+	para->bus_full_width = 1;
+	para->ranks = 1;
+	if (mctl_core_init(para))
+		return;
+
+	debug("testing 16-bit width, rank = 2\n");
+	para->bus_full_width = 0;
+	para->ranks = 2;
+	if (mctl_core_init(para))
+		return;
+
+	debug("testing 16-bit width, rank = 1\n");
+	para->bus_full_width = 0;
+	para->ranks = 1;
+	if (mctl_core_init(para))
+		return;
+
+	panic("This DRAM setup is currently not supported.\n");
+}
+
+static void mctl_auto_detect_dram_size(struct dram_para *para)
+{
+	/* detect row address bits */
+	para->cols = 8;
+	para->rows = 18;
+	mctl_core_init(para);
+
+	for (para->rows = 13; para->rows < 18; para->rows++) {
+		/* 8 banks, 8 bit per byte and 16/32 bit width */
+		if (mctl_mem_matches((1 << (para->rows + para->cols +
+					    4 + para->bus_full_width))))
+			break;
+	}
+
+	/* detect column address bits */
+	para->cols = 11;
+	mctl_core_init(para);
+
+	for (para->cols = 8; para->cols < 11; para->cols++) {
+		/* 8 bits per byte and 16/32 bit width */
+		if (mctl_mem_matches(1 << (para->cols + 1 +
+					   para->bus_full_width)))
+			break;
+	}
+}
+
+static unsigned long mctl_calc_size(struct dram_para *para)
+{
+	u8 width = para->bus_full_width ? 4 : 2;
+
+	/* 8 banks */
+	return (1ULL << (para->cols + para->rows + 3)) * width * para->ranks;
+}
+
+unsigned long sunxi_dram_init(void)
+{
+	struct dram_para para = {
+		.clk = CONFIG_DRAM_CLK,
+		.type = SUNXI_DRAM_TYPE_DDR3,
+	};
+	unsigned long size;
+
+	setbits_le32(0x7010310, BIT(8));
+	clrbits_le32(0x7010318, 0x3f);
+
+	mctl_auto_detect_rank_width(&para);
+	mctl_auto_detect_dram_size(&para);
+
+	mctl_core_init(&para);
+
+	size = mctl_calc_size(&para);
+
+	mctl_set_master_priority();
+
+	return size;
+};
diff --git a/arch/arm/mach-sunxi/dram_timings/Makefile b/arch/arm/mach-sunxi/dram_timings/Makefile
index 0deb991..39a8756 100644
--- a/arch/arm/mach-sunxi/dram_timings/Makefile
+++ b/arch/arm/mach-sunxi/dram_timings/Makefile
@@ -3,3 +3,5 @@
 obj-$(CONFIG_SUNXI_DRAM_DDR2_V3S)	+= ddr2_v3s.o
 obj-$(CONFIG_SUNXI_DRAM_H6_LPDDR3)	+= h6_lpddr3.o
 obj-$(CONFIG_SUNXI_DRAM_H6_DDR3_1333)	+= h6_ddr3_1333.o
+# currently only DDR3 is supported on H616
+obj-$(CONFIG_MACH_SUN50I_H616)		+= h616_ddr3_1333.o
diff --git a/arch/arm/mach-sunxi/dram_timings/h616_ddr3_1333.c b/arch/arm/mach-sunxi/dram_timings/h616_ddr3_1333.c
new file mode 100644
index 0000000..8f50834
--- /dev/null
+++ b/arch/arm/mach-sunxi/dram_timings/h616_ddr3_1333.c
@@ -0,0 +1,94 @@
+/*
+ * sun50i H616 DDR3-1333 timings, as programmed by Allwinner's boot0
+ *
+ * The chips are probably able to be driven by a faster clock, but boot0
+ * uses a more conservative timing (as usual).
+ *
+ * (C) Copyright 2020 Jernej Skrabec <jernej.skrabec@siol.net>
+ * Based on H6 DDR3 timings:
+ * (C) Copyright 2018,2019 Arm Ltd.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <asm/arch/dram.h>
+#include <asm/arch/cpu.h>
+
+void mctl_set_timing_params(struct dram_para *para)
+{
+	struct sunxi_mctl_ctl_reg * const mctl_ctl =
+			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
+
+	u8 tccd		= 2;			/* JEDEC: 4nCK */
+	u8 tfaw		= ns_to_t(50);		/* JEDEC: 30 ns w/ 1K pages */
+	u8 trrd		= max(ns_to_t(6), 4);	/* JEDEC: max(6 ns, 4nCK) */
+	u8 trcd		= ns_to_t(15);		/* JEDEC: 13.5 ns */
+	u8 trc		= ns_to_t(53);		/* JEDEC: 49.5 ns */
+	u8 txp		= max(ns_to_t(6), 3);	/* JEDEC: max(6 ns, 3nCK) */
+	u8 trtp		= max(ns_to_t(8), 2);	/* JEDEC: max(7.5 ns, 4nCK) */
+	u8 trp		= ns_to_t(15);		/* JEDEC: >= 13.75 ns */
+	u8 tras		= ns_to_t(38);		/* JEDEC >= 36 ns, <= 9*trefi */
+	u16 trefi	= ns_to_t(7800) / 32;	/* JEDEC: 7.8us@Tcase <= 85C */
+	u16 trfc	= ns_to_t(350);		/* JEDEC: 160 ns for 2Gb */
+	u16 txsr	= 4;			/* ? */
+
+	u8 tmrw		= 0;			/* ? */
+	u8 tmrd		= 4;			/* JEDEC: 4nCK */
+	u8 tmod		= max(ns_to_t(15), 12);	/* JEDEC: max(15 ns, 12nCK) */
+	u8 tcke		= max(ns_to_t(6), 3);	/* JEDEC: max(5.625 ns, 3nCK) */
+	u8 tcksrx	= max(ns_to_t(10), 4);	/* JEDEC: max(10 ns, 5nCK) */
+	u8 tcksre	= max(ns_to_t(10), 4);	/* JEDEC: max(10 ns, 5nCK) */
+	u8 tckesr	= tcke + 1;		/* JEDEC: tCKE(min) + 1nCK */
+	u8 trasmax	= (para->clk / 2) / 15;	/* JEDEC: tREFI * 9 */
+	u8 txs		= ns_to_t(360) / 32;	/* JEDEC: max(5nCK,tRFC+10ns) */
+	u8 txsdll	= 16;			/* JEDEC: 512 nCK */
+	u8 txsabort	= 4;			/* ? */
+	u8 txsfast	= 4;			/* ? */
+	u8 tcl		= 7;			/* JEDEC: CL / 2 => 6 */
+	u8 tcwl		= 5;			/* JEDEC: 8 */
+	u8 t_rdata_en	= 9;			/* ? */
+
+	u8 twtp		= 14;			/* (WL + BL / 2 + tWR) / 2 */
+	u8 twr2rd	= trtp + 7;		/* (WL + BL / 2 + tWTR) / 2 */
+	u8 trd2wr	= 5;			/* (RL + BL / 2 + 2 - WL) / 2 */
+
+	/* set DRAM timing */
+	writel((twtp << 24) | (tfaw << 16) | (trasmax << 8) | tras,
+	       &mctl_ctl->dramtmg[0]);
+	writel((txp << 16) | (trtp << 8) | trc, &mctl_ctl->dramtmg[1]);
+	writel((tcwl << 24) | (tcl << 16) | (trd2wr << 8) | twr2rd,
+	       &mctl_ctl->dramtmg[2]);
+	writel((tmrw << 20) | (tmrd << 12) | tmod, &mctl_ctl->dramtmg[3]);
+	writel((trcd << 24) | (tccd << 16) | (trrd << 8) | trp,
+	       &mctl_ctl->dramtmg[4]);
+	writel((tcksrx << 24) | (tcksre << 16) | (tckesr << 8) | tcke,
+	       &mctl_ctl->dramtmg[5]);
+	/* Value suggested by ZynqMP manual and used by libdram */
+	writel((txp + 2) | 0x02020000, &mctl_ctl->dramtmg[6]);
+	writel((txsfast << 24) | (txsabort << 16) | (txsdll << 8) | txs,
+	       &mctl_ctl->dramtmg[8]);
+	writel(0x00020208, &mctl_ctl->dramtmg[9]);
+	writel(0xE0C05, &mctl_ctl->dramtmg[10]);
+	writel(0x440C021C, &mctl_ctl->dramtmg[11]);
+	writel(8, &mctl_ctl->dramtmg[12]);
+	writel(0xA100002, &mctl_ctl->dramtmg[13]);
+	writel(txsr, &mctl_ctl->dramtmg[14]);
+
+	clrbits_le32(&mctl_ctl->init[0], 3 << 30);
+	writel(0x420000, &mctl_ctl->init[1]);
+	writel(5, &mctl_ctl->init[2]);
+	writel(0x1f140004, &mctl_ctl->init[3]);
+	writel(0x00200000, &mctl_ctl->init[4]);
+
+	writel(0, &mctl_ctl->dfimisc);
+	clrsetbits_le32(&mctl_ctl->rankctl, 0xff0, 0x660);
+
+	/* Configure DFI timing */
+	writel((tcl - 2) | 0x2000000 | (t_rdata_en << 16) | 0x808000,
+	       &mctl_ctl->dfitmg0);
+	writel(0x100202, &mctl_ctl->dfitmg1);
+
+	/* set refresh timing */
+	writel((trefi << 16) | trfc, &mctl_ctl->rfshtmg);
+}