driver/ddr/fsl: Add DDR4 support to Freescale DDR driver

Mostly reusing DDR3 driver, this patch adds DDR4 SPD handling, register
calculation and programming.

Signed-off-by: York Sun <yorksun@freescale.com>
diff --git a/drivers/ddr/fsl/Makefile b/drivers/ddr/fsl/Makefile
index 265204f..df66c07 100644
--- a/drivers/ddr/fsl/Makefile
+++ b/drivers/ddr/fsl/Makefile
@@ -1,19 +1,20 @@
 #
-# Copyright 2008-2011 Freescale Semiconductor, Inc.
+# Copyright 2008-2014 Freescale Semiconductor, Inc.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # Version 2 as published by the Free Software Foundation.
 #
 
-obj-$(CONFIG_SYS_FSL_DDR1)	+= main.o util.o ctrl_regs.o options.o \
-				   lc_common_dimm_params.o
+obj-$(CONFIG_SYS_FSL_DDR1) += main.o util.o ctrl_regs.o options.o \
+				lc_common_dimm_params.o
+obj-$(CONFIG_SYS_FSL_DDR2) += main.o util.o ctrl_regs.o options.o \
+				lc_common_dimm_params.o
+obj-$(CONFIG_SYS_FSL_DDR3) += main.o util.o ctrl_regs.o options.o \
+				lc_common_dimm_params.o
+obj-$(CONFIG_SYS_FSL_DDR4) += main.o util.o ctrl_regs.o options.o \
+				lc_common_dimm_params.o
 
-obj-$(CONFIG_SYS_FSL_DDR2)	+= main.o util.o ctrl_regs.o options.o \
-				   lc_common_dimm_params.o
-
-obj-$(CONFIG_SYS_FSL_DDR3)	+= main.o util.o ctrl_regs.o options.o \
-				   lc_common_dimm_params.o
 ifdef CONFIG_DDR_SPD
 SPD := y
 endif
@@ -24,6 +25,7 @@
 obj-$(CONFIG_SYS_FSL_DDR1)	+= ddr1_dimm_params.o
 obj-$(CONFIG_SYS_FSL_DDR2)	+= ddr2_dimm_params.o
 obj-$(CONFIG_SYS_FSL_DDR3)	+= ddr3_dimm_params.o
+obj-$(CONFIG_SYS_FSL_DDR4)	+= ddr4_dimm_params.o
 endif
 
 obj-$(CONFIG_FSL_DDR_INTERACTIVE)	+= interactive.o
@@ -32,3 +34,4 @@
 obj-$(CONFIG_SYS_FSL_DDRC_GEN3)	+= mpc85xx_ddr_gen3.o
 obj-$(CONFIG_SYS_FSL_DDR_86XX)		+= mpc86xx_ddr.o
 obj-$(CONFIG_SYS_FSL_DDRC_ARM_GEN3)	+= arm_ddr_gen3.o
+obj-$(CONFIG_SYS_FSL_DDRC_GEN4) += fsl_ddr_gen4.o
diff --git a/drivers/ddr/fsl/ctrl_regs.c b/drivers/ddr/fsl/ctrl_regs.c
index 0882932..d01eea0 100644
--- a/drivers/ddr/fsl/ctrl_regs.c
+++ b/drivers/ddr/fsl/ctrl_regs.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2008-2012 Freescale Semiconductor, Inc.
+ * Copyright 2008-2014 Freescale Semiconductor, Inc.
  *
  * SPDX-License-Identifier:	GPL-2.0+
  */
@@ -17,20 +17,6 @@
 #include <fsl_immap.h>
 #include <asm/io.h>
 
-#define _DDR_ADDR CONFIG_SYS_FSL_DDR_ADDR
-
-static u32 fsl_ddr_get_version(void)
-{
-	struct ccsr_ddr __iomem *ddr;
-	u32 ver_major_minor_errata;
-
-	ddr = (void *)_DDR_ADDR;
-	ver_major_minor_errata = (ddr_in32(&ddr->ip_rev1) & 0xFFFF) << 8;
-	ver_major_minor_errata |= (ddr_in32(&ddr->ip_rev2) & 0xFF00) >> 8;
-
-	return ver_major_minor_errata;
-}
-
 unsigned int picos_to_mclk(unsigned int picos);
 
 /*
@@ -81,6 +67,39 @@
 	return rtt;
 }
 
+#ifdef CONFIG_SYS_FSL_DDR4
+/*
+ * compute CAS write latency according to DDR4 spec
+ * CWL = 9 for <= 1600MT/s
+ *       10 for <= 1866MT/s
+ *       11 for <= 2133MT/s
+ *       12 for <= 2400MT/s
+ *       14 for <= 2667MT/s
+ *       16 for <= 2933MT/s
+ *       18 for higher
+ */
+static inline unsigned int compute_cas_write_latency(void)
+{
+	unsigned int cwl;
+	const unsigned int mclk_ps = get_memory_clk_period_ps();
+	if (mclk_ps >= 1250)
+		cwl = 9;
+	else if (mclk_ps >= 1070)
+		cwl = 10;
+	else if (mclk_ps >= 935)
+		cwl = 11;
+	else if (mclk_ps >= 833)
+		cwl = 12;
+	else if (mclk_ps >= 750)
+		cwl = 14;
+	else if (mclk_ps >= 681)
+		cwl = 16;
+	else
+		cwl = 18;
+
+	return cwl;
+}
+#else
 /*
  * compute the CAS write latency according to DDR3 spec
  * CWL = 5 if tCK >= 2.5ns
@@ -119,6 +138,7 @@
 	}
 	return cwl;
 }
+#endif
 
 /* Chip Select Configuration (CSn_CONFIG) */
 static void set_csn_config(int dimm_number, int i, fsl_ddr_cfg_regs_t *ddr,
@@ -135,6 +155,11 @@
 	unsigned int row_bits_cs_n = 0; /* Num of row bits for SDRAM on CSn */
 	unsigned int col_bits_cs_n = 0; /* Num of ocl bits for SDRAM on CSn */
 	int go_config = 0;
+#ifdef CONFIG_SYS_FSL_DDR4
+	unsigned int bg_bits_cs_n = 0; /* Num of bank group bits */
+#else
+	unsigned int n_banks_per_sdram_device;
+#endif
 
 	/* Compute CS_CONFIG only for existing ranks of each DIMM.  */
 	switch (i) {
@@ -178,14 +203,18 @@
 		break;
 	}
 	if (go_config) {
-		unsigned int n_banks_per_sdram_device;
 		cs_n_en = 1;
 		ap_n_en = popts->cs_local_opts[i].auto_precharge;
 		odt_rd_cfg = popts->cs_local_opts[i].odt_rd_cfg;
 		odt_wr_cfg = popts->cs_local_opts[i].odt_wr_cfg;
+#ifdef CONFIG_SYS_FSL_DDR4
+		ba_bits_cs_n = dimm_params[dimm_number].bank_addr_bits;
+		bg_bits_cs_n = dimm_params[dimm_number].bank_group_bits;
+#else
 		n_banks_per_sdram_device
 			= dimm_params[dimm_number].n_banks_per_sdram_device;
 		ba_bits_cs_n = __ilog2(n_banks_per_sdram_device) - 2;
+#endif
 		row_bits_cs_n = dimm_params[dimm_number].n_row_addr - 12;
 		col_bits_cs_n = dimm_params[dimm_number].n_col_addr - 8;
 	}
@@ -203,6 +232,9 @@
 
 		| ((ba_bits_cs_n & 0x3) << 14)
 		| ((row_bits_cs_n & 0x7) << 8)
+#ifdef CONFIG_SYS_FSL_DDR4
+		| ((bg_bits_cs_n & 0x3) << 4)
+#endif
 		| ((col_bits_cs_n & 0x7) << 0)
 		);
 	debug("FSLDDR: cs[%d]_config = 0x%08x\n", i,ddr->cs[i].config);
@@ -262,11 +294,23 @@
 	/* Precharge powerdown exit timing (tXP). */
 	unsigned char pre_pd_exit_mclk;
 	/* ODT powerdown exit timing (tAXPD). */
-	unsigned char taxpd_mclk;
+	unsigned char taxpd_mclk = 0;
 	/* Mode register set cycle time (tMRD). */
 	unsigned char tmrd_mclk;
 
-#ifdef CONFIG_SYS_FSL_DDR3
+#ifdef CONFIG_SYS_FSL_DDR4
+	/* tXP=max(4nCK, 6ns) */
+	int txp = max((get_memory_clk_period_ps() * 4), 6000); /* unit=ps */
+	trwt_mclk = 2;
+	twrt_mclk = 1;
+	act_pd_exit_mclk = picos_to_mclk(txp);
+	pre_pd_exit_mclk = act_pd_exit_mclk;
+	/*
+	 * MRS_CYC = max(tMRD, tMOD)
+	 * tMRD = 8nCK, tMOD = max(24nCK, 15ns)
+	 */
+	tmrd_mclk = max(24, picos_to_mclk(15000));
+#elif defined(CONFIG_SYS_FSL_DDR3)
 	/*
 	 * (tXARD and tXARDS). Empirical?
 	 * The DDR3 spec has not tXARD,
@@ -275,7 +319,7 @@
 	 * spec has not the tAXPD, we use
 	 * tAXPD=1, need design to confirm.
 	 */
-	int tXP = max((get_memory_clk_period_ps() * 3), 7500); /* unit=ps */
+	int txp = max((get_memory_clk_period_ps() * 3), 7500); /* unit=ps */
 	unsigned int data_rate = get_ddr_freq(0);
 	tmrd_mclk = 4;
 	/* set the turnaround time */
@@ -300,7 +344,7 @@
 		taxpd_mclk = 1;
 	} else {
 		/* act_pd_exit_mclk = tXARD, see above */
-		act_pd_exit_mclk = picos_to_mclk(tXP);
+		act_pd_exit_mclk = picos_to_mclk(txp);
 		/* Mode register MR0[A12] is '1' - fast exit */
 		pre_pd_exit_mclk = act_pd_exit_mclk;
 		taxpd_mclk = 1;
@@ -364,8 +408,12 @@
 	ext_acttorw = picos_to_mclk(common_dimm->trcd_ps) >> 4;
 	ext_caslat = (2 * cas_latency - 1) >> 4;
 	ext_add_lat = additive_latency >> 4;
+#ifdef CONFIG_SYS_FSL_DDR4
+	ext_refrec = (picos_to_mclk(common_dimm->trfc1_ps) - 8) >> 4;
+#else
 	ext_refrec = (picos_to_mclk(common_dimm->trfc_ps) - 8) >> 4;
 	/* ext_wrrec only deals with 16 clock and above, or 14 with OTF */
+#endif
 	ext_wrrec = (picos_to_mclk(common_dimm->twr_ps) +
 		(popts->otf_burst_chop_en ? 2 : 0)) >> 4;
 
@@ -404,9 +452,19 @@
 	unsigned char acttoact_mclk;
 	/* Last write data pair to read command issue interval (tWTR) */
 	unsigned char wrtord_mclk;
+#ifdef CONFIG_SYS_FSL_DDR4
+	/* DDR4 supports 10, 12, 14, 16, 18, 20, 24 */
+	static const u8 wrrec_table[] = {
+		10, 10, 10, 10, 10,
+		10, 10, 10, 10, 10,
+		12, 12, 14, 14, 16,
+		16, 18, 18, 20, 20,
+		24, 24, 24, 24};
+#else
 	/* DDR_SDRAM_MODE doesn't support 9,11,13,15 */
 	static const u8 wrrec_table[] = {
 		1, 2, 3, 4, 5, 6, 7, 8, 10, 10, 12, 12, 14, 14, 0, 0};
+#endif
 
 	pretoact_mclk = picos_to_mclk(common_dimm->trp_ps);
 	acttopre_mclk = picos_to_mclk(common_dimm->tras_ps);
@@ -438,20 +496,34 @@
 	 * we need set extend bit for it at
 	 * TIMING_CFG_3[EXT_CASLAT]
 	 */
-	caslat_ctrl = 2 * cas_latency - 1;
+	if (fsl_ddr_get_version() <= 0x40400)
+		caslat_ctrl = 2 * cas_latency - 1;
+	else
+		caslat_ctrl = (cas_latency - 1) << 1;
 #endif
 
+#ifdef CONFIG_SYS_FSL_DDR4
+	refrec_ctrl = picos_to_mclk(common_dimm->trfc1_ps) - 8;
+	wrrec_mclk = picos_to_mclk(common_dimm->twr_ps);
+	acttoact_mclk = max(picos_to_mclk(common_dimm->trrds_ps), 4);
+	wrtord_mclk = max(2, picos_to_mclk(2500));
+	if (wrrec_mclk > 24)
+		printf("Error: WRREC doesn't support more than 24 clocks\n");
+	else
+		wrrec_mclk = wrrec_table[wrrec_mclk - 1];
+#else
 	refrec_ctrl = picos_to_mclk(common_dimm->trfc_ps) - 8;
 	wrrec_mclk = picos_to_mclk(common_dimm->twr_ps);
-
+	acttoact_mclk = picos_to_mclk(common_dimm->trrd_ps);
+	wrtord_mclk = picos_to_mclk(common_dimm->twtr_ps);
 	if (wrrec_mclk > 16)
 		printf("Error: WRREC doesn't support more than 16 clocks\n");
 	else
 		wrrec_mclk = wrrec_table[wrrec_mclk - 1];
+#endif
 	if (popts->otf_burst_chop_en)
 		wrrec_mclk += 2;
 
-	acttoact_mclk = picos_to_mclk(common_dimm->trrd_ps);
 	/*
 	 * JEDEC has min requirement for tRRD
 	 */
@@ -459,7 +531,6 @@
 	if (acttoact_mclk < 4)
 		acttoact_mclk = 4;
 #endif
-	wrtord_mclk = picos_to_mclk(common_dimm->twtr_ps);
 	/*
 	 * JEDEC has some min requirements for tWTR
 	 */
@@ -526,14 +597,18 @@
 	wr_lat = compute_cas_write_latency();
 #endif
 
+#ifdef CONFIG_SYS_FSL_DDR4
+	rd_to_pre = picos_to_mclk(7500);
+#else
 	rd_to_pre = picos_to_mclk(common_dimm->trtp_ps);
+#endif
 	/*
 	 * JEDEC has some min requirements for tRTP
 	 */
 #if defined(CONFIG_SYS_FSL_DDR2)
 	if (rd_to_pre  < 2)
 		rd_to_pre  = 2;
-#elif defined(CONFIG_SYS_FSL_DDR3)
+#elif defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4)
 	if (rd_to_pre < 4)
 		rd_to_pre = 4;
 #endif
@@ -541,13 +616,20 @@
 		rd_to_pre += 2; /* according to UM */
 
 	wr_data_delay = popts->write_data_delay;
+#ifdef CONFIG_SYS_FSL_DDR4
+	cpo = 0;
+	cke_pls = max(3, picos_to_mclk(5000));
+#else
 	cke_pls = picos_to_mclk(popts->tcke_clock_pulse_width_ps);
+#endif
+
 	four_act = picos_to_mclk(popts->tfaw_window_four_activates_ps);
 
 	ddr->timing_cfg_2 = (0
 		| ((add_lat_mclk & 0xf) << 28)
 		| ((cpo & 0x1f) << 23)
 		| ((wr_lat & 0xf) << 19)
+		| ((wr_lat & 0x10) << 14)
 		| ((rd_to_pre & RD_TO_PRE_MASK) << RD_TO_PRE_SHIFT)
 		| ((wr_data_delay & WR_DATA_DELAY_MASK) << WR_DATA_DELAY_SHIFT)
 		| ((cke_pls & 0x7) << 6)
@@ -640,7 +722,8 @@
 	 * we must clear it when use the on-the-fly mode,
 	 * must set it when use the 32-bits bus mode.
 	 */
-	if (sdram_type == SDRAM_TYPE_DDR3) {
+	if ((sdram_type == SDRAM_TYPE_DDR3) ||
+	    (sdram_type == SDRAM_TYPE_DDR4)) {
 		if (popts->burst_length == DDR_BL8)
 			eight_be = 1;
 		if (popts->burst_length == DDR_OTF)
@@ -682,8 +765,6 @@
 {
 	unsigned int frc_sr = 0;	/* Force self refresh */
 	unsigned int sr_ie = 0;		/* Self-refresh interrupt enable */
-	unsigned int dll_rst_dis;	/* DLL reset disable */
-	unsigned int dqs_cfg;		/* DQS configuration */
 	unsigned int odt_cfg = 0;	/* ODT configuration */
 	unsigned int num_pr;		/* Number of posted refreshes */
 	unsigned int slow = 0;		/* DDR will be run less than 1250 */
@@ -695,9 +776,12 @@
 	unsigned int md_en = 0;		/* Mirrored DIMM Enable */
 	unsigned int qd_en = 0;		/* quad-rank DIMM Enable */
 	int i;
+#ifndef CONFIG_SYS_FSL_DDR4
+	unsigned int dll_rst_dis = 1;	/* DLL reset disable */
+	unsigned int dqs_cfg;		/* DQS configuration */
 
-	dll_rst_dis = 1;	/* Make this configurable */
 	dqs_cfg = popts->dqs_config;
+#endif
 	for (i = 0; i < CONFIG_CHIP_SELECTS_PER_CTRL; i++) {
 		if (popts->cs_local_opts[i].odt_rd_cfg
 			|| popts->cs_local_opts[i].odt_wr_cfg) {
@@ -715,7 +799,7 @@
 	 *        * ({EXT_REFREC || REFREC} + 8 + 2)]}
 	 *      << DDR_SDRAM_INTERVAL[REFINT]
 	 */
-#if defined(CONFIG_SYS_FSL_DDR3)
+#if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4)
 	obc_cfg = popts->otf_burst_chop_en;
 #else
 	obc_cfg = 0;
@@ -744,15 +828,17 @@
 	d_init = 0;
 #endif
 
-#if defined(CONFIG_SYS_FSL_DDR3)
+#if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4)
 	md_en = popts->mirrored_dimm;
 #endif
 	qd_en = popts->quad_rank_present ? 1 : 0;
 	ddr->ddr_sdram_cfg_2 = (0
 		| ((frc_sr & 0x1) << 31)
 		| ((sr_ie & 0x1) << 30)
+#ifndef CONFIG_SYS_FSL_DDR4
 		| ((dll_rst_dis & 0x1) << 29)
 		| ((dqs_cfg & 0x3) << 26)
+#endif
 		| ((odt_cfg & 0x3) << 21)
 		| ((num_pr & 0xf) << 12)
 		| ((slow & 1) << 11)
@@ -768,6 +854,7 @@
 	debug("FSLDDR: ddr_sdram_cfg_2 = 0x%08x\n", ddr->ddr_sdram_cfg_2);
 }
 
+#ifdef CONFIG_SYS_FSL_DDR4
 /* DDR SDRAM Mode configuration 2 (DDR_SDRAM_MODE_2) */
 static void set_ddr_sdram_mode_2(fsl_ddr_cfg_regs_t *ddr,
 				const memctl_options_t *popts,
@@ -776,8 +863,93 @@
 {
 	unsigned short esdmode2 = 0;	/* Extended SDRAM mode 2 */
 	unsigned short esdmode3 = 0;	/* Extended SDRAM mode 3 */
+	int i;
+	unsigned int wr_crc = 0;	/* Disable */
+	unsigned int rtt_wr = 0;	/* Rtt_WR - dynamic ODT off */
+	unsigned int srt = 0;	/* self-refresh temerature, normal range */
+	unsigned int cwl = compute_cas_write_latency() - 9;
+	unsigned int mpr = 0;	/* serial */
+	unsigned int wc_lat;
+	const unsigned int mclk_ps = get_memory_clk_period_ps();
 
-#if defined(CONFIG_SYS_FSL_DDR3)
+	if (popts->rtt_override)
+		rtt_wr = popts->rtt_wr_override_value;
+	else
+		rtt_wr = popts->cs_local_opts[0].odt_rtt_wr;
+
+	if (common_dimm->extended_op_srt)
+		srt = common_dimm->extended_op_srt;
+
+	esdmode2 = (0
+		| ((wr_crc & 0x1) << 12)
+		| ((rtt_wr & 0x3) << 9)
+		| ((srt & 0x3) << 6)
+		| ((cwl & 0x7) << 3));
+
+	if (mclk_ps >= 1250)
+		wc_lat = 0;
+	else if (mclk_ps >= 833)
+		wc_lat = 1;
+	else
+		wc_lat = 2;
+
+	esdmode3 = (0
+		| ((mpr & 0x3) << 11)
+		| ((wc_lat & 0x3) << 9));
+
+	ddr->ddr_sdram_mode_2 = (0
+				 | ((esdmode2 & 0xFFFF) << 16)
+				 | ((esdmode3 & 0xFFFF) << 0)
+				 );
+	debug("FSLDDR: ddr_sdram_mode_2 = 0x%08x\n", ddr->ddr_sdram_mode_2);
+
+	if (unq_mrs_en) {	/* unique mode registers are supported */
+		for (i = 1; i < CONFIG_CHIP_SELECTS_PER_CTRL; i++) {
+			if (popts->rtt_override)
+				rtt_wr = popts->rtt_wr_override_value;
+			else
+				rtt_wr = popts->cs_local_opts[i].odt_rtt_wr;
+
+			esdmode2 &= 0xF9FF;	/* clear bit 10, 9 */
+			esdmode2 |= (rtt_wr & 0x3) << 9;
+			switch (i) {
+			case 1:
+				ddr->ddr_sdram_mode_4 = (0
+					| ((esdmode2 & 0xFFFF) << 16)
+					| ((esdmode3 & 0xFFFF) << 0)
+					);
+				break;
+			case 2:
+				ddr->ddr_sdram_mode_6 = (0
+					| ((esdmode2 & 0xFFFF) << 16)
+					| ((esdmode3 & 0xFFFF) << 0)
+					);
+				break;
+			case 3:
+				ddr->ddr_sdram_mode_8 = (0
+					| ((esdmode2 & 0xFFFF) << 16)
+					| ((esdmode3 & 0xFFFF) << 0)
+					);
+				break;
+			}
+		}
+		debug("FSLDDR: ddr_sdram_mode_4 = 0x%08x\n",
+		      ddr->ddr_sdram_mode_4);
+		debug("FSLDDR: ddr_sdram_mode_6 = 0x%08x\n",
+		      ddr->ddr_sdram_mode_6);
+		debug("FSLDDR: ddr_sdram_mode_8 = 0x%08x\n",
+		      ddr->ddr_sdram_mode_8);
+	}
+}
+#elif defined(CONFIG_SYS_FSL_DDR3)
+/* DDR SDRAM Mode configuration 2 (DDR_SDRAM_MODE_2) */
+static void set_ddr_sdram_mode_2(fsl_ddr_cfg_regs_t *ddr,
+				const memctl_options_t *popts,
+				const common_timing_params_t *common_dimm,
+				const unsigned int unq_mrs_en)
+{
+	unsigned short esdmode2 = 0;	/* Extended SDRAM mode 2 */
+	unsigned short esdmode3 = 0;	/* Extended SDRAM mode 3 */
 	int i;
 	unsigned int rtt_wr = 0;	/* Rtt_WR - dynamic ODT off */
 	unsigned int srt = 0;	/* self-refresh temerature, normal range */
@@ -799,14 +971,12 @@
 		| ((asr & 0x1) << 6)
 		| ((cwl & 0x7) << 3)
 		| ((pasr & 0x7) << 0));
-#endif
 	ddr->ddr_sdram_mode_2 = (0
 				 | ((esdmode2 & 0xFFFF) << 16)
 				 | ((esdmode3 & 0xFFFF) << 0)
 				 );
 	debug("FSLDDR: ddr_sdram_mode_2 = 0x%08x\n", ddr->ddr_sdram_mode_2);
 
-#ifdef CONFIG_SYS_FSL_DDR3
 	if (unq_mrs_en) {	/* unique mode registers are supported */
 		for (i = 1; i < CONFIG_CHIP_SELECTS_PER_CTRL; i++) {
 			if (popts->rtt_override)
@@ -844,9 +1014,128 @@
 		debug("FSLDDR: ddr_sdram_mode_8 = 0x%08x\n",
 			ddr->ddr_sdram_mode_8);
 	}
-#endif
 }
 
+#else /* for DDR2 and DDR1 */
+/* DDR SDRAM Mode configuration 2 (DDR_SDRAM_MODE_2) */
+static void set_ddr_sdram_mode_2(fsl_ddr_cfg_regs_t *ddr,
+				const memctl_options_t *popts,
+				const common_timing_params_t *common_dimm,
+				const unsigned int unq_mrs_en)
+{
+	unsigned short esdmode2 = 0;	/* Extended SDRAM mode 2 */
+	unsigned short esdmode3 = 0;	/* Extended SDRAM mode 3 */
+
+	ddr->ddr_sdram_mode_2 = (0
+				 | ((esdmode2 & 0xFFFF) << 16)
+				 | ((esdmode3 & 0xFFFF) << 0)
+				 );
+	debug("FSLDDR: ddr_sdram_mode_2 = 0x%08x\n", ddr->ddr_sdram_mode_2);
+}
+#endif
+
+#ifdef CONFIG_SYS_FSL_DDR4
+/* DDR SDRAM Mode configuration 9 (DDR_SDRAM_MODE_9) */
+static void set_ddr_sdram_mode_9(fsl_ddr_cfg_regs_t *ddr,
+				const memctl_options_t *popts,
+				const common_timing_params_t *common_dimm,
+				const unsigned int unq_mrs_en)
+{
+	int i;
+	unsigned short esdmode4 = 0;	/* Extended SDRAM mode 4 */
+	unsigned short esdmode5;	/* Extended SDRAM mode 5 */
+
+	esdmode5 = 0x00000400;		/* Data mask enabled */
+
+	ddr->ddr_sdram_mode_9 = (0
+				 | ((esdmode4 & 0xffff) << 16)
+				 | ((esdmode5 & 0xffff) << 0)
+				);
+	debug("FSLDDR: ddr_sdram_mode_9) = 0x%08x\n", ddr->ddr_sdram_mode_9);
+	if (unq_mrs_en) {	/* unique mode registers are supported */
+		for (i = 1; i < CONFIG_CHIP_SELECTS_PER_CTRL; i++) {
+			switch (i) {
+			case 1:
+				ddr->ddr_sdram_mode_11 = (0
+					| ((esdmode4 & 0xFFFF) << 16)
+					| ((esdmode5 & 0xFFFF) << 0)
+					);
+				break;
+			case 2:
+				ddr->ddr_sdram_mode_13 = (0
+					| ((esdmode4 & 0xFFFF) << 16)
+					| ((esdmode5 & 0xFFFF) << 0)
+					);
+				break;
+			case 3:
+				ddr->ddr_sdram_mode_15 = (0
+					| ((esdmode4 & 0xFFFF) << 16)
+					| ((esdmode5 & 0xFFFF) << 0)
+					);
+				break;
+			}
+		}
+		debug("FSLDDR: ddr_sdram_mode_11 = 0x%08x\n",
+		      ddr->ddr_sdram_mode_11);
+		debug("FSLDDR: ddr_sdram_mode_13 = 0x%08x\n",
+		      ddr->ddr_sdram_mode_13);
+		debug("FSLDDR: ddr_sdram_mode_15 = 0x%08x\n",
+		      ddr->ddr_sdram_mode_15);
+	}
+}
+
+/* DDR SDRAM Mode configuration 10 (DDR_SDRAM_MODE_10) */
+static void set_ddr_sdram_mode_10(fsl_ddr_cfg_regs_t *ddr,
+				const memctl_options_t *popts,
+				const common_timing_params_t *common_dimm,
+				const unsigned int unq_mrs_en)
+{
+	int i;
+	unsigned short esdmode6 = 0;	/* Extended SDRAM mode 6 */
+	unsigned short esdmode7 = 0;	/* Extended SDRAM mode 7 */
+	unsigned int tccdl_min = picos_to_mclk(common_dimm->tccdl_ps);
+
+	esdmode6 = ((tccdl_min - 4) & 0x7) << 10;
+
+	ddr->ddr_sdram_mode_10 = (0
+				 | ((esdmode6 & 0xffff) << 16)
+				 | ((esdmode7 & 0xffff) << 0)
+				);
+	debug("FSLDDR: ddr_sdram_mode_10) = 0x%08x\n", ddr->ddr_sdram_mode_10);
+	if (unq_mrs_en) {	/* unique mode registers are supported */
+		for (i = 1; i < CONFIG_CHIP_SELECTS_PER_CTRL; i++) {
+			switch (i) {
+			case 1:
+				ddr->ddr_sdram_mode_12 = (0
+					| ((esdmode6 & 0xFFFF) << 16)
+					| ((esdmode7 & 0xFFFF) << 0)
+					);
+				break;
+			case 2:
+				ddr->ddr_sdram_mode_14 = (0
+					| ((esdmode6 & 0xFFFF) << 16)
+					| ((esdmode7 & 0xFFFF) << 0)
+					);
+				break;
+			case 3:
+				ddr->ddr_sdram_mode_16 = (0
+					| ((esdmode6 & 0xFFFF) << 16)
+					| ((esdmode7 & 0xFFFF) << 0)
+					);
+				break;
+			}
+		}
+		debug("FSLDDR: ddr_sdram_mode_12 = 0x%08x\n",
+		      ddr->ddr_sdram_mode_12);
+		debug("FSLDDR: ddr_sdram_mode_14 = 0x%08x\n",
+		      ddr->ddr_sdram_mode_14);
+		debug("FSLDDR: ddr_sdram_mode_16 = 0x%08x\n",
+		      ddr->ddr_sdram_mode_16);
+	}
+}
+
+#endif
+
 /* DDR SDRAM Interval Configuration (DDR_SDRAM_INTERVAL) */
 static void set_ddr_sdram_interval(fsl_ddr_cfg_regs_t *ddr,
 			       const memctl_options_t *popts,
@@ -867,7 +1156,7 @@
 	debug("FSLDDR: ddr_sdram_interval = 0x%08x\n", ddr->ddr_sdram_interval);
 }
 
-#if defined(CONFIG_SYS_FSL_DDR3)
+#ifdef CONFIG_SYS_FSL_DDR4
 /* DDR SDRAM Mode configuration set (DDR_SDRAM_MODE) */
 static void set_ddr_sdram_mode(fsl_ddr_cfg_regs_t *ddr,
 			       const memctl_options_t *popts,
@@ -876,6 +1165,177 @@
 			       unsigned int additive_latency,
 			       const unsigned int unq_mrs_en)
 {
+	int i;
+	unsigned short esdmode;		/* Extended SDRAM mode */
+	unsigned short sdmode;		/* SDRAM mode */
+
+	/* Mode Register - MR1 */
+	unsigned int qoff = 0;		/* Output buffer enable 0=yes, 1=no */
+	unsigned int tdqs_en = 0;	/* TDQS Enable: 0=no, 1=yes */
+	unsigned int rtt;
+	unsigned int wrlvl_en = 0;	/* Write level enable: 0=no, 1=yes */
+	unsigned int al = 0;		/* Posted CAS# additive latency (AL) */
+	unsigned int dic = 0;		/* Output driver impedance, 40ohm */
+	unsigned int dll_en = 1;	/* DLL Enable  1=Enable (Normal),
+						       0=Disable (Test/Debug) */
+
+	/* Mode Register - MR0 */
+	unsigned int wr = 0;	/* Write Recovery */
+	unsigned int dll_rst;	/* DLL Reset */
+	unsigned int mode;	/* Normal=0 or Test=1 */
+	unsigned int caslat = 4;/* CAS# latency, default set as 6 cycles */
+	/* BT: Burst Type (0=Nibble Sequential, 1=Interleaved) */
+	unsigned int bt;
+	unsigned int bl;	/* BL: Burst Length */
+
+	unsigned int wr_mclk;
+	/* DDR4 support WR 10, 12, 14, 16, 18, 20, 24 */
+	static const u8 wr_table[] = {
+		0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6, 6};
+	/* DDR4 support CAS 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24 */
+	static const u8 cas_latency_table[] = {
+		0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
+		9, 9, 10, 10, 11, 11};
+
+	if (popts->rtt_override)
+		rtt = popts->rtt_override_value;
+	else
+		rtt = popts->cs_local_opts[0].odt_rtt_norm;
+
+	if (additive_latency == (cas_latency - 1))
+		al = 1;
+	if (additive_latency == (cas_latency - 2))
+		al = 2;
+
+	if (popts->quad_rank_present)
+		dic = 1;	/* output driver impedance 240/7 ohm */
+
+	/*
+	 * The esdmode value will also be used for writing
+	 * MR1 during write leveling for DDR3, although the
+	 * bits specifically related to the write leveling
+	 * scheme will be handled automatically by the DDR
+	 * controller. so we set the wrlvl_en = 0 here.
+	 */
+	esdmode = (0
+		| ((qoff & 0x1) << 12)
+		| ((tdqs_en & 0x1) << 11)
+		| ((rtt & 0x7) << 8)
+		| ((wrlvl_en & 0x1) << 7)
+		| ((al & 0x3) << 3)
+		| ((dic & 0x3) << 1)   /* DIC field is split */
+		| ((dll_en & 0x1) << 0)
+		);
+
+	/*
+	 * DLL control for precharge PD
+	 * 0=slow exit DLL off (tXPDLL)
+	 * 1=fast exit DLL on (tXP)
+	 */
+
+	wr_mclk = picos_to_mclk(common_dimm->twr_ps);
+	if (wr_mclk <= 24) {
+		wr = wr_table[wr_mclk - 10];
+	} else {
+		printf("Error: unsupported write recovery for mode register wr_mclk = %d\n",
+		       wr_mclk);
+	}
+
+	dll_rst = 0;	/* dll no reset */
+	mode = 0;	/* normal mode */
+
+	/* look up table to get the cas latency bits */
+	if (cas_latency >= 9 && cas_latency <= 24)
+		caslat = cas_latency_table[cas_latency - 9];
+	else
+		printf("Error: unsupported cas latency for mode register\n");
+
+	bt = 0;	/* Nibble sequential */
+
+	switch (popts->burst_length) {
+	case DDR_BL8:
+		bl = 0;
+		break;
+	case DDR_OTF:
+		bl = 1;
+		break;
+	case DDR_BC4:
+		bl = 2;
+		break;
+	default:
+		printf("Error: invalid burst length of %u specified. ",
+		       popts->burst_length);
+		puts("Defaulting to on-the-fly BC4 or BL8 beats.\n");
+		bl = 1;
+		break;
+	}
+
+	sdmode = (0
+		  | ((wr & 0x7) << 9)
+		  | ((dll_rst & 0x1) << 8)
+		  | ((mode & 0x1) << 7)
+		  | (((caslat >> 1) & 0x7) << 4)
+		  | ((bt & 0x1) << 3)
+		  | ((caslat & 1) << 2)
+		  | ((bl & 0x3) << 0)
+		  );
+
+	ddr->ddr_sdram_mode = (0
+			       | ((esdmode & 0xFFFF) << 16)
+			       | ((sdmode & 0xFFFF) << 0)
+			       );
+
+	debug("FSLDDR: ddr_sdram_mode = 0x%08x\n", ddr->ddr_sdram_mode);
+
+	if (unq_mrs_en) {	/* unique mode registers are supported */
+		for (i = 1; i < CONFIG_CHIP_SELECTS_PER_CTRL; i++) {
+			if (popts->rtt_override)
+				rtt = popts->rtt_override_value;
+			else
+				rtt = popts->cs_local_opts[i].odt_rtt_norm;
+
+			esdmode &= 0xF8FF;	/* clear bit 10,9,8 for rtt */
+			esdmode |= (rtt & 0x7) << 8;
+			switch (i) {
+			case 1:
+				ddr->ddr_sdram_mode_3 = (0
+				       | ((esdmode & 0xFFFF) << 16)
+				       | ((sdmode & 0xFFFF) << 0)
+				       );
+				break;
+			case 2:
+				ddr->ddr_sdram_mode_5 = (0
+				       | ((esdmode & 0xFFFF) << 16)
+				       | ((sdmode & 0xFFFF) << 0)
+				       );
+				break;
+			case 3:
+				ddr->ddr_sdram_mode_7 = (0
+				       | ((esdmode & 0xFFFF) << 16)
+				       | ((sdmode & 0xFFFF) << 0)
+				       );
+				break;
+			}
+		}
+		debug("FSLDDR: ddr_sdram_mode_3 = 0x%08x\n",
+		      ddr->ddr_sdram_mode_3);
+		debug("FSLDDR: ddr_sdram_mode_5 = 0x%08x\n",
+		      ddr->ddr_sdram_mode_5);
+		debug("FSLDDR: ddr_sdram_mode_5 = 0x%08x\n",
+		      ddr->ddr_sdram_mode_5);
+	}
+}
+
+#elif defined(CONFIG_SYS_FSL_DDR3)
+/* DDR SDRAM Mode configuration set (DDR_SDRAM_MODE) */
+static void set_ddr_sdram_mode(fsl_ddr_cfg_regs_t *ddr,
+			       const memctl_options_t *popts,
+			       const common_timing_params_t *common_dimm,
+			       unsigned int cas_latency,
+			       unsigned int additive_latency,
+			       const unsigned int unq_mrs_en)
+{
+	int i;
 	unsigned short esdmode;		/* Extended SDRAM mode */
 	unsigned short sdmode;		/* SDRAM mode */
 
@@ -907,9 +1367,6 @@
 	 */
 	static const u8 wr_table[] = {1, 2, 3, 4, 5, 5, 6, 6, 7, 7, 0, 0};
 
-	const unsigned int mclk_ps = get_memory_clk_period_ps();
-	int i;
-
 	if (popts->rtt_override)
 		rtt = popts->rtt_override_value;
 	else
@@ -950,7 +1407,7 @@
 	 */
 	dll_on = 1;
 
-	wr_mclk = (common_dimm->twr_ps + mclk_ps - 1) / mclk_ps;
+	wr_mclk = picos_to_mclk(common_dimm->twr_ps);
 	if (wr_mclk <= 16) {
 		wr = wr_table[wr_mclk - 5];
 	} else {
@@ -1109,9 +1566,6 @@
 	unsigned int bt;
 	unsigned int bl;	/* BL: Burst Length */
 
-#if defined(CONFIG_SYS_FSL_DDR2)
-	const unsigned int mclk_ps = get_memory_clk_period_ps();
-#endif
 	dqs_en = !popts->dqs_config;
 	rtt = fsl_ddr_get_rtt();
 
@@ -1141,7 +1595,7 @@
 #if defined(CONFIG_SYS_FSL_DDR1)
 	wr = 0;       /* Historical */
 #elif defined(CONFIG_SYS_FSL_DDR2)
-	wr = (common_dimm->twr_ps + mclk_ps - 1) / mclk_ps - 1;
+	wr = picos_to_mclk(common_dimm->twr_ps);
 #endif
 	dll_res = 0;
 	mode = 0;
@@ -1255,7 +1709,7 @@
 	unsigned int wwt = 0; /* Write-to-write turnaround for same CS */
 	unsigned int dll_lock = 0; /* DDR SDRAM DLL Lock Time */
 
-#if defined(CONFIG_SYS_FSL_DDR3)
+#if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4)
 	if (popts->burst_length == DDR_BL8) {
 		/* We set BL/2 for fixed BL8 */
 		rrt = 0;	/* BL/2 clocks */
@@ -1265,6 +1719,11 @@
 		rrt = 2;	/* BL/2 + 2 clocks */
 		wwt = 2;	/* BL/2 + 2 clocks */
 	}
+#endif
+
+#ifdef CONFIG_SYS_FSL_DDR4
+	dll_lock = 2;	/* tDLLK = 1024 clocks */
+#elif defined(CONFIG_SYS_FSL_DDR3)
 	dll_lock = 1;	/* tDLLK = 512 clocks from spec */
 #endif
 	ddr->timing_cfg_4 = (0
@@ -1285,9 +1744,12 @@
 	unsigned int wodt_on = 0;	/* Write to ODT on */
 	unsigned int wodt_off = 0;	/* Write to ODT off */
 
-#if defined(CONFIG_SYS_FSL_DDR3)
+#if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4)
+	unsigned int wr_lat = ((ddr->timing_cfg_2 & 0x00780000) >> 19) +
+			      ((ddr->timing_cfg_2 & 0x00040000) >> 14);
 	/* rodt_on = timing_cfg_1[caslat] - timing_cfg_2[wrlat] + 1 */
-	rodt_on = cas_latency - ((ddr->timing_cfg_2 & 0x00780000) >> 19) + 1;
+	if (cas_latency >= wr_lat)
+		rodt_on = cas_latency - wr_lat + 1;
 	rodt_off = 4;	/*  4 clocks */
 	wodt_on = 1;	/*  1 clocks */
 	wodt_off = 4;	/*  4 clocks */
@@ -1302,6 +1764,164 @@
 	debug("FSLDDR: timing_cfg_5 = 0x%08x\n", ddr->timing_cfg_5);
 }
 
+#ifdef CONFIG_SYS_FSL_DDR4
+static void set_timing_cfg_6(fsl_ddr_cfg_regs_t *ddr)
+{
+	unsigned int hs_caslat = 0;
+	unsigned int hs_wrlat = 0;
+	unsigned int hs_wrrec = 0;
+	unsigned int hs_clkadj = 0;
+	unsigned int hs_wrlvl_start = 0;
+
+	ddr->timing_cfg_6 = (0
+			     | ((hs_caslat & 0x1f) << 24)
+			     | ((hs_wrlat & 0x1f) << 19)
+			     | ((hs_wrrec & 0x1f) << 12)
+			     | ((hs_clkadj & 0x1f) << 6)
+			     | ((hs_wrlvl_start & 0x1f) << 0)
+			    );
+	debug("FSLDDR: timing_cfg_6 = 0x%08x\n", ddr->timing_cfg_6);
+}
+
+static void set_timing_cfg_7(fsl_ddr_cfg_regs_t *ddr,
+			const common_timing_params_t *common_dimm)
+{
+	unsigned int txpr, tcksre, tcksrx;
+	unsigned int cke_rst, cksre, cksrx, par_lat, cs_to_cmd;
+
+	txpr = max(5, picos_to_mclk(common_dimm->trfc1_ps + 10000));
+	tcksre = max(5, picos_to_mclk(10000));
+	tcksrx = max(5, picos_to_mclk(10000));
+	par_lat = 0;
+	cs_to_cmd = 0;
+
+	if (txpr <= 200)
+		cke_rst = 0;
+	else if (txpr <= 256)
+		cke_rst = 1;
+	else if (txpr <= 512)
+		cke_rst = 2;
+	else
+		cke_rst = 3;
+
+	if (tcksre <= 19)
+		cksre = tcksre - 5;
+	else
+		cksre = 15;
+
+	if (tcksrx <= 19)
+		cksrx = tcksrx - 5;
+	else
+		cksrx = 15;
+
+	ddr->timing_cfg_7 = (0
+			     | ((cke_rst & 0x3) << 28)
+			     | ((cksre & 0xf) << 24)
+			     | ((cksrx & 0xf) << 20)
+			     | ((par_lat & 0xf) << 16)
+			     | ((cs_to_cmd & 0xf) << 4)
+			    );
+	debug("FSLDDR: timing_cfg_7 = 0x%08x\n", ddr->timing_cfg_7);
+}
+
+static void set_timing_cfg_8(fsl_ddr_cfg_regs_t *ddr,
+			     const memctl_options_t *popts,
+			     const common_timing_params_t *common_dimm,
+			     unsigned int cas_latency)
+{
+	unsigned int rwt_bg, wrt_bg, rrt_bg, wwt_bg;
+	unsigned int acttoact_bg, wrtord_bg, pre_all_rec;
+	unsigned int tccdl = picos_to_mclk(common_dimm->tccdl_ps);
+	unsigned int wr_lat = ((ddr->timing_cfg_2 & 0x00780000) >> 19) +
+			      ((ddr->timing_cfg_2 & 0x00040000) >> 14);
+
+	rwt_bg = cas_latency + 2 + 4 - wr_lat;
+	if (rwt_bg < tccdl)
+		rwt_bg = tccdl - rwt_bg;
+	else
+		rwt_bg = 0;
+
+	wrt_bg = wr_lat + 4 + 1 - cas_latency;
+	if (wrt_bg < tccdl)
+		wrt_bg = tccdl - wrt_bg;
+	else
+		wrt_bg = 0;
+
+	if (popts->burst_length == DDR_BL8) {
+		rrt_bg = tccdl - 4;
+		wwt_bg = tccdl - 4;
+	} else {
+		rrt_bg = tccdl - 2;
+		wwt_bg = tccdl - 4;
+	}
+
+	acttoact_bg = picos_to_mclk(common_dimm->trrdl_ps);
+	wrtord_bg = max(4, picos_to_mclk(7500));
+	pre_all_rec = 0;
+
+	ddr->timing_cfg_8 = (0
+			     | ((rwt_bg & 0xf) << 28)
+			     | ((wrt_bg & 0xf) << 24)
+			     | ((rrt_bg & 0xf) << 20)
+			     | ((wwt_bg & 0xf) << 16)
+			     | ((acttoact_bg & 0xf) << 12)
+			     | ((wrtord_bg & 0xf) << 8)
+			     | ((pre_all_rec & 0x1f) << 0)
+			    );
+
+	debug("FSLDDR: timing_cfg_8 = 0x%08x\n", ddr->timing_cfg_8);
+}
+
+static void set_timing_cfg_9(fsl_ddr_cfg_regs_t *ddr)
+{
+	ddr->timing_cfg_9 = 0;
+	debug("FSLDDR: timing_cfg_9 = 0x%08x\n", ddr->timing_cfg_9);
+}
+
+static void set_ddr_dq_mapping(fsl_ddr_cfg_regs_t *ddr,
+			       const dimm_params_t *dimm_params)
+{
+	ddr->dq_map_0 = ((dimm_params->dq_mapping[0] & 0x3F) << 26) |
+			((dimm_params->dq_mapping[1] & 0x3F) << 20) |
+			((dimm_params->dq_mapping[2] & 0x3F) << 14) |
+			((dimm_params->dq_mapping[3] & 0x3F) << 8) |
+			((dimm_params->dq_mapping[4] & 0x3F) << 2);
+
+	ddr->dq_map_1 = ((dimm_params->dq_mapping[5] & 0x3F) << 26) |
+			((dimm_params->dq_mapping[6] & 0x3F) << 20) |
+			((dimm_params->dq_mapping[7] & 0x3F) << 14) |
+			((dimm_params->dq_mapping[10] & 0x3F) << 8) |
+			((dimm_params->dq_mapping[11] & 0x3F) << 2);
+
+	ddr->dq_map_2 = ((dimm_params->dq_mapping[12] & 0x3F) << 26) |
+			((dimm_params->dq_mapping[13] & 0x3F) << 20) |
+			((dimm_params->dq_mapping[14] & 0x3F) << 14) |
+			((dimm_params->dq_mapping[15] & 0x3F) << 8) |
+			((dimm_params->dq_mapping[16] & 0x3F) << 2);
+
+	ddr->dq_map_3 = ((dimm_params->dq_mapping[17] & 0x3F) << 26) |
+			((dimm_params->dq_mapping[8] & 0x3F) << 20) |
+			((dimm_params->dq_mapping[9] & 0x3F) << 14) |
+			dimm_params->dq_mapping_ors;
+
+	debug("FSLDDR: dq_map_0 = 0x%08x\n", ddr->dq_map_0);
+	debug("FSLDDR: dq_map_1 = 0x%08x\n", ddr->dq_map_1);
+	debug("FSLDDR: dq_map_2 = 0x%08x\n", ddr->dq_map_2);
+	debug("FSLDDR: dq_map_3 = 0x%08x\n", ddr->dq_map_3);
+}
+static void set_ddr_sdram_cfg_3(fsl_ddr_cfg_regs_t *ddr,
+			       const memctl_options_t *popts)
+{
+	int rd_pre;
+
+	rd_pre = popts->quad_rank_present ? 1 : 0;
+
+	ddr->ddr_sdram_cfg_3 = (rd_pre & 0x1) << 16;
+
+	debug("FSLDDR: ddr_sdram_cfg_3 = 0x%08x\n", ddr->ddr_sdram_cfg_3);
+}
+#endif	/* CONFIG_SYS_FSL_DDR4 */
+
 /* DDR ZQ Calibration Control (DDR_ZQ_CNTL) */
 static void set_ddr_zq_cntl(fsl_ddr_cfg_regs_t *ddr, unsigned int zq_en)
 {
@@ -1310,11 +1930,21 @@
 	unsigned int zqoper = 0;
 	/* Normal Operation Short Calibration Time (tZQCS) */
 	unsigned int zqcs = 0;
+#ifdef CONFIG_SYS_FSL_DDR4
+	unsigned int zqcs_init;
+#endif
 
 	if (zq_en) {
+#ifdef CONFIG_SYS_FSL_DDR4
+		zqinit = 10;	/* 1024 clocks */
+		zqoper = 9;	/* 512 clocks */
+		zqcs = 7;	/* 128 clocks */
+		zqcs_init = 5;	/* 1024 refresh sequences */
+#else
 		zqinit = 9;	/* 512 clocks */
 		zqoper = 8;	/* 256 clocks */
 		zqcs = 6;	/* 64 clocks */
+#endif
 	}
 
 	ddr->ddr_zq_cntl = (0
@@ -1322,6 +1952,9 @@
 			    | ((zqinit & 0xF) << 24)
 			    | ((zqoper & 0xF) << 16)
 			    | ((zqcs & 0xF) << 8)
+#ifdef CONFIG_SYS_FSL_DDR4
+			    | ((zqcs_init & 0xF) << 0)
+#endif
 			    );
 	debug("FSLDDR: zq_cntl = 0x%08x\n", ddr->ddr_zq_cntl);
 }
@@ -1478,7 +2111,7 @@
 	 */
 	cas_latency = (popts->cas_latency_override)
 		? popts->cas_latency_override_value
-		: common_dimm->lowest_common_SPD_caslat;
+		: common_dimm->lowest_common_spd_caslat;
 
 	additive_latency = (popts->additive_latency_override)
 		? popts->additive_latency_override_value
@@ -1639,6 +2272,10 @@
 	set_ddr_sdram_mode(ddr, popts, common_dimm,
 				cas_latency, additive_latency, unq_mrs_en);
 	set_ddr_sdram_mode_2(ddr, popts, common_dimm, unq_mrs_en);
+#ifdef CONFIG_SYS_FSL_DDR4
+	set_ddr_sdram_mode_9(ddr, popts, common_dimm, unq_mrs_en);
+	set_ddr_sdram_mode_10(ddr, popts, common_dimm, unq_mrs_en);
+#endif
 	set_ddr_sdram_interval(ddr, popts, common_dimm);
 	set_ddr_data_init(ddr);
 	set_ddr_sdram_clk_cntl(ddr, popts);
@@ -1646,6 +2283,14 @@
 	set_ddr_init_ext_addr(ddr);
 	set_timing_cfg_4(ddr, popts);
 	set_timing_cfg_5(ddr, cas_latency);
+#ifdef CONFIG_SYS_FSL_DDR4
+	set_ddr_sdram_cfg_3(ddr, popts);
+	set_timing_cfg_6(ddr);
+	set_timing_cfg_7(ddr, common_dimm);
+	set_timing_cfg_8(ddr, popts, common_dimm, cas_latency);
+	set_timing_cfg_9(ddr);
+	set_ddr_dq_mapping(ddr, dimm_params);
+#endif
 
 	set_ddr_zq_cntl(ddr, zq_en);
 	set_ddr_wrlvl_cntl(ddr, wrlvl_en, popts);
diff --git a/drivers/ddr/fsl/ddr4_dimm_params.c b/drivers/ddr/fsl/ddr4_dimm_params.c
new file mode 100644
index 0000000..4745b7f
--- /dev/null
+++ b/drivers/ddr/fsl/ddr4_dimm_params.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2014 Freescale Semiconductor, Inc.
+ *
+ * calculate the organization and timing parameter
+ * from ddr3 spd, please refer to the spec
+ * JEDEC standard No.21-C 4_01_02_12R23A.pdf
+ *
+ *
+ */
+
+#include <common.h>
+#include <fsl_ddr_sdram.h>
+
+#include <fsl_ddr.h>
+
+/*
+ * Calculate the Density of each Physical Rank.
+ * Returned size is in bytes.
+ *
+ * Total DIMM size =
+ * sdram capacity(bit) / 8 * primary bus width / sdram width
+ *                     * Logical Ranks per DIMM
+ *
+ * where: sdram capacity  = spd byte4[3:0]
+ *        primary bus width = spd byte13[2:0]
+ *        sdram width = spd byte12[2:0]
+ *        Logical Ranks per DIMM = spd byte12[5:3] for SDP, DDP, QDP
+ *                                 spd byte12{5:3] * spd byte6[6:4] for 3DS
+ *
+ * To simplify each rank size = total DIMM size / Number of Package Ranks
+ * where Number of Package Ranks = spd byte12[5:3]
+ *
+ * SPD byte4 - sdram density and banks
+ *	bit[3:0]	size(bit)	size(byte)
+ *	0000		256Mb		32MB
+ *	0001		512Mb		64MB
+ *	0010		1Gb		128MB
+ *	0011		2Gb		256MB
+ *	0100		4Gb		512MB
+ *	0101		8Gb		1GB
+ *	0110		16Gb		2GB
+ *      0111		32Gb		4GB
+ *
+ * SPD byte13 - module memory bus width
+ *	bit[2:0]	primary bus width
+ *	000		8bits
+ *	001		16bits
+ *	010		32bits
+ *	011		64bits
+ *
+ * SPD byte12 - module organization
+ *	bit[2:0]	sdram device width
+ *	000		4bits
+ *	001		8bits
+ *	010		16bits
+ *	011		32bits
+ *
+ * SPD byte12 - module organization
+ *	bit[5:3]	number of package ranks per DIMM
+ *	000		1
+ *	001		2
+ *	010		3
+ *	011		4
+ *
+ * SPD byte6 - SDRAM package type
+ *	bit[6:4]	Die count
+ *	000		1
+ *	001		2
+ *	010		3
+ *	011		4
+ *	100		5
+ *	101		6
+ *	110		7
+ *	111		8
+ *
+ * SPD byte6 - SRAM package type
+ *	bit[1:0]	Signal loading
+ *	00		Not specified
+ *	01		Multi load stack
+ *	10		Sigle load stack (3DS)
+ *	11		Reserved
+ */
+static unsigned long long
+compute_ranksize(const struct ddr4_spd_eeprom_s *spd)
+{
+	unsigned long long bsize;
+
+	int nbit_sdram_cap_bsize = 0;
+	int nbit_primary_bus_width = 0;
+	int nbit_sdram_width = 0;
+	int die_count = 0;
+	bool package_3ds;
+
+	if ((spd->density_banks & 0xf) <= 7)
+		nbit_sdram_cap_bsize = (spd->density_banks & 0xf) + 28;
+	if ((spd->bus_width & 0x7) < 4)
+		nbit_primary_bus_width = (spd->bus_width & 0x7) + 3;
+	if ((spd->organization & 0x7) < 4)
+		nbit_sdram_width = (spd->organization & 0x7) + 2;
+	package_3ds = (spd->package_type & 0x3) == 0x2;
+	if (package_3ds)
+		die_count = (spd->package_type >> 4) & 0x7;
+
+	bsize = 1ULL << (nbit_sdram_cap_bsize - 3 +
+			 nbit_primary_bus_width - nbit_sdram_width +
+			 die_count);
+
+	debug("DDR: DDR III rank density = 0x%16llx\n", bsize);
+
+	return bsize;
+}
+
+#define spd_to_ps(mtb, ftb)	\
+	(mtb * pdimm->mtb_ps + (ftb * pdimm->ftb_10th_ps) / 10)
+/*
+ * ddr_compute_dimm_parameters for DDR3 SPD
+ *
+ * Compute DIMM parameters based upon the SPD information in spd.
+ * Writes the results to the dimm_params_t structure pointed by pdimm.
+ *
+ */
+unsigned int
+ddr_compute_dimm_parameters(const generic_spd_eeprom_t *spd,
+			    dimm_params_t *pdimm,
+			    unsigned int dimm_number)
+{
+	unsigned int retval;
+	int i;
+
+	if (spd->mem_type) {
+		if (spd->mem_type != SPD_MEMTYPE_DDR4) {
+			printf("DIMM %u: is not a DDR4 SPD.\n", dimm_number);
+			return 1;
+		}
+	} else {
+		memset(pdimm, 0, sizeof(dimm_params_t));
+		return 1;
+	}
+
+	retval = ddr4_spd_check(spd);
+	if (retval) {
+		printf("DIMM %u: failed checksum\n", dimm_number);
+		return 2;
+	}
+
+	/*
+	 * The part name in ASCII in the SPD EEPROM is not null terminated.
+	 * Guarantee null termination here by presetting all bytes to 0
+	 * and copying the part name in ASCII from the SPD onto it
+	 */
+	memset(pdimm->mpart, 0, sizeof(pdimm->mpart));
+	if ((spd->info_size_crc & 0xF) > 2)
+		memcpy(pdimm->mpart, spd->mpart, sizeof(pdimm->mpart) - 1);
+
+	/* DIMM organization parameters */
+	pdimm->n_ranks = ((spd->organization >> 3) & 0x7) + 1;
+	pdimm->rank_density = compute_ranksize(spd);
+	pdimm->capacity = pdimm->n_ranks * pdimm->rank_density;
+	pdimm->primary_sdram_width = 1 << (3 + (spd->bus_width & 0x7));
+	if ((spd->bus_width >> 3) & 0x3)
+		pdimm->ec_sdram_width = 8;
+	else
+		pdimm->ec_sdram_width = 0;
+	pdimm->data_width = pdimm->primary_sdram_width
+			  + pdimm->ec_sdram_width;
+	pdimm->device_width = 1 << ((spd->organization & 0x7) + 2);
+
+	/* These are the types defined by the JEDEC DDR3 SPD spec */
+	pdimm->mirrored_dimm = 0;
+	pdimm->registered_dimm = 0;
+	switch (spd->module_type & DDR3_SPD_MODULETYPE_MASK) {
+	case DDR3_SPD_MODULETYPE_RDIMM:
+		/* Registered/buffered DIMMs */
+		pdimm->registered_dimm = 1;
+		break;
+
+	case DDR3_SPD_MODULETYPE_UDIMM:
+	case DDR3_SPD_MODULETYPE_SO_DIMM:
+		/* Unbuffered DIMMs */
+		if (spd->mod_section.unbuffered.addr_mapping & 0x1)
+			pdimm->mirrored_dimm = 1;
+		break;
+
+	default:
+		printf("unknown module_type 0x%02X\n", spd->module_type);
+		return 1;
+	}
+
+	/* SDRAM device parameters */
+	pdimm->n_row_addr = ((spd->addressing >> 3) & 0x7) + 12;
+	pdimm->n_col_addr = (spd->addressing & 0x7) + 9;
+	pdimm->bank_addr_bits = (spd->density_banks >> 4) & 0x3;
+	pdimm->bank_group_bits = (spd->density_banks >> 6) & 0x3;
+
+	/*
+	 * The SPD spec has not the ECC bit,
+	 * We consider the DIMM as ECC capability
+	 * when the extension bus exist
+	 */
+	if (pdimm->ec_sdram_width)
+		pdimm->edc_config = 0x02;
+	else
+		pdimm->edc_config = 0x00;
+
+	/*
+	 * The SPD spec has not the burst length byte
+	 * but DDR4 spec has nature BL8 and BC4,
+	 * BL8 -bit3, BC4 -bit2
+	 */
+	pdimm->burst_lengths_bitmask = 0x0c;
+	pdimm->row_density = __ilog2(pdimm->rank_density);
+
+	/* MTB - medium timebase
+	 * The MTB in the SPD spec is 125ps,
+	 *
+	 * FTB - fine timebase
+	 * use 1/10th of ps as our unit to avoid floating point
+	 * eg, 10 for 1ps, 25 for 2.5ps, 50 for 5ps
+	 */
+	if ((spd->timebases & 0xf) == 0x0) {
+		pdimm->mtb_ps = 125;
+		pdimm->ftb_10th_ps = 10;
+
+	} else {
+		printf("Unknown Timebases\n");
+	}
+
+	/* sdram minimum cycle time */
+	pdimm->tckmin_x_ps = spd_to_ps(spd->tck_min, spd->fine_tck_min);
+
+	/* sdram max cycle time */
+	pdimm->tckmax_ps = spd_to_ps(spd->tck_max, spd->fine_tck_max);
+
+	/*
+	 * CAS latency supported
+	 * bit0 - CL7
+	 * bit4 - CL11
+	 * bit8 - CL15
+	 * bit12- CL19
+	 * bit16- CL23
+	 */
+	pdimm->caslat_x  = (spd->caslat_b1 << 7)	|
+			   (spd->caslat_b2 << 15)	|
+			   (spd->caslat_b3 << 23);
+
+	BUG_ON(spd->caslat_b4 != 0);
+
+	/*
+	 * min CAS latency time
+	 */
+	pdimm->taa_ps = spd_to_ps(spd->taa_min, spd->fine_taa_min);
+
+	/*
+	 * min RAS to CAS delay time
+	 */
+	pdimm->trcd_ps = spd_to_ps(spd->trcd_min, spd->fine_trcd_min);
+
+	/*
+	 * Min Row Precharge Delay Time
+	 */
+	pdimm->trp_ps = spd_to_ps(spd->trp_min, spd->fine_trp_min);
+
+	/* min active to precharge delay time */
+	pdimm->tras_ps = (((spd->tras_trc_ext & 0xf) << 8) +
+			  spd->tras_min_lsb) * pdimm->mtb_ps;
+
+	/* min active to actice/refresh delay time */
+	pdimm->trc_ps = spd_to_ps((((spd->tras_trc_ext & 0xf0) << 4) +
+				   spd->trc_min_lsb), spd->fine_trc_min);
+	/* Min Refresh Recovery Delay Time */
+	pdimm->trfc1_ps = ((spd->trfc1_min_msb << 8) | (spd->trfc1_min_lsb)) *
+		       pdimm->mtb_ps;
+	pdimm->trfc2_ps = ((spd->trfc2_min_msb << 8) | (spd->trfc2_min_lsb)) *
+		       pdimm->mtb_ps;
+	pdimm->trfc4_ps = ((spd->trfc4_min_msb << 8) | (spd->trfc4_min_lsb)) *
+			pdimm->mtb_ps;
+	/* min four active window delay time */
+	pdimm->tfaw_ps = (((spd->tfaw_msb & 0xf) << 8) | spd->tfaw_min) *
+			pdimm->mtb_ps;
+
+	/* min row active to row active delay time, different bank group */
+	pdimm->trrds_ps = spd_to_ps(spd->trrds_min, spd->fine_trrds_min);
+	/* min row active to row active delay time, same bank group */
+	pdimm->trrdl_ps = spd_to_ps(spd->trrdl_min, spd->fine_trrdl_min);
+	/* min CAS to CAS Delay Time (tCCD_Lmin), same bank group */
+	pdimm->tccdl_ps = spd_to_ps(spd->tccdl_min, spd->fine_tccdl_min);
+
+	/*
+	 * Average periodic refresh interval
+	 * tREFI = 7.8 us at normal temperature range
+	 */
+	pdimm->refresh_rate_ps = 7800000;
+
+	for (i = 0; i < 18; i++)
+		pdimm->dq_mapping[i] = spd->mapping[i];
+
+	pdimm->dq_mapping_ors = ((spd->mapping[0] >> 6) & 0x3) == 0 ? 1 : 0;
+
+	return 0;
+}
diff --git a/drivers/ddr/fsl/fsl_ddr_gen4.c b/drivers/ddr/fsl/fsl_ddr_gen4.c
new file mode 100644
index 0000000..7cd878a
--- /dev/null
+++ b/drivers/ddr/fsl/fsl_ddr_gen4.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright 2014 Freescale Semiconductor, Inc.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <asm/io.h>
+#include <fsl_ddr_sdram.h>
+#include <asm/processor.h>
+#include <fsl_ddr.h>
+
+#if (CONFIG_CHIP_SELECTS_PER_CTRL > 4)
+#error Invalid setting for CONFIG_CHIP_SELECTS_PER_CTRL
+#endif
+
+/*
+ * regs has the to-be-set values for DDR controller registers
+ * ctrl_num is the DDR controller number
+ * step: 0 goes through the initialization in one pass
+ *       1 sets registers and returns before enabling controller
+ *       2 resumes from step 1 and continues to initialize
+ * Dividing the initialization to two steps to deassert DDR reset signal
+ * to comply with JEDEC specs for RDIMMs.
+ */
+void fsl_ddr_set_memctl_regs(const fsl_ddr_cfg_regs_t *regs,
+			     unsigned int ctrl_num, int step)
+{
+	unsigned int i, bus_width;
+	struct ccsr_ddr __iomem *ddr;
+	u32 temp_sdram_cfg;
+	u32 total_gb_size_per_controller;
+	int timeout;
+
+	switch (ctrl_num) {
+	case 0:
+		ddr = (void *)CONFIG_SYS_FSL_DDR_ADDR;
+		break;
+#if defined(CONFIG_SYS_FSL_DDR2_ADDR) && (CONFIG_NUM_DDR_CONTROLLERS > 1)
+	case 1:
+		ddr = (void *)CONFIG_SYS_FSL_DDR2_ADDR;
+		break;
+#endif
+#if defined(CONFIG_SYS_FSL_DDR3_ADDR) && (CONFIG_NUM_DDR_CONTROLLERS > 2)
+	case 2:
+		ddr = (void *)CONFIG_SYS_FSL_DDR3_ADDR;
+		break;
+#endif
+#if defined(CONFIG_SYS_FSL_DDR4_ADDR) && (CONFIG_NUM_DDR_CONTROLLERS > 3)
+	case 3:
+		ddr = (void *)CONFIG_SYS_FSL_DDR4_ADDR;
+		break;
+#endif
+	default:
+		printf("%s unexpected ctrl_num = %u\n", __func__, ctrl_num);
+		return;
+	}
+
+	if (step == 2)
+		goto step2;
+
+	if (regs->ddr_eor)
+		ddr_out32(&ddr->eor, regs->ddr_eor);
+
+	ddr_out32(&ddr->sdram_clk_cntl, regs->ddr_sdram_clk_cntl);
+
+	for (i = 0; i < CONFIG_CHIP_SELECTS_PER_CTRL; i++) {
+		if (i == 0) {
+			ddr_out32(&ddr->cs0_bnds, regs->cs[i].bnds);
+			ddr_out32(&ddr->cs0_config, regs->cs[i].config);
+			ddr_out32(&ddr->cs0_config_2, regs->cs[i].config_2);
+
+		} else if (i == 1) {
+			ddr_out32(&ddr->cs1_bnds, regs->cs[i].bnds);
+			ddr_out32(&ddr->cs1_config, regs->cs[i].config);
+			ddr_out32(&ddr->cs1_config_2, regs->cs[i].config_2);
+
+		} else if (i == 2) {
+			ddr_out32(&ddr->cs2_bnds, regs->cs[i].bnds);
+			ddr_out32(&ddr->cs2_config, regs->cs[i].config);
+			ddr_out32(&ddr->cs2_config_2, regs->cs[i].config_2);
+
+		} else if (i == 3) {
+			ddr_out32(&ddr->cs3_bnds, regs->cs[i].bnds);
+			ddr_out32(&ddr->cs3_config, regs->cs[i].config);
+			ddr_out32(&ddr->cs3_config_2, regs->cs[i].config_2);
+		}
+	}
+
+	ddr_out32(&ddr->timing_cfg_3, regs->timing_cfg_3);
+	ddr_out32(&ddr->timing_cfg_0, regs->timing_cfg_0);
+	ddr_out32(&ddr->timing_cfg_1, regs->timing_cfg_1);
+	ddr_out32(&ddr->timing_cfg_2, regs->timing_cfg_2);
+	ddr_out32(&ddr->timing_cfg_4, regs->timing_cfg_4);
+	ddr_out32(&ddr->timing_cfg_5, regs->timing_cfg_5);
+	ddr_out32(&ddr->timing_cfg_6, regs->timing_cfg_6);
+	ddr_out32(&ddr->timing_cfg_7, regs->timing_cfg_7);
+	ddr_out32(&ddr->timing_cfg_8, regs->timing_cfg_8);
+	ddr_out32(&ddr->timing_cfg_9, regs->timing_cfg_9);
+	ddr_out32(&ddr->ddr_zq_cntl, regs->ddr_zq_cntl);
+	ddr_out32(&ddr->dq_map_0, regs->dq_map_0);
+	ddr_out32(&ddr->dq_map_1, regs->dq_map_1);
+	ddr_out32(&ddr->dq_map_2, regs->dq_map_2);
+	ddr_out32(&ddr->dq_map_3, regs->dq_map_3);
+	ddr_out32(&ddr->sdram_cfg_2, regs->ddr_sdram_cfg_2);
+	ddr_out32(&ddr->sdram_cfg_3, regs->ddr_sdram_cfg_3);
+	ddr_out32(&ddr->sdram_mode, regs->ddr_sdram_mode);
+	ddr_out32(&ddr->sdram_mode_2, regs->ddr_sdram_mode_2);
+	ddr_out32(&ddr->sdram_mode_3, regs->ddr_sdram_mode_3);
+	ddr_out32(&ddr->sdram_mode_4, regs->ddr_sdram_mode_4);
+	ddr_out32(&ddr->sdram_mode_5, regs->ddr_sdram_mode_5);
+	ddr_out32(&ddr->sdram_mode_6, regs->ddr_sdram_mode_6);
+	ddr_out32(&ddr->sdram_mode_7, regs->ddr_sdram_mode_7);
+	ddr_out32(&ddr->sdram_mode_8, regs->ddr_sdram_mode_8);
+	ddr_out32(&ddr->sdram_mode_9, regs->ddr_sdram_mode_9);
+	ddr_out32(&ddr->sdram_mode_10, regs->ddr_sdram_mode_10);
+	ddr_out32(&ddr->sdram_mode_11, regs->ddr_sdram_mode_11);
+	ddr_out32(&ddr->sdram_mode_12, regs->ddr_sdram_mode_12);
+	ddr_out32(&ddr->sdram_mode_13, regs->ddr_sdram_mode_13);
+	ddr_out32(&ddr->sdram_mode_14, regs->ddr_sdram_mode_14);
+	ddr_out32(&ddr->sdram_mode_15, regs->ddr_sdram_mode_15);
+	ddr_out32(&ddr->sdram_mode_16, regs->ddr_sdram_mode_16);
+	ddr_out32(&ddr->sdram_md_cntl, regs->ddr_sdram_md_cntl);
+	ddr_out32(&ddr->sdram_interval, regs->ddr_sdram_interval);
+	ddr_out32(&ddr->sdram_data_init, regs->ddr_data_init);
+	ddr_out32(&ddr->init_addr, regs->ddr_init_addr);
+	ddr_out32(&ddr->init_ext_addr, regs->ddr_init_ext_addr);
+	ddr_out32(&ddr->ddr_wrlvl_cntl, regs->ddr_wrlvl_cntl);
+#ifndef CONFIG_SYS_FSL_DDR_EMU
+	/*
+	 * Skip these two registers if running on emulator
+	 * because emulator doesn't have skew between bytes.
+	 */
+
+	if (regs->ddr_wrlvl_cntl_2)
+		ddr_out32(&ddr->ddr_wrlvl_cntl_2, regs->ddr_wrlvl_cntl_2);
+	if (regs->ddr_wrlvl_cntl_3)
+		ddr_out32(&ddr->ddr_wrlvl_cntl_3, regs->ddr_wrlvl_cntl_3);
+#endif
+
+	ddr_out32(&ddr->ddr_sr_cntr, regs->ddr_sr_cntr);
+	ddr_out32(&ddr->ddr_sdram_rcw_1, regs->ddr_sdram_rcw_1);
+	ddr_out32(&ddr->ddr_sdram_rcw_2, regs->ddr_sdram_rcw_2);
+	ddr_out32(&ddr->ddr_sdram_rcw_3, regs->ddr_sdram_rcw_3);
+	ddr_out32(&ddr->ddr_sdram_rcw_4, regs->ddr_sdram_rcw_4);
+	ddr_out32(&ddr->ddr_sdram_rcw_5, regs->ddr_sdram_rcw_5);
+	ddr_out32(&ddr->ddr_sdram_rcw_6, regs->ddr_sdram_rcw_6);
+	ddr_out32(&ddr->ddr_cdr1, regs->ddr_cdr1);
+	ddr_out32(&ddr->ddr_cdr2, regs->ddr_cdr2);
+	ddr_out32(&ddr->err_disable, regs->err_disable);
+	ddr_out32(&ddr->err_int_en, regs->err_int_en);
+	for (i = 0; i < 32; i++) {
+		if (regs->debug[i]) {
+			debug("Write to debug_%d as %08x\n",
+			      i+1, regs->debug[i]);
+			ddr_out32(&ddr->debug[i], regs->debug[i]);
+		}
+	}
+
+	/*
+	 * For RDIMMs, JEDEC spec requires clocks to be stable before reset is
+	 * deasserted. Clocks start when any chip select is enabled and clock
+	 * control register is set. Because all DDR components are connected to
+	 * one reset signal, this needs to be done in two steps. Step 1 is to
+	 * get the clocks started. Step 2 resumes after reset signal is
+	 * deasserted.
+	 */
+	if (step == 1) {
+		udelay(200);
+		return;
+	}
+
+step2:
+	/* Set, but do not enable the memory */
+	temp_sdram_cfg = regs->ddr_sdram_cfg;
+	temp_sdram_cfg &= ~(SDRAM_CFG_MEM_EN);
+	ddr_out32(&ddr->sdram_cfg, temp_sdram_cfg);
+
+	/*
+	 * 500 painful micro-seconds must elapse between
+	 * the DDR clock setup and the DDR config enable.
+	 * DDR2 need 200 us, and DDR3 need 500 us from spec,
+	 * we choose the max, that is 500 us for all of case.
+	 */
+	udelay(500);
+	asm volatile("sync;isync");
+
+	/* Let the controller go */
+	temp_sdram_cfg = ddr_in32(&ddr->sdram_cfg) & ~SDRAM_CFG_BI;
+	ddr_out32(&ddr->sdram_cfg, temp_sdram_cfg | SDRAM_CFG_MEM_EN);
+	asm volatile("sync;isync");
+
+	total_gb_size_per_controller = 0;
+	for (i = 0; i < CONFIG_CHIP_SELECTS_PER_CTRL; i++) {
+		if (!(regs->cs[i].config & 0x80000000))
+			continue;
+		total_gb_size_per_controller += 1 << (
+			((regs->cs[i].config >> 14) & 0x3) + 2 +
+			((regs->cs[i].config >> 8) & 0x7) + 12 +
+			((regs->cs[i].config >> 4) & 0x3) + 0 +
+			((regs->cs[i].config >> 0) & 0x7) + 8 +
+			3 - ((regs->ddr_sdram_cfg >> 19) & 0x3) -
+			26);			/* minus 26 (count of 64M) */
+	}
+	if (fsl_ddr_get_intl3r() & 0x80000000)	/* 3-way interleaving */
+		total_gb_size_per_controller *= 3;
+	else if (regs->cs[0].config & 0x20000000) /* 2-way interleaving */
+		total_gb_size_per_controller <<= 1;
+	/*
+	 * total memory / bus width = transactions needed
+	 * transactions needed / data rate = seconds
+	 * to add plenty of buffer, double the time
+	 * For example, 2GB on 666MT/s 64-bit bus takes about 402ms
+	 * Let's wait for 800ms
+	 */
+	bus_width = 3 - ((ddr->sdram_cfg & SDRAM_CFG_DBW_MASK)
+			>> SDRAM_CFG_DBW_SHIFT);
+	timeout = ((total_gb_size_per_controller << (6 - bus_width)) * 100 /
+		(get_ddr_freq(0) >> 20)) << 2;
+	total_gb_size_per_controller >>= 4;	/* shift down to gb size */
+	debug("total %d GB\n", total_gb_size_per_controller);
+	debug("Need to wait up to %d * 10ms\n", timeout);
+
+	/* Poll DDR_SDRAM_CFG_2[D_INIT] bit until auto-data init is done.  */
+	while ((ddr_in32(&ddr->sdram_cfg_2) & SDRAM_CFG2_D_INIT) &&
+		(timeout >= 0)) {
+		udelay(10000);		/* throttle polling rate */
+		timeout--;
+	}
+
+	if (timeout <= 0)
+		printf("Waiting for D_INIT timeout. Memory may not work.\n");
+
+}
diff --git a/drivers/ddr/fsl/interactive.c b/drivers/ddr/fsl/interactive.c
index ebf3ed6..cfe1e1f 100644
--- a/drivers/ddr/fsl/interactive.c
+++ b/drivers/ddr/fsl/interactive.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010-2012 Freescale Semiconductor, Inc.
+ * Copyright 2010-2014 Freescale Semiconductor, Inc.
  *
  * SPDX-License-Identifier:	GPL-2.0+
  */
@@ -153,25 +153,38 @@
 	static const struct options_string options[] = {
 		COMMON_TIMING(tckmin_x_ps),
 		COMMON_TIMING(tckmax_ps),
-		COMMON_TIMING(tckmax_max_ps),
+		COMMON_TIMING(taamin_ps),
 		COMMON_TIMING(trcd_ps),
 		COMMON_TIMING(trp_ps),
 		COMMON_TIMING(tras_ps),
-		COMMON_TIMING(twr_ps),
+
+#ifdef CONFIG_SYS_FSL_DDR4
+		COMMON_TIMING(trfc1_ps),
+		COMMON_TIMING(trfc2_ps),
+		COMMON_TIMING(trfc4_ps),
+		COMMON_TIMING(trrds_ps),
+		COMMON_TIMING(trrdl_ps),
+		COMMON_TIMING(tccdl_ps),
+#else
 		COMMON_TIMING(twtr_ps),
 		COMMON_TIMING(trfc_ps),
 		COMMON_TIMING(trrd_ps),
+		COMMON_TIMING(trtp_ps),
+#endif
+		COMMON_TIMING(twr_ps),
 		COMMON_TIMING(trc_ps),
 		COMMON_TIMING(refresh_rate_ps),
+		COMMON_TIMING(extended_op_srt),
+#if defined(CONFIG_SYS_FSL_DDR1) || defined(CONFIG_SYS_FSL_DDR2)
 		COMMON_TIMING(tis_ps),
 		COMMON_TIMING(tih_ps),
 		COMMON_TIMING(tds_ps),
 		COMMON_TIMING(tdh_ps),
-		COMMON_TIMING(trtp_ps),
 		COMMON_TIMING(tdqsq_max_ps),
 		COMMON_TIMING(tqhs_ps),
+#endif
 		COMMON_TIMING(ndimms_present),
-		COMMON_TIMING(lowest_common_SPD_caslat),
+		COMMON_TIMING(lowest_common_spd_caslat),
 		COMMON_TIMING(highest_common_derated_caslat),
 		COMMON_TIMING(additive_latency),
 		COMMON_TIMING(all_dimms_burst_lengths_bitmask),
@@ -211,7 +224,12 @@
 		DIMM_PARM(n_row_addr),
 		DIMM_PARM(n_col_addr),
 		DIMM_PARM(edc_config),
+#ifdef CONFIG_SYS_FSL_DDR4
+		DIMM_PARM(bank_addr_bits),
+		DIMM_PARM(bank_group_bits),
+#else
 		DIMM_PARM(n_banks_per_sdram_device),
+#endif
 		DIMM_PARM(burst_lengths_bitmask),
 		DIMM_PARM(row_density),
 
@@ -229,20 +247,32 @@
 		DIMM_PARM(trcd_ps),
 		DIMM_PARM(trp_ps),
 		DIMM_PARM(tras_ps),
+#ifdef CONFIG_SYS_FSL_DDR4
+		DIMM_PARM(trfc1_ps),
+		DIMM_PARM(trfc2_ps),
+		DIMM_PARM(trfc4_ps),
+		DIMM_PARM(trrds_ps),
+		DIMM_PARM(trrdl_ps),
+		DIMM_PARM(tccdl_ps),
+#else
 		DIMM_PARM(twr_ps),
 		DIMM_PARM(twtr_ps),
 		DIMM_PARM(trfc_ps),
 		DIMM_PARM(trrd_ps),
+		DIMM_PARM(trtp_ps),
+#endif
 		DIMM_PARM(trc_ps),
 		DIMM_PARM(refresh_rate_ps),
+		DIMM_PARM(extended_op_srt),
 
+#if defined(CONFIG_SYS_FSL_DDR1) || defined(CONFIG_SYS_FSL_DDR2)
 		DIMM_PARM(tis_ps),
 		DIMM_PARM(tih_ps),
 		DIMM_PARM(tds_ps),
 		DIMM_PARM(tdh_ps),
-		DIMM_PARM(trtp_ps),
 		DIMM_PARM(tdqsq_max_ps),
 		DIMM_PARM(tqhs_ps),
+#endif
 
 		DIMM_PARM(rank_density),
 		DIMM_PARM(capacity),
@@ -270,7 +300,12 @@
 		DIMM_PARM(n_row_addr),
 		DIMM_PARM(n_col_addr),
 		DIMM_PARM(edc_config),
+#ifdef CONFIG_SYS_FSL_DDR4
+		DIMM_PARM(bank_addr_bits),
+		DIMM_PARM(bank_group_bits),
+#else
 		DIMM_PARM(n_banks_per_sdram_device),
+#endif
 
 		DIMM_PARM(tckmin_x_ps),
 		DIMM_PARM(tckmin_x_minus_1_ps),
@@ -286,20 +321,31 @@
 		DIMM_PARM(trcd_ps),
 		DIMM_PARM(trp_ps),
 		DIMM_PARM(tras_ps),
+#ifdef CONFIG_SYS_FSL_DDR4
+		DIMM_PARM(trfc1_ps),
+		DIMM_PARM(trfc2_ps),
+		DIMM_PARM(trfc4_ps),
+		DIMM_PARM(trrds_ps),
+		DIMM_PARM(trrdl_ps),
+		DIMM_PARM(tccdl_ps),
+#else
 		DIMM_PARM(twr_ps),
 		DIMM_PARM(twtr_ps),
 		DIMM_PARM(trfc_ps),
 		DIMM_PARM(trrd_ps),
+		DIMM_PARM(trtp_ps),
+#endif
 		DIMM_PARM(trc_ps),
 		DIMM_PARM(refresh_rate_ps),
 
+#if defined(CONFIG_SYS_FSL_DDR1) || defined(CONFIG_SYS_FSL_DDR2)
 		DIMM_PARM(tis_ps),
 		DIMM_PARM(tih_ps),
 		DIMM_PARM(tds_ps),
 		DIMM_PARM(tdh_ps),
-		DIMM_PARM(trtp_ps),
 		DIMM_PARM(tdqsq_max_ps),
 		DIMM_PARM(tqhs_ps),
+#endif
 	};
 	static const unsigned int n_opts = ARRAY_SIZE(options);
 
@@ -326,23 +372,36 @@
 		const common_timing_params_t *plcd_dimm_params)
 {
 	static const struct options_string options[] = {
-		COMMON_TIMING(tckmax_max_ps),
+		COMMON_TIMING(taamin_ps),
 		COMMON_TIMING(trcd_ps),
 		COMMON_TIMING(trp_ps),
 		COMMON_TIMING(tras_ps),
-		COMMON_TIMING(twr_ps),
+#ifdef CONFIG_SYS_FSL_DDR4
+		COMMON_TIMING(trfc1_ps),
+		COMMON_TIMING(trfc2_ps),
+		COMMON_TIMING(trfc4_ps),
+		COMMON_TIMING(trrds_ps),
+		COMMON_TIMING(trrdl_ps),
+		COMMON_TIMING(tccdl_ps),
+#else
 		COMMON_TIMING(twtr_ps),
 		COMMON_TIMING(trfc_ps),
 		COMMON_TIMING(trrd_ps),
+		COMMON_TIMING(trtp_ps),
+#endif
+		COMMON_TIMING(twr_ps),
 		COMMON_TIMING(trc_ps),
 		COMMON_TIMING(refresh_rate_ps),
+		COMMON_TIMING(extended_op_srt),
+#if defined(CONFIG_SYS_FSL_DDR1) || defined(CONFIG_SYS_FSL_DDR2)
 		COMMON_TIMING(tis_ps),
+		COMMON_TIMING(tih_ps),
 		COMMON_TIMING(tds_ps),
 		COMMON_TIMING(tdh_ps),
-		COMMON_TIMING(trtp_ps),
 		COMMON_TIMING(tdqsq_max_ps),
 		COMMON_TIMING(tqhs_ps),
-		COMMON_TIMING(lowest_common_SPD_caslat),
+#endif
+		COMMON_TIMING(lowest_common_spd_caslat),
 		COMMON_TIMING(highest_common_derated_caslat),
 		COMMON_TIMING(additive_latency),
 		COMMON_TIMING(ndimms_present),
@@ -460,6 +519,9 @@
 		CTRL_OPTIONS(tfaw_window_four_activates_ps),
 		CTRL_OPTIONS(trwt_override),
 		CTRL_OPTIONS(trwt),
+		CTRL_OPTIONS(rtt_override),
+		CTRL_OPTIONS(rtt_override_value),
+		CTRL_OPTIONS(rtt_wr_override_value),
 	};
 
 	static const unsigned int n_opts = ARRAY_SIZE(options);
@@ -505,6 +567,7 @@
 		CFG_REGS(timing_cfg_2),
 		CFG_REGS(ddr_sdram_cfg),
 		CFG_REGS(ddr_sdram_cfg_2),
+		CFG_REGS(ddr_sdram_cfg_3),
 		CFG_REGS(ddr_sdram_mode),
 		CFG_REGS(ddr_sdram_mode_2),
 		CFG_REGS(ddr_sdram_mode_3),
@@ -513,6 +576,16 @@
 		CFG_REGS(ddr_sdram_mode_6),
 		CFG_REGS(ddr_sdram_mode_7),
 		CFG_REGS(ddr_sdram_mode_8),
+#ifdef CONFIG_SYS_FSL_DDR4
+		CFG_REGS(ddr_sdram_mode_9),
+		CFG_REGS(ddr_sdram_mode_10),
+		CFG_REGS(ddr_sdram_mode_11),
+		CFG_REGS(ddr_sdram_mode_12),
+		CFG_REGS(ddr_sdram_mode_13),
+		CFG_REGS(ddr_sdram_mode_14),
+		CFG_REGS(ddr_sdram_mode_15),
+		CFG_REGS(ddr_sdram_mode_16),
+#endif
 		CFG_REGS(ddr_sdram_interval),
 		CFG_REGS(ddr_data_init),
 		CFG_REGS(ddr_sdram_clk_cntl),
@@ -520,6 +593,12 @@
 		CFG_REGS(ddr_init_ext_addr),
 		CFG_REGS(timing_cfg_4),
 		CFG_REGS(timing_cfg_5),
+#ifdef CONFIG_SYS_FSL_DDR4
+		CFG_REGS(timing_cfg_6),
+		CFG_REGS(timing_cfg_7),
+		CFG_REGS(timing_cfg_8),
+		CFG_REGS(timing_cfg_9),
+#endif
 		CFG_REGS(ddr_zq_cntl),
 		CFG_REGS(ddr_wrlvl_cntl),
 		CFG_REGS(ddr_wrlvl_cntl_2),
@@ -529,6 +608,10 @@
 		CFG_REGS(ddr_sdram_rcw_2),
 		CFG_REGS(ddr_cdr1),
 		CFG_REGS(ddr_cdr2),
+		CFG_REGS(dq_map_0),
+		CFG_REGS(dq_map_1),
+		CFG_REGS(dq_map_2),
+		CFG_REGS(dq_map_3),
 		CFG_REGS(err_disable),
 		CFG_REGS(err_int_en),
 		CFG_REGS(ddr_eor),
@@ -574,6 +657,7 @@
 		CFG_REGS(timing_cfg_2),
 		CFG_REGS(ddr_sdram_cfg),
 		CFG_REGS(ddr_sdram_cfg_2),
+		CFG_REGS(ddr_sdram_cfg_3),
 		CFG_REGS(ddr_sdram_mode),
 		CFG_REGS(ddr_sdram_mode_2),
 		CFG_REGS(ddr_sdram_mode_3),
@@ -582,6 +666,16 @@
 		CFG_REGS(ddr_sdram_mode_6),
 		CFG_REGS(ddr_sdram_mode_7),
 		CFG_REGS(ddr_sdram_mode_8),
+#ifdef CONFIG_SYS_FSL_DDR4
+		CFG_REGS(ddr_sdram_mode_9),
+		CFG_REGS(ddr_sdram_mode_10),
+		CFG_REGS(ddr_sdram_mode_11),
+		CFG_REGS(ddr_sdram_mode_12),
+		CFG_REGS(ddr_sdram_mode_13),
+		CFG_REGS(ddr_sdram_mode_14),
+		CFG_REGS(ddr_sdram_mode_15),
+		CFG_REGS(ddr_sdram_mode_16),
+#endif
 		CFG_REGS(ddr_sdram_interval),
 		CFG_REGS(ddr_data_init),
 		CFG_REGS(ddr_sdram_clk_cntl),
@@ -589,6 +683,12 @@
 		CFG_REGS(ddr_init_ext_addr),
 		CFG_REGS(timing_cfg_4),
 		CFG_REGS(timing_cfg_5),
+#ifdef CONFIG_SYS_FSL_DDR4
+		CFG_REGS(timing_cfg_6),
+		CFG_REGS(timing_cfg_7),
+		CFG_REGS(timing_cfg_8),
+		CFG_REGS(timing_cfg_9),
+#endif
 		CFG_REGS(ddr_zq_cntl),
 		CFG_REGS(ddr_wrlvl_cntl),
 		CFG_REGS(ddr_wrlvl_cntl_2),
@@ -598,6 +698,10 @@
 		CFG_REGS(ddr_sdram_rcw_2),
 		CFG_REGS(ddr_cdr1),
 		CFG_REGS(ddr_cdr2),
+		CFG_REGS(dq_map_0),
+		CFG_REGS(dq_map_1),
+		CFG_REGS(dq_map_2),
+		CFG_REGS(dq_map_3),
 		CFG_REGS(err_disable),
 		CFG_REGS(err_int_en),
 		CFG_REGS(ddr_sdram_rcw_2),
@@ -705,6 +809,9 @@
 		CTRL_OPTIONS(tfaw_window_four_activates_ps),
 		CTRL_OPTIONS(trwt_override),
 		CTRL_OPTIONS(trwt),
+		CTRL_OPTIONS(rtt_override),
+		CTRL_OPTIONS(rtt_override_value),
+		CTRL_OPTIONS(rtt_wr_override_value),
 	};
 	static const unsigned int n_opts = ARRAY_SIZE(options);
 
@@ -1245,6 +1352,266 @@
 }
 #endif
 
+#ifdef CONFIG_SYS_FSL_DDR4
+void ddr4_spd_dump(const struct ddr4_spd_eeprom_s *spd)
+{
+	unsigned int i;
+
+	/* General Section: Bytes 0-127 */
+
+#define PRINT_NXS(x, y, z...) printf("%-3d    : %02x " z "\n", x, (u8)y);
+#define PRINT_NNXXS(n0, n1, x0, x1, s) \
+	printf("%-3d-%3d: %02x %02x " s "\n", n0, n1, x0, x1);
+
+	PRINT_NXS(0, spd->info_size_crc,
+		  "info_size_crc  bytes written into serial memory, CRC coverage");
+	PRINT_NXS(1, spd->spd_rev,
+		  "spd_rev        SPD Revision");
+	PRINT_NXS(2, spd->mem_type,
+		  "mem_type       Key Byte / DRAM Device Type");
+	PRINT_NXS(3, spd->module_type,
+		  "module_type    Key Byte / Module Type");
+	PRINT_NXS(4, spd->density_banks,
+		  "density_banks  SDRAM Density and Banks");
+	PRINT_NXS(5, spd->addressing,
+		  "addressing     SDRAM Addressing");
+	PRINT_NXS(6, spd->package_type,
+		  "package_type   Package type");
+	PRINT_NXS(7, spd->opt_feature,
+		  "opt_feature    Optional features");
+	PRINT_NXS(8, spd->thermal_ref,
+		  "thermal_ref    Thermal and Refresh options");
+	PRINT_NXS(9, spd->oth_opt_features,
+		  "oth_opt_features Other SDRAM optional features");
+	PRINT_NXS(10, spd->res_10,
+		  "res_10         Reserved");
+	PRINT_NXS(11, spd->module_vdd,
+		  "module_vdd     Module Nominal Voltage, VDD");
+	PRINT_NXS(12, spd->organization,
+		  "organization Module Organization");
+	PRINT_NXS(13, spd->bus_width,
+		  "bus_width      Module Memory Bus Width");
+	PRINT_NXS(14, spd->therm_sensor,
+		  "therm_sensor   Module Thermal Sensor");
+	PRINT_NXS(15, spd->ext_type,
+		  "ext_type       Extended module type");
+	PRINT_NXS(16, spd->res_16,
+		  "res_16       Reserved");
+	PRINT_NXS(17, spd->timebases,
+		  "timebases    MTb and FTB");
+	PRINT_NXS(18, spd->tck_min,
+		  "tck_min      tCKAVGmin");
+	PRINT_NXS(19, spd->tck_max,
+		  "tck_max      TCKAVGmax");
+	PRINT_NXS(20, spd->caslat_b1,
+		  "caslat_b1    CAS latencies, 1st byte");
+	PRINT_NXS(21, spd->caslat_b2,
+		  "caslat_b2    CAS latencies, 2nd byte");
+	PRINT_NXS(22, spd->caslat_b3,
+		  "caslat_b3    CAS latencies, 3rd byte ");
+	PRINT_NXS(23, spd->caslat_b4,
+		  "caslat_b4    CAS latencies, 4th byte");
+	PRINT_NXS(24, spd->taa_min,
+		  "taa_min      Min CAS Latency Time");
+	PRINT_NXS(25, spd->trcd_min,
+		  "trcd_min     Min RAS# to CAS# Delay Time");
+	PRINT_NXS(26, spd->trp_min,
+		  "trp_min      Min Row Precharge Delay Time");
+	PRINT_NXS(27, spd->tras_trc_ext,
+		  "tras_trc_ext Upper Nibbles for tRAS and tRC");
+	PRINT_NXS(28, spd->tras_min_lsb,
+		  "tras_min_lsb tRASmin, lsb");
+	PRINT_NXS(29, spd->trc_min_lsb,
+		  "trc_min_lsb  tRCmin, lsb");
+	PRINT_NXS(30, spd->trfc1_min_lsb,
+		  "trfc1_min_lsb  Min Refresh Recovery Delay Time, LSB");
+	PRINT_NXS(31, spd->trfc1_min_msb,
+		  "trfc1_min_msb  Min Refresh Recovery Delay Time, MSB ");
+	PRINT_NXS(32, spd->trfc2_min_lsb,
+		  "trfc2_min_lsb  Min Refresh Recovery Delay Time, LSB");
+	PRINT_NXS(33, spd->trfc2_min_msb,
+		  "trfc2_min_msb  Min Refresh Recovery Delay Time, MSB");
+	PRINT_NXS(34, spd->trfc4_min_lsb,
+		  "trfc4_min_lsb Min Refresh Recovery Delay Time, LSB");
+	PRINT_NXS(35, spd->trfc4_min_msb,
+		  "trfc4_min_msb Min Refresh Recovery Delay Time, MSB");
+	PRINT_NXS(36, spd->tfaw_msb,
+		  "tfaw_msb      Upper Nibble for tFAW");
+	PRINT_NXS(37, spd->tfaw_min,
+		  "tfaw_min      tFAW, lsb");
+	PRINT_NXS(38, spd->trrds_min,
+		  "trrds_min     tRRD_Smin, MTB");
+	PRINT_NXS(39, spd->trrdl_min,
+		  "trrdl_min     tRRD_Lmin, MTB");
+	PRINT_NXS(40, spd->tccdl_min,
+		  "tccdl_min     tCCS_Lmin, MTB");
+
+	printf("%-3d-%3d: ", 41, 59);  /* Reserved, General Section */
+	for (i = 41; i <= 59; i++)
+		printf("%02x ", spd->res_41[i - 41]);
+
+	puts("\n");
+	printf("%-3d-%3d: ", 60, 77);
+	for (i = 60; i <= 77; i++)
+		printf("%02x ", spd->mapping[i - 60]);
+	puts("   mapping[] Connector to SDRAM bit map\n");
+
+	PRINT_NXS(117, spd->fine_tccdl_min,
+		  "fine_tccdl_min Fine offset for tCCD_Lmin");
+	PRINT_NXS(118, spd->fine_trrdl_min,
+		  "fine_trrdl_min Fine offset for tRRD_Lmin");
+	PRINT_NXS(119, spd->fine_trrds_min,
+		  "fine_trrds_min Fine offset for tRRD_Smin");
+	PRINT_NXS(120, spd->fine_trc_min,
+		  "fine_trc_min   Fine offset for tRCmin");
+	PRINT_NXS(121, spd->fine_trp_min,
+		  "fine_trp_min   Fine offset for tRPmin");
+	PRINT_NXS(122, spd->fine_trcd_min,
+		  "fine_trcd_min  Fine offset for tRCDmin");
+	PRINT_NXS(123, spd->fine_taa_min,
+		  "fine_taa_min   Fine offset for tAAmin");
+	PRINT_NXS(124, spd->fine_tck_max,
+		  "fine_tck_max   Fine offset for tCKAVGmax");
+	PRINT_NXS(125, spd->fine_tck_min,
+		  "fine_tck_min   Fine offset for tCKAVGmin");
+
+	/* CRC: Bytes 126-127 */
+	PRINT_NNXXS(126, 127, spd->crc[0], spd->crc[1], "  SPD CRC");
+
+	switch (spd->module_type) {
+	case 0x02:  /* UDIMM */
+	case 0x03:  /* SO-DIMM */
+		PRINT_NXS(128, spd->mod_section.unbuffered.mod_height,
+			  "mod_height    (Unbuffered) Module Nominal Height");
+		PRINT_NXS(129, spd->mod_section.unbuffered.mod_thickness,
+			  "mod_thickness (Unbuffered) Module Maximum Thickness");
+		PRINT_NXS(130, spd->mod_section.unbuffered.ref_raw_card,
+			  "ref_raw_card  (Unbuffered) Reference Raw Card Used");
+		PRINT_NXS(131, spd->mod_section.unbuffered.addr_mapping,
+			  "addr_mapping  (Unbuffered) Address mapping from Edge Connector to DRAM");
+		PRINT_NNXXS(254, 255, spd->mod_section.unbuffered.crc[0],
+			    spd->mod_section.unbuffered.crc[1], "  Module CRC");
+		break;
+	case 0x01:  /* RDIMM */
+		PRINT_NXS(128, spd->mod_section.registered.mod_height,
+			  "mod_height    (Registered) Module Nominal Height");
+		PRINT_NXS(129, spd->mod_section.registered.mod_thickness,
+			  "mod_thickness (Registered) Module Maximum Thickness");
+		PRINT_NXS(130, spd->mod_section.registered.ref_raw_card,
+			  "ref_raw_card  (Registered) Reference Raw Card Used");
+		PRINT_NXS(131, spd->mod_section.registered.modu_attr,
+			  "modu_attr     (Registered) DIMM Module Attributes");
+		PRINT_NXS(132, spd->mod_section.registered.thermal,
+			  "thermal       (Registered) Thermal Heat Spreader Solution");
+		PRINT_NXS(133, spd->mod_section.registered.reg_id_lo,
+			  "reg_id_lo     (Registered) Register Manufacturer ID Code, LSB");
+		PRINT_NXS(134, spd->mod_section.registered.reg_id_hi,
+			  "reg_id_hi     (Registered) Register Manufacturer ID Code, MSB");
+		PRINT_NXS(135, spd->mod_section.registered.reg_rev,
+			  "reg_rev       (Registered) Register Revision Number");
+		PRINT_NXS(136, spd->mod_section.registered.reg_map,
+			  "reg_map       (Registered) Address mapping");
+		PRINT_NNXXS(254, 255, spd->mod_section.registered.crc[0],
+			    spd->mod_section.registered.crc[1], "  Module CRC");
+		break;
+	case 0x04:  /* LRDIMM */
+		PRINT_NXS(128, spd->mod_section.loadreduced.mod_height,
+			  "mod_height    (Loadreduced) Module Nominal Height");
+		PRINT_NXS(129, spd->mod_section.loadreduced.mod_thickness,
+			  "mod_thickness (Loadreduced) Module Maximum Thickness");
+		PRINT_NXS(130, spd->mod_section.loadreduced.ref_raw_card,
+			  "ref_raw_card  (Loadreduced) Reference Raw Card Used");
+		PRINT_NXS(131, spd->mod_section.loadreduced.modu_attr,
+			  "modu_attr     (Loadreduced) DIMM Module Attributes");
+		PRINT_NXS(132, spd->mod_section.loadreduced.thermal,
+			  "thermal       (Loadreduced) Thermal Heat Spreader Solution");
+		PRINT_NXS(133, spd->mod_section.loadreduced.reg_id_lo,
+			  "reg_id_lo     (Loadreduced) Register Manufacturer ID Code, LSB");
+		PRINT_NXS(134, spd->mod_section.loadreduced.reg_id_hi,
+			  "reg_id_hi     (Loadreduced) Register Manufacturer ID Code, MSB");
+		PRINT_NXS(135, spd->mod_section.loadreduced.reg_rev,
+			  "reg_rev       (Loadreduced) Register Revision Number");
+		PRINT_NXS(136, spd->mod_section.loadreduced.reg_map,
+			  "reg_map       (Loadreduced) Address mapping");
+		PRINT_NXS(137, spd->mod_section.loadreduced.reg_drv,
+			  "reg_drv       (Loadreduced) Reg output drive strength");
+		PRINT_NXS(138, spd->mod_section.loadreduced.reg_drv_ck,
+			  "reg_drv_ck    (Loadreduced) Reg output drive strength for CK");
+		PRINT_NXS(139, spd->mod_section.loadreduced.data_buf_rev,
+			  "data_buf_rev  (Loadreduced) Data Buffer Revision Numbe");
+		PRINT_NXS(140, spd->mod_section.loadreduced.vrefqe_r0,
+			  "vrefqe_r0     (Loadreduced) DRAM VrefDQ for Package Rank 0");
+		PRINT_NXS(141, spd->mod_section.loadreduced.vrefqe_r1,
+			  "vrefqe_r1     (Loadreduced) DRAM VrefDQ for Package Rank 1");
+		PRINT_NXS(142, spd->mod_section.loadreduced.vrefqe_r2,
+			  "vrefqe_r2     (Loadreduced) DRAM VrefDQ for Package Rank 2");
+		PRINT_NXS(143, spd->mod_section.loadreduced.vrefqe_r3,
+			  "vrefqe_r3     (Loadreduced) DRAM VrefDQ for Package Rank 3");
+		PRINT_NXS(144, spd->mod_section.loadreduced.data_intf,
+			  "data_intf     (Loadreduced) Data Buffer VrefDQ for DRAM Interface");
+		PRINT_NXS(145, spd->mod_section.loadreduced.data_drv_1866,
+			  "data_drv_1866 (Loadreduced) Data Buffer MDQ Drive Strength and RTT");
+		PRINT_NXS(146, spd->mod_section.loadreduced.data_drv_2400,
+			  "data_drv_2400 (Loadreduced) Data Buffer MDQ Drive Strength and RTT");
+		PRINT_NXS(147, spd->mod_section.loadreduced.data_drv_3200,
+			  "data_drv_3200 (Loadreduced) Data Buffer MDQ Drive Strength and RTT");
+		PRINT_NXS(148, spd->mod_section.loadreduced.dram_drv,
+			  "dram_drv      (Loadreduced) DRAM Drive Strength");
+		PRINT_NXS(149, spd->mod_section.loadreduced.dram_odt_1866,
+			  "dram_odt_1866 (Loadreduced) DRAM ODT (RTT_WR, RTT_NOM)");
+		PRINT_NXS(150, spd->mod_section.loadreduced.dram_odt_2400,
+			  "dram_odt_2400 (Loadreduced) DRAM ODT (RTT_WR, RTT_NOM)");
+		PRINT_NXS(151, spd->mod_section.loadreduced.dram_odt_3200,
+			  "dram_odt_3200 (Loadreduced) DRAM ODT (RTT_WR, RTT_NOM)");
+		PRINT_NXS(152, spd->mod_section.loadreduced.dram_odt_park_1866,
+			  "dram_odt_park_1866 (Loadreduced) DRAM ODT (RTT_PARK)");
+		PRINT_NXS(153, spd->mod_section.loadreduced.dram_odt_park_2400,
+			  "dram_odt_park_2400 (Loadreduced) DRAM ODT (RTT_PARK)");
+		PRINT_NXS(154, spd->mod_section.loadreduced.dram_odt_park_3200,
+			  "dram_odt_park_3200 (Loadreduced) DRAM ODT (RTT_PARK)");
+		PRINT_NNXXS(254, 255, spd->mod_section.loadreduced.crc[0],
+			    spd->mod_section.loadreduced.crc[1],
+			    "  Module CRC");
+		break;
+	default:
+		/* Module-specific Section, Unsupported Module Type */
+		printf("%-3d-%3d: ", 128, 255);
+
+		for (i = 128; i <= 255; i++)
+			printf("%02x", spd->mod_section.uc[i - 60]);
+
+		break;
+	}
+
+	/* Unique Module ID: Bytes 320-383 */
+	PRINT_NXS(320, spd->mmid_lsb, "Module MfgID Code LSB - JEP-106");
+	PRINT_NXS(321, spd->mmid_msb, "Module MfgID Code MSB - JEP-106");
+	PRINT_NXS(322, spd->mloc,     "Mfg Location");
+	PRINT_NNXXS(323, 324, spd->mdate[0], spd->mdate[1], "Mfg Date");
+
+	printf("%-3d-%3d: ", 325, 328);
+
+	for (i = 325; i <= 328; i++)
+		printf("%02x ", spd->sernum[i - 325]);
+	printf("   Module Serial Number\n");
+
+	printf("%-3d-%3d: ", 329, 348);
+	for (i = 329; i <= 348; i++)
+		printf("%02x ", spd->mpart[i - 329]);
+	printf("   Mfg's Module Part Number\n");
+
+	PRINT_NXS(349, spd->mrev, "Module Revision code");
+	PRINT_NXS(350, spd->dmid_lsb, "DRAM MfgID Code LSB - JEP-106");
+	PRINT_NXS(351, spd->dmid_msb, "DRAM MfgID Code MSB - JEP-106");
+	PRINT_NXS(352, spd->stepping, "DRAM stepping");
+
+	printf("%-3d-%3d: ", 353, 381);
+	for (i = 353; i <= 381; i++)
+		printf("%02x ", spd->msd[i - 353]);
+	printf("   Mfg's Specific Data\n");
+}
+#endif
+
 static inline void generic_spd_dump(const generic_spd_eeprom_t *spd)
 {
 #if defined(CONFIG_SYS_FSL_DDR1)
@@ -1253,6 +1620,8 @@
 	ddr2_spd_dump(spd);
 #elif defined(CONFIG_SYS_FSL_DDR3)
 	ddr3_spd_dump(spd);
+#elif defined(CONFIG_SYS_FSL_DDR4)
+	ddr4_spd_dump(spd);
 #endif
 }
 
diff --git a/drivers/ddr/fsl/lc_common_dimm_params.c b/drivers/ddr/fsl/lc_common_dimm_params.c
index 610318a..05a24dd 100644
--- a/drivers/ddr/fsl/lc_common_dimm_params.c
+++ b/drivers/ddr/fsl/lc_common_dimm_params.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2008-2012 Freescale Semiconductor, Inc.
+ * Copyright 2008-2014 Freescale Semiconductor, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -11,20 +11,23 @@
 
 #include <fsl_ddr.h>
 
-#if defined(CONFIG_SYS_FSL_DDR3)
+#if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4)
 static unsigned int
-compute_cas_latency_ddr3(const dimm_params_t *dimm_params,
-			 common_timing_params_t *outpdimm,
-			 unsigned int number_of_dimms)
+compute_cas_latency(const dimm_params_t *dimm_params,
+		    common_timing_params_t *outpdimm,
+		    unsigned int number_of_dimms)
 {
 	unsigned int i;
-	unsigned int taamin_ps = 0;
-	unsigned int tckmin_x_ps = 0;
 	unsigned int common_caslat;
 	unsigned int caslat_actual;
 	unsigned int retry = 16;
 	unsigned int tmp;
 	const unsigned int mclk_ps = get_memory_clk_period_ps();
+#ifdef CONFIG_SYS_FSL_DDR3
+	const unsigned int taamax = 20000;
+#else
+	const unsigned int taamax = 18000;
+#endif
 
 	/* compute the common CAS latency supported between slots */
 	tmp = dimm_params[0].caslat_x;
@@ -34,19 +37,20 @@
 	}
 	common_caslat = tmp;
 
-	/* compute the max tAAmin tCKmin between slots */
-	for (i = 0; i < number_of_dimms; i++) {
-		taamin_ps = max(taamin_ps, dimm_params[i].taa_ps);
-		tckmin_x_ps = max(tckmin_x_ps, dimm_params[i].tckmin_x_ps);
-	}
 	/* validate if the memory clk is in the range of dimms */
-	if (mclk_ps < tckmin_x_ps) {
+	if (mclk_ps < outpdimm->tckmin_x_ps) {
 		printf("DDR clock (MCLK cycle %u ps) is faster than "
 			"the slowest DIMM(s) (tCKmin %u ps) can support.\n",
-			mclk_ps, tckmin_x_ps);
+			mclk_ps, outpdimm->tckmin_x_ps);
 	}
+#ifdef CONFIG_SYS_FSL_DDR4
+	if (mclk_ps > outpdimm->tckmax_ps) {
+		printf("DDR clock (MCLK cycle %u ps) is slower than DIMM(s) (tCKmax %u ps) can support.\n",
+		       mclk_ps, outpdimm->tckmax_ps);
+	}
+#endif
 	/* determine the acutal cas latency */
-	caslat_actual = (taamin_ps + mclk_ps - 1) / mclk_ps;
+	caslat_actual = (outpdimm->taamin_ps + mclk_ps - 1) / mclk_ps;
 	/* check if the dimms support the CAS latency */
 	while (!(common_caslat & (1 << caslat_actual)) && retry > 0) {
 		caslat_actual++;
@@ -54,13 +58,147 @@
 	}
 	/* once the caculation of caslat_actual is completed
 	 * we must verify that this CAS latency value does not
-	 * exceed tAAmax, which is 20 ns for all DDR3 speed grades
+	 * exceed tAAmax, which is 20 ns for all DDR3 speed grades,
+	 * 18ns for all DDR4 speed grades.
 	 */
-	if (caslat_actual * mclk_ps > 20000) {
+	if (caslat_actual * mclk_ps > taamax) {
 		printf("The choosen cas latency %d is too large\n",
 			caslat_actual);
 	}
-	outpdimm->lowest_common_SPD_caslat = caslat_actual;
+	outpdimm->lowest_common_spd_caslat = caslat_actual;
+	debug("lowest_common_spd_caslat is 0x%x\n", caslat_actual);
+
+	return 0;
+}
+#else	/* for DDR1 and DDR2 */
+static unsigned int
+compute_cas_latency(const dimm_params_t *dimm_params,
+		    common_timing_params_t *outpdimm,
+		    unsigned int number_of_dimms)
+{
+	int i;
+	const unsigned int mclk_ps = get_memory_clk_period_ps();
+	unsigned int lowest_good_caslat;
+	unsigned int not_ok;
+	unsigned int temp1, temp2;
+
+	debug("using mclk_ps = %u\n", mclk_ps);
+	if (mclk_ps > outpdimm->tckmax_ps) {
+		printf("Warning: DDR clock (%u ps) is slower than DIMM(s) (tCKmax %u ps)\n",
+		       mclk_ps, outpdimm->tckmax_ps);
+	}
+
+	/*
+	 * Compute a CAS latency suitable for all DIMMs
+	 *
+	 * Strategy for SPD-defined latencies: compute only
+	 * CAS latency defined by all DIMMs.
+	 */
+
+	/*
+	 * Step 1: find CAS latency common to all DIMMs using bitwise
+	 * operation.
+	 */
+	temp1 = 0xFF;
+	for (i = 0; i < number_of_dimms; i++) {
+		if (dimm_params[i].n_ranks) {
+			temp2 = 0;
+			temp2 |= 1 << dimm_params[i].caslat_x;
+			temp2 |= 1 << dimm_params[i].caslat_x_minus_1;
+			temp2 |= 1 << dimm_params[i].caslat_x_minus_2;
+			/*
+			 * If there was no entry for X-2 (X-1) in
+			 * the SPD, then caslat_x_minus_2
+			 * (caslat_x_minus_1) contains either 255 or
+			 * 0xFFFFFFFF because that's what the glorious
+			 * __ilog2 function returns for an input of 0.
+			 * On 32-bit PowerPC, left shift counts with bit
+			 * 26 set (that the value of 255 or 0xFFFFFFFF
+			 * will have), cause the destination register to
+			 * be 0.  That is why this works.
+			 */
+			temp1 &= temp2;
+		}
+	}
+
+	/*
+	 * Step 2: check each common CAS latency against tCK of each
+	 * DIMM's SPD.
+	 */
+	lowest_good_caslat = 0;
+	temp2 = 0;
+	while (temp1) {
+		not_ok = 0;
+		temp2 =  __ilog2(temp1);
+		debug("checking common caslat = %u\n", temp2);
+
+		/* Check if this CAS latency will work on all DIMMs at tCK. */
+		for (i = 0; i < number_of_dimms; i++) {
+			if (!dimm_params[i].n_ranks)
+				continue;
+
+			if (dimm_params[i].caslat_x == temp2) {
+				if (mclk_ps >= dimm_params[i].tckmin_x_ps) {
+					debug("CL = %u ok on DIMM %u at tCK=%u ps with tCKmin_X_ps of %u\n",
+					      temp2, i, mclk_ps,
+					      dimm_params[i].tckmin_x_ps);
+					continue;
+				} else {
+					not_ok++;
+				}
+			}
+
+			if (dimm_params[i].caslat_x_minus_1 == temp2) {
+				unsigned int tckmin_x_minus_1_ps
+					= dimm_params[i].tckmin_x_minus_1_ps;
+				if (mclk_ps >= tckmin_x_minus_1_ps) {
+					debug("CL = %u ok on DIMM %u at tCK=%u ps with tckmin_x_minus_1_ps of %u\n",
+					      temp2, i, mclk_ps,
+					      tckmin_x_minus_1_ps);
+					continue;
+				} else {
+					not_ok++;
+				}
+			}
+
+			if (dimm_params[i].caslat_x_minus_2 == temp2) {
+				unsigned int tckmin_x_minus_2_ps
+					= dimm_params[i].tckmin_x_minus_2_ps;
+				if (mclk_ps >= tckmin_x_minus_2_ps) {
+					debug("CL = %u ok on DIMM %u at tCK=%u ps with tckmin_x_minus_2_ps of %u\n",
+					      temp2, i, mclk_ps,
+					      tckmin_x_minus_2_ps);
+					continue;
+				} else {
+					not_ok++;
+				}
+			}
+		}
+
+		if (!not_ok)
+			lowest_good_caslat = temp2;
+
+		temp1 &= ~(1 << temp2);
+	}
+
+	debug("lowest common SPD-defined CAS latency = %u\n",
+	      lowest_good_caslat);
+	outpdimm->lowest_common_spd_caslat = lowest_good_caslat;
+
+
+	/*
+	 * Compute a common 'de-rated' CAS latency.
+	 *
+	 * The strategy here is to find the *highest* dereated cas latency
+	 * with the assumption that all of the DIMMs will support a dereated
+	 * CAS latency higher than or equal to their lowest dereated value.
+	 */
+	temp1 = 0;
+	for (i = 0; i < number_of_dimms; i++)
+		temp1 = max(temp1, dimm_params[i].caslat_lowest_derated);
+
+	outpdimm->highest_common_derated_caslat = temp1;
+	debug("highest common dereated CAS latency = %u\n", temp1);
 
 	return 0;
 }
@@ -82,34 +220,40 @@
 
 	unsigned int tckmin_x_ps = 0;
 	unsigned int tckmax_ps = 0xFFFFFFFF;
-	unsigned int tckmax_max_ps = 0;
 	unsigned int trcd_ps = 0;
 	unsigned int trp_ps = 0;
 	unsigned int tras_ps = 0;
+#if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4)
+	unsigned int taamin_ps = 0;
+#endif
+#ifdef CONFIG_SYS_FSL_DDR4
+	unsigned int twr_ps = 15000;
+	unsigned int trfc1_ps = 0;
+	unsigned int trfc2_ps = 0;
+	unsigned int trfc4_ps = 0;
+	unsigned int trrds_ps = 0;
+	unsigned int trrdl_ps = 0;
+	unsigned int tccdl_ps = 0;
+#else
 	unsigned int twr_ps = 0;
 	unsigned int twtr_ps = 0;
 	unsigned int trfc_ps = 0;
 	unsigned int trrd_ps = 0;
+	unsigned int trtp_ps = 0;
+#endif
 	unsigned int trc_ps = 0;
 	unsigned int refresh_rate_ps = 0;
 	unsigned int extended_op_srt = 1;
+#if defined(CONFIG_SYS_FSL_DDR1) || defined(CONFIG_SYS_FSL_DDR2)
 	unsigned int tis_ps = 0;
 	unsigned int tih_ps = 0;
 	unsigned int tds_ps = 0;
 	unsigned int tdh_ps = 0;
-	unsigned int trtp_ps = 0;
 	unsigned int tdqsq_max_ps = 0;
 	unsigned int tqhs_ps = 0;
-
+#endif
 	unsigned int temp1, temp2;
 	unsigned int additive_latency = 0;
-#if !defined(CONFIG_SYS_FSL_DDR3)
-	const unsigned int mclk_ps = get_memory_clk_period_ps();
-	unsigned int lowest_good_caslat;
-	unsigned int not_ok;
-
-	debug("using mclk_ps = %u\n", mclk_ps);
-#endif
 
 	temp1 = 0;
 	for (i = 0; i < number_of_dimms; i++) {
@@ -146,31 +290,34 @@
 		 * i.e., this is the slowest the whole system can go.
 		 */
 		tckmax_ps = min(tckmax_ps, dimm_params[i].tckmax_ps);
-
-		/* Either find maximum value to determine slowest
-		 * speed, delay, time, period, etc */
+#if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4)
+		taamin_ps = max(taamin_ps, dimm_params[i].taa_ps);
+#endif
 		tckmin_x_ps = max(tckmin_x_ps, dimm_params[i].tckmin_x_ps);
-		tckmax_max_ps = max(tckmax_max_ps, dimm_params[i].tckmax_ps);
 		trcd_ps = max(trcd_ps, dimm_params[i].trcd_ps);
 		trp_ps = max(trp_ps, dimm_params[i].trp_ps);
 		tras_ps = max(tras_ps, dimm_params[i].tras_ps);
+#ifdef CONFIG_SYS_FSL_DDR4
+		trfc1_ps = max(trfc1_ps, dimm_params[i].trfc1_ps);
+		trfc2_ps = max(trfc2_ps, dimm_params[i].trfc2_ps);
+		trfc4_ps = max(trfc4_ps, dimm_params[i].trfc4_ps);
+		trrds_ps = max(trrds_ps, dimm_params[i].trrds_ps);
+		trrdl_ps = max(trrdl_ps, dimm_params[i].trrdl_ps);
+		tccdl_ps = max(tccdl_ps, dimm_params[i].tccdl_ps);
+#else
 		twr_ps = max(twr_ps, dimm_params[i].twr_ps);
 		twtr_ps = max(twtr_ps, dimm_params[i].twtr_ps);
 		trfc_ps = max(trfc_ps, dimm_params[i].trfc_ps);
 		trrd_ps = max(trrd_ps, dimm_params[i].trrd_ps);
+		trtp_ps = max(trtp_ps, dimm_params[i].trtp_ps);
+#endif
 		trc_ps = max(trc_ps, dimm_params[i].trc_ps);
+#if defined(CONFIG_SYS_FSL_DDR1) || defined(CONFIG_SYS_FSL_DDR2)
 		tis_ps = max(tis_ps, dimm_params[i].tis_ps);
 		tih_ps = max(tih_ps, dimm_params[i].tih_ps);
 		tds_ps = max(tds_ps, dimm_params[i].tds_ps);
 		tdh_ps = max(tdh_ps, dimm_params[i].tdh_ps);
-		trtp_ps = max(trtp_ps, dimm_params[i].trtp_ps);
 		tqhs_ps = max(tqhs_ps, dimm_params[i].tqhs_ps);
-		refresh_rate_ps = max(refresh_rate_ps,
-				      dimm_params[i].refresh_rate_ps);
-		/* extended_op_srt is either 0 or 1, 0 having priority */
-		extended_op_srt = min(extended_op_srt,
-				      dimm_params[i].extended_op_srt);
-
 		/*
 		 * Find maximum tdqsq_max_ps to find slowest.
 		 *
@@ -178,6 +325,12 @@
 		 * strategy for this parameter?
 		 */
 		tdqsq_max_ps = max(tdqsq_max_ps, dimm_params[i].tdqsq_max_ps);
+#endif
+		refresh_rate_ps = max(refresh_rate_ps,
+				      dimm_params[i].refresh_rate_ps);
+		/* extended_op_srt is either 0 or 1, 0 having priority */
+		extended_op_srt = min(extended_op_srt,
+				      dimm_params[i].extended_op_srt);
 	}
 
 	outpdimm->ndimms_present = number_of_dimms - temp1;
@@ -189,24 +342,37 @@
 
 	outpdimm->tckmin_x_ps = tckmin_x_ps;
 	outpdimm->tckmax_ps = tckmax_ps;
-	outpdimm->tckmax_max_ps = tckmax_max_ps;
+#if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4)
+	outpdimm->taamin_ps = taamin_ps;
+#endif
 	outpdimm->trcd_ps = trcd_ps;
 	outpdimm->trp_ps = trp_ps;
 	outpdimm->tras_ps = tras_ps;
-	outpdimm->twr_ps = twr_ps;
+#ifdef CONFIG_SYS_FSL_DDR4
+	outpdimm->trfc1_ps = trfc1_ps;
+	outpdimm->trfc2_ps = trfc2_ps;
+	outpdimm->trfc4_ps = trfc4_ps;
+	outpdimm->trrds_ps = trrds_ps;
+	outpdimm->trrdl_ps = trrdl_ps;
+	outpdimm->tccdl_ps = tccdl_ps;
+#else
 	outpdimm->twtr_ps = twtr_ps;
 	outpdimm->trfc_ps = trfc_ps;
 	outpdimm->trrd_ps = trrd_ps;
+	outpdimm->trtp_ps = trtp_ps;
+#endif
+	outpdimm->twr_ps = twr_ps;
 	outpdimm->trc_ps = trc_ps;
 	outpdimm->refresh_rate_ps = refresh_rate_ps;
 	outpdimm->extended_op_srt = extended_op_srt;
+#if defined(CONFIG_SYS_FSL_DDR1) || defined(CONFIG_SYS_FSL_DDR2)
 	outpdimm->tis_ps = tis_ps;
 	outpdimm->tih_ps = tih_ps;
 	outpdimm->tds_ps = tds_ps;
 	outpdimm->tdh_ps = tdh_ps;
-	outpdimm->trtp_ps = trtp_ps;
 	outpdimm->tdqsq_max_ps = tdqsq_max_ps;
 	outpdimm->tqhs_ps = tqhs_ps;
+#endif
 
 	/* Determine common burst length for all DIMMs. */
 	temp1 = 0xff;
@@ -265,128 +431,9 @@
 	if (temp1 != 0)
 		printf("ERROR: Mix different RDIMM detected!\n");
 
-#if defined(CONFIG_SYS_FSL_DDR3)
-	if (compute_cas_latency_ddr3(dimm_params, outpdimm, number_of_dimms))
+	/* calculate cas latency for all DDR types */
+	if (compute_cas_latency(dimm_params, outpdimm, number_of_dimms))
 		return 1;
-#else
-	/*
-	 * Compute a CAS latency suitable for all DIMMs
-	 *
-	 * Strategy for SPD-defined latencies: compute only
-	 * CAS latency defined by all DIMMs.
-	 */
-
-	/*
-	 * Step 1: find CAS latency common to all DIMMs using bitwise
-	 * operation.
-	 */
-	temp1 = 0xFF;
-	for (i = 0; i < number_of_dimms; i++) {
-		if (dimm_params[i].n_ranks) {
-			temp2 = 0;
-			temp2 |= 1 << dimm_params[i].caslat_x;
-			temp2 |= 1 << dimm_params[i].caslat_x_minus_1;
-			temp2 |= 1 << dimm_params[i].caslat_x_minus_2;
-			/*
-			 * FIXME: If there was no entry for X-2 (X-1) in
-			 * the SPD, then caslat_x_minus_2
-			 * (caslat_x_minus_1) contains either 255 or
-			 * 0xFFFFFFFF because that's what the glorious
-			 * __ilog2 function returns for an input of 0.
-			 * On 32-bit PowerPC, left shift counts with bit
-			 * 26 set (that the value of 255 or 0xFFFFFFFF
-			 * will have), cause the destination register to
-			 * be 0.  That is why this works.
-			 */
-			temp1 &= temp2;
-		}
-	}
-
-	/*
-	 * Step 2: check each common CAS latency against tCK of each
-	 * DIMM's SPD.
-	 */
-	lowest_good_caslat = 0;
-	temp2 = 0;
-	while (temp1) {
-		not_ok = 0;
-		temp2 =  __ilog2(temp1);
-		debug("checking common caslat = %u\n", temp2);
-
-		/* Check if this CAS latency will work on all DIMMs at tCK. */
-		for (i = 0; i < number_of_dimms; i++) {
-			if (!dimm_params[i].n_ranks) {
-				continue;
-			}
-			if (dimm_params[i].caslat_x == temp2) {
-				if (mclk_ps >= dimm_params[i].tckmin_x_ps) {
-					debug("CL = %u ok on DIMM %u at tCK=%u"
-					    " ps with its tCKmin_X_ps of %u\n",
-					       temp2, i, mclk_ps,
-					       dimm_params[i].tckmin_x_ps);
-					continue;
-				} else {
-					not_ok++;
-				}
-			}
-
-			if (dimm_params[i].caslat_x_minus_1 == temp2) {
-				unsigned int tckmin_x_minus_1_ps
-					= dimm_params[i].tckmin_x_minus_1_ps;
-				if (mclk_ps >= tckmin_x_minus_1_ps) {
-					debug("CL = %u ok on DIMM %u at "
-						"tCK=%u ps with its "
-						"tckmin_x_minus_1_ps of %u\n",
-					       temp2, i, mclk_ps,
-					       tckmin_x_minus_1_ps);
-					continue;
-				} else {
-					not_ok++;
-				}
-			}
-
-			if (dimm_params[i].caslat_x_minus_2 == temp2) {
-				unsigned int tckmin_x_minus_2_ps
-					= dimm_params[i].tckmin_x_minus_2_ps;
-				if (mclk_ps >= tckmin_x_minus_2_ps) {
-					debug("CL = %u ok on DIMM %u at "
-						"tCK=%u ps with its "
-						"tckmin_x_minus_2_ps of %u\n",
-					       temp2, i, mclk_ps,
-					       tckmin_x_minus_2_ps);
-					continue;
-				} else {
-					not_ok++;
-				}
-			}
-		}
-
-		if (!not_ok) {
-			lowest_good_caslat = temp2;
-		}
-
-		temp1 &= ~(1 << temp2);
-	}
-
-	debug("lowest common SPD-defined CAS latency = %u\n",
-	       lowest_good_caslat);
-	outpdimm->lowest_common_SPD_caslat = lowest_good_caslat;
-
-
-	/*
-	 * Compute a common 'de-rated' CAS latency.
-	 *
-	 * The strategy here is to find the *highest* dereated cas latency
-	 * with the assumption that all of the DIMMs will support a dereated
-	 * CAS latency higher than or equal to their lowest dereated value.
-	 */
-	temp1 = 0;
-	for (i = 0; i < number_of_dimms; i++) {
-		temp1 = max(temp1, dimm_params[i].caslat_lowest_derated);
-	}
-	outpdimm->highest_common_derated_caslat = temp1;
-	debug("highest common dereated CAS latency = %u\n", temp1);
-#endif /* #if defined(CONFIG_SYS_FSL_DDR3) */
 
 	/* Determine if all DIMMs ECC capable. */
 	temp1 = 1;
@@ -404,14 +451,6 @@
 	}
 	outpdimm->all_dimms_ecc_capable = temp1;
 
-#ifndef CONFIG_SYS_FSL_DDR3
-	/* FIXME: move to somewhere else to validate. */
-	if (mclk_ps > tckmax_max_ps) {
-		printf("Warning: some of the installed DIMMs "
-				"can not operate this slowly.\n");
-		return 1;
-	}
-#endif
 	/*
 	 * Compute additive latency.
 	 *
@@ -468,27 +507,20 @@
 	additive_latency = 0;
 
 #if defined(CONFIG_SYS_FSL_DDR2)
-	if (lowest_good_caslat < 4) {
-		additive_latency = (picos_to_mclk(trcd_ps) > lowest_good_caslat)
-			? picos_to_mclk(trcd_ps) - lowest_good_caslat : 0;
+	if ((outpdimm->lowest_common_spd_caslat < 4) &&
+	    (picos_to_mclk(trcd_ps) > outpdimm->lowest_common_spd_caslat)) {
+		additive_latency = picos_to_mclk(trcd_ps) -
+				   outpdimm->lowest_common_spd_caslat;
 		if (mclk_to_picos(additive_latency) > trcd_ps) {
 			additive_latency = picos_to_mclk(trcd_ps);
 			debug("setting additive_latency to %u because it was "
 				" greater than tRCD_ps\n", additive_latency);
 		}
 	}
-
-#elif defined(CONFIG_SYS_FSL_DDR3)
-	/*
-	 * The system will not use the global auto-precharge mode.
-	 * However, it uses the page mode, so we set AL=0
-	 */
-	additive_latency = 0;
 #endif
 
 	/*
 	 * Validate additive latency
-	 * FIXME: move to somewhere else to validate
 	 *
 	 * AL <= tRCD(min)
 	 */
@@ -516,10 +548,19 @@
 	debug("trcd_ps   = %u\n", outpdimm->trcd_ps);
 	debug("trp_ps    = %u\n", outpdimm->trp_ps);
 	debug("tras_ps   = %u\n", outpdimm->tras_ps);
-	debug("twr_ps    = %u\n", outpdimm->twr_ps);
+#ifdef CONFIG_SYS_FSL_DDR4
+	debug("trfc1_ps = %u\n", trfc1_ps);
+	debug("trfc2_ps = %u\n", trfc2_ps);
+	debug("trfc4_ps = %u\n", trfc4_ps);
+	debug("trrds_ps = %u\n", trrds_ps);
+	debug("trrdl_ps = %u\n", trrdl_ps);
+	debug("tccdl_ps = %u\n", tccdl_ps);
+#else
 	debug("twtr_ps   = %u\n", outpdimm->twtr_ps);
 	debug("trfc_ps   = %u\n", outpdimm->trfc_ps);
 	debug("trrd_ps   = %u\n", outpdimm->trrd_ps);
+#endif
+	debug("twr_ps    = %u\n", outpdimm->twr_ps);
 	debug("trc_ps    = %u\n", outpdimm->trc_ps);
 
 	return 0;
diff --git a/drivers/ddr/fsl/main.c b/drivers/ddr/fsl/main.c
index d62ca63..f95704f 100644
--- a/drivers/ddr/fsl/main.c
+++ b/drivers/ddr/fsl/main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2008-2012 Freescale Semiconductor, Inc.
+ * Copyright 2008-2014 Freescale Semiconductor, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -81,14 +81,37 @@
 
 #endif
 
+#define SPD_SPA0_ADDRESS	0x36
+#define SPD_SPA1_ADDRESS	0x37
+
 static void __get_spd(generic_spd_eeprom_t *spd, u8 i2c_address)
 {
 	int ret;
+#ifdef CONFIG_SYS_FSL_DDR4
+	uint8_t dummy = 0;
+#endif
 
 	i2c_set_bus_num(CONFIG_SYS_SPD_BUS_NUM);
 
+#ifdef CONFIG_SYS_FSL_DDR4
+	/*
+	 * DDR4 SPD has 384 to 512 bytes
+	 * To access the lower 256 bytes, we need to set EE page address to 0
+	 * To access the upper 256 bytes, we need to set EE page address to 1
+	 * See Jedec standar No. 21-C for detail
+	 */
+	i2c_write(SPD_SPA0_ADDRESS, 0, 1, &dummy, 1);
+	ret = i2c_read(i2c_address, 0, 1, (uchar *)spd, 256);
+	if (!ret) {
+		i2c_write(SPD_SPA1_ADDRESS, 0, 1, &dummy, 1);
+		ret = i2c_read(i2c_address, 0, 1,
+			       (uchar *)((ulong)spd + 256),
+			       min(256, sizeof(generic_spd_eeprom_t) - 256));
+	}
+#else
 	ret = i2c_read(i2c_address, 0, 1, (uchar *)spd,
 				sizeof(generic_spd_eeprom_t));
+#endif
 
 	if (ret) {
 		if (i2c_address ==
diff --git a/drivers/ddr/fsl/options.c b/drivers/ddr/fsl/options.c
index b0cf046..bf0d1da 100644
--- a/drivers/ddr/fsl/options.c
+++ b/drivers/ddr/fsl/options.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2008, 2010-2012 Freescale Semiconductor, Inc.
+ * Copyright 2008, 2010-2014 Freescale Semiconductor, Inc.
  *
  * SPDX-License-Identifier:	GPL-2.0+
  */
@@ -29,7 +29,7 @@
 	unsigned int odt_rtt_wr;
 };
 
-#ifdef CONFIG_SYS_FSL_DDR3
+#if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4)
 static const struct dynamic_odt single_Q[4] = {
 	{	/* cs0 */
 		FSL_DDR_ODT_NEVER,
@@ -259,7 +259,7 @@
 		DDR3_RTT_OFF
 	}
 };
-#else	/* CONFIG_SYS_FSL_DDR3 */
+#else	/* CONFIG_SYS_FSL_DDR3 || CONFIG_SYS_FSL_DDR4 */
 static const struct dynamic_odt single_Q[4] = {
 	{0, 0, 0, 0},
 	{0, 0, 0, 0},
@@ -507,7 +507,9 @@
 	unsigned int i;
 	char buffer[HWCONFIG_BUFFER_SIZE];
 	char *buf = NULL;
-#if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR2)
+#if defined(CONFIG_SYS_FSL_DDR3) || \
+	defined(CONFIG_SYS_FSL_DDR2) || \
+	defined(CONFIG_SYS_FSL_DDR4)
 	const struct dynamic_odt *pdodt = odt_unknown;
 #endif
 	ulong ddr_freq;
@@ -519,7 +521,9 @@
 	if (getenv_f("hwconfig", buffer, sizeof(buffer)) > 0)
 		buf = buffer;
 
-#if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR2)
+#if defined(CONFIG_SYS_FSL_DDR3) || \
+	defined(CONFIG_SYS_FSL_DDR2) || \
+	defined(CONFIG_SYS_FSL_DDR4)
 	/* Chip select options. */
 	if (CONFIG_DIMM_SLOTS_PER_CTLR == 1) {
 		switch (pdimm[0].n_ranks) {
@@ -585,7 +589,9 @@
 
 	/* Pick chip-select local options. */
 	for (i = 0; i < CONFIG_CHIP_SELECTS_PER_CTRL; i++) {
-#if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR2)
+#if defined(CONFIG_SYS_FSL_DDR3) || \
+	defined(CONFIG_SYS_FSL_DDR2) || \
+	defined(CONFIG_SYS_FSL_DDR4)
 		popts->cs_local_opts[i].odt_rd_cfg = pdodt[i].odt_rd_cfg;
 		popts->cs_local_opts[i].odt_wr_cfg = pdodt[i].odt_wr_cfg;
 		popts->cs_local_opts[i].odt_rtt_norm = pdodt[i].odt_rtt_norm;
@@ -703,7 +709,7 @@
 	popts->x4_en = (pdimm[0].device_width == 4) ? 1 : 0;
 
 	/* Choose burst length. */
-#if defined(CONFIG_SYS_FSL_DDR3)
+#if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4)
 #if defined(CONFIG_E500MC)
 	popts->otf_burst_chop_en = 0;	/* on-the-fly burst chop disable */
 	popts->burst_length = DDR_BL8;	/* Fixed 8-beat burst len */
@@ -722,7 +728,7 @@
 #endif
 
 	/* Choose ddr controller address mirror mode */
-#if defined(CONFIG_SYS_FSL_DDR3)
+#if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4)
 	popts->mirrored_dimm = pdimm[0].mirrored_dimm;
 #endif
 
@@ -766,11 +772,9 @@
 	 * BSTTOPRE precharge interval
 	 *
 	 * Set this to 0 for global auto precharge
-	 *
-	 * FIXME: Should this be configured in picoseconds?
-	 * Why it should be in ps:  better understanding of this
-	 * relative to actual DRAM timing parameters such as tRAS.
-	 * e.g. tRAS(min) = 40 ns
+	 * The value of 0x100 has been used for DDR1, DDR2, DDR3.
+	 * It is not wrong. Any value should be OK. The performance depends on
+	 * applications. There is no one good value for all.
 	 */
 	popts->bstopre = 0x100;
 
@@ -795,12 +799,12 @@
 	 */
 	popts->tfaw_window_four_activates_ps = 37500;
 
-#elif defined(CONFIG_SYS_FSL_DDR3)
+#else
 	popts->tfaw_window_four_activates_ps = pdimm[0].tfaw_ps;
 #endif
 	popts->zq_en = 0;
 	popts->wrlvl_en = 0;
-#if defined(CONFIG_SYS_FSL_DDR3)
+#if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4)
 	/*
 	 * due to ddr3 dimm is fly-by topology
 	 * we suggest to enable write leveling to
diff --git a/drivers/ddr/fsl/util.c b/drivers/ddr/fsl/util.c
index ad53658..7a22aa3 100644
--- a/drivers/ddr/fsl/util.c
+++ b/drivers/ddr/fsl/util.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2008-2012 Freescale Semiconductor, Inc.
+ * Copyright 2008-2014 Freescale Semiconductor, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -23,6 +23,18 @@
 
 #define ULL_8FS 0xFFFFFFFFULL
 
+u32 fsl_ddr_get_version(void)
+{
+	struct ccsr_ddr __iomem *ddr;
+	u32 ver_major_minor_errata;
+
+	ddr = (void *)_DDR_ADDR;
+	ver_major_minor_errata = (ddr_in32(&ddr->ip_rev1) & 0xFFFF) << 8;
+	ver_major_minor_errata |= (ddr_in32(&ddr->ip_rev2) & 0xFF00) >> 8;
+
+	return ver_major_minor_errata;
+}
+
 /*
  * Round up mclk_ps to nearest 1 ps in memory controller code
  * if the error is 0.5ps or more.
@@ -175,6 +187,9 @@
 	case SDRAM_TYPE_DDR3:
 		puts("3");
 		break;
+	case SDRAM_TYPE_DDR4:
+		puts("4");
+		break;
 	default:
 		puts("?");
 		break;
@@ -188,9 +203,12 @@
 		puts(", 64-bit");
 
 	/* Calculate CAS latency based on timing cfg values */
-	cas_lat = ((ddr_in32(&ddr->timing_cfg_1) >> 16) & 0xf) + 1;
-	if ((ddr_in32(&ddr->timing_cfg_3) >> 12) & 1)
-		cas_lat += (8 << 1);
+	cas_lat = ((ddr_in32(&ddr->timing_cfg_1) >> 16) & 0xf);
+	if (fsl_ddr_get_version() <= 0x40400)
+		cas_lat += 1;
+	else
+		cas_lat += 2;
+	cas_lat += ((ddr_in32(&ddr->timing_cfg_3) >> 12) & 3) << 4;
 	printf(", CL=%d", cas_lat >> 1);
 	if (cas_lat & 0x1)
 		puts(".5");