Merge branch '2021-07-09-arm-updates'

- Assorted ARM platform updates
diff --git a/board/Marvell/octeontx/board-fdt.c b/board/Marvell/octeontx/board-fdt.c
index 0b05ef1..1db2a4a 100644
--- a/board/Marvell/octeontx/board-fdt.c
+++ b/board/Marvell/octeontx/board-fdt.c
@@ -281,20 +281,16 @@
 	}
 
 	if (blob) {
+		/* delete cavium,bdk node if it exists */
 		offset = fdt_path_offset(blob, "/cavium,bdk");
-		if (offset < 0) {
-			printf("ERROR: FDT BDK node not found\n");
-			return offset;
+		if (offset >= 0) {
+			ret = fdt_del_node(blob, offset);
+			if (ret < 0) {
+				printf("WARNING : could not remove bdk node\n");
+				return ret;
+			}
+			debug("%s deleted bdk node\n", __func__);
 		}
-
-		/* delete node */
-		ret = fdt_del_node(blob, offset);
-		if (ret < 0) {
-			printf("WARNING : could not remove bdk node\n");
-			return ret;
-		}
-
-		debug("%s deleted bdk node\n", __func__);
 	}
 
 	return 0;
diff --git a/drivers/clk/mvebu/armada-37xx-periph.c b/drivers/clk/mvebu/armada-37xx-periph.c
index b0f47c3..3b767d7 100644
--- a/drivers/clk/mvebu/armada-37xx-periph.c
+++ b/drivers/clk/mvebu/armada-37xx-periph.c
@@ -626,4 +626,5 @@
 	.ops		= &armada_37xx_periph_clk_ops,
 	.priv_auto	= sizeof(struct a37xx_periphclk),
 	.probe		= armada_37xx_periph_clk_probe,
+	.flags		= DM_FLAG_PRE_RELOC,
 };
diff --git a/drivers/clk/mvebu/armada-37xx-tbg.c b/drivers/clk/mvebu/armada-37xx-tbg.c
index b1c0852..054aff5 100644
--- a/drivers/clk/mvebu/armada-37xx-tbg.c
+++ b/drivers/clk/mvebu/armada-37xx-tbg.c
@@ -152,4 +152,5 @@
 	.ops		= &armada_37xx_tbg_clk_ops,
 	.priv_auto	= sizeof(struct a37xx_tbgclk),
 	.probe		= armada_37xx_tbg_clk_probe,
+	.flags		= DM_FLAG_PRE_RELOC,
 };
diff --git a/drivers/serial/serial_mvebu_a3700.c b/drivers/serial/serial_mvebu_a3700.c
index 8f40487..c7e66fe 100644
--- a/drivers/serial/serial_mvebu_a3700.c
+++ b/drivers/serial/serial_mvebu_a3700.c
@@ -4,12 +4,16 @@
  */
 
 #include <common.h>
+#include <clk.h>
 #include <dm.h>
 #include <serial.h>
 #include <asm/io.h>
+#include <asm/arch/cpu.h>
 
 struct mvebu_plat {
 	void __iomem *base;
+	ulong tbg_rate; /* rate of the TBG clock selected in probe */
+	u8 tbg_idx; /* index of that TBG clock, programmed into UART_BAUD_REG */
 };
 
 /*
@@ -29,8 +33,6 @@
 #define UART_CTRL_RXFIFO_RESET	0x4000
 #define UART_CTRL_TXFIFO_RESET	0x8000
 
-#define CONFIG_UART_BASE_CLOCK	25804800
-
 static int mvebu_serial_putc(struct udevice *dev, const char ch)
 {
 	struct mvebu_plat *plat = dev_get_plat(dev);
@@ -75,18 +77,70 @@
 {
 	struct mvebu_plat *plat = dev_get_plat(dev);
 	void __iomem *base = plat->base;
+	u32 divider, d1, d2;
+	u32 oversampling;
 
 	/*
 	 * Calculate divider
 	 * baudrate = clock / 16 / divider
 	 */
-	writel(CONFIG_UART_BASE_CLOCK / baudrate / 16, base + UART_BAUD_REG);
+	d1 = d2 = 1;
+	divider = DIV_ROUND_CLOSEST(plat->tbg_rate, baudrate * 16 * d1 * d2);
 
 	/*
 	 * Set Programmable Oversampling Stack to 0,
 	 * UART defaults to 16x scheme
 	 */
-	writel(0, base + UART_POSSR_REG);
+	oversampling = 0;
+
+	if (divider < 1)
+		divider = 1;
+	else if (divider > 1023) {
+		/*
+		 * If divider is too high for selected baudrate then set
+		 * divider d1 to the maximal value 6.
+		 */
+		d1 = 6;
+		divider = DIV_ROUND_CLOSEST(plat->tbg_rate,
+					    baudrate * 16 * d1 * d2);
+		if (divider < 1)
+			divider = 1;
+		else if (divider > 1023) {
+			/*
+			 * If divider is still too high then set also divider
+			 * d2 to the maximal value 6.
+			 */
+			d2 = 6;
+			divider = DIV_ROUND_CLOSEST(plat->tbg_rate,
+						    baudrate * 16 * d1 * d2);
+			if (divider < 1)
+				divider = 1;
+			else if (divider > 1023) {
+				/*
+				 * And if divider is still too high then
+				 * use oversampling with maximal factor 63.
+				 */
+				oversampling = (63 << 0) | (63 << 8) |
+					      (63 << 16) | (63 << 24);
+				divider = DIV_ROUND_CLOSEST(plat->tbg_rate,
+						baudrate * 63 * d1 * d2);
+				if (divider < 1)
+					divider = 1;
+				else if (divider > 1023)
+					divider = 1023;
+			}
+		}
+	}
+
+	divider |= BIT(19); /* Do not use XTAL as a base clock */
+	divider |= d1 << 15; /* Set d1 divider */
+	divider |= d2 << 12; /* Set d2 divider */
+	divider |= plat->tbg_idx << 10; /* Use selected TBG as a base clock */
+
+	while (!(readl(base + UART_STATUS_REG) & UART_STATUS_TX_EMPTY))
+		;
+	writel(divider, base + UART_BAUD_REG);
+	writel(oversampling, base + UART_POSSR_REG);
 
 	return 0;
 }
@@ -95,6 +149,50 @@
 {
 	struct mvebu_plat *plat = dev_get_plat(dev);
 	void __iomem *base = plat->base;
+	struct udevice *nb_clk;
+	ofnode nb_clk_node;
+	int i, res;
+
+	nb_clk_node = ofnode_by_compatible(ofnode_null(),
+					   "marvell,armada-3700-periph-clock-nb");
+	if (!ofnode_valid(nb_clk_node)) {
+		printf("%s: NB periph clock node not available\n", __func__);
+		return -ENODEV;
+	}
+
+	res = device_get_global_by_ofnode(nb_clk_node, &nb_clk);
+	if (res) {
+		printf("%s: Cannot get NB periph clock\n", __func__);
+		return res;
+	}
+
+	/*
+	 * Choose the TBG clock with the lowest frequency, which allows the
+	 * UART to be configured at lower baudrates as well.
+	 */
+	for (i = 0; i < 4; i++) {
+		struct clk clk;
+		ulong rate;
+
+		res = clk_get_by_index_nodev(nb_clk_node, i, &clk);
+		if (res) {
+			printf("%s: Cannot get TBG clock %i: %i\n", __func__,
+			       i, res);
+			return -ENODEV;
+		}
+
+		rate = clk_get_rate(&clk);
+		if (!rate || IS_ERR_VALUE(rate)) {
+			printf("%s: Cannot get rate for TBG clock %i\n",
+			       __func__, i);
+			return -EINVAL;
+		}
+
+		if (!i || plat->tbg_rate > rate) {
+			plat->tbg_rate = rate;
+			plat->tbg_idx = i;
+		}
+	}
 
 	/* reset FIFOs */
 	writel(UART_CTRL_RXFIFO_RESET | UART_CTRL_TXFIFO_RESET,
@@ -106,6 +204,71 @@
 	return 0;
 }
 
+static int mvebu_serial_remove(struct udevice *dev)
+{
+	struct mvebu_plat *plat = dev_get_plat(dev);
+	void __iomem *base = plat->base;
+	ulong new_parent_rate, parent_rate;
+	u32 new_divider, divider;
+	u32 new_oversampling;
+	u32 oversampling;
+	u32 d1, d2;
+
+	/*
+	 * Switch the UART base clock back to XTAL, keeping the baudrate.
+	 * Older Linux kernels expect XTAL; otherwise they compute a wrong
+	 * UART divisor and the UART does not work in the kernel.
+	 */
+	divider = readl(base + UART_BAUD_REG);
+	if (!(divider & BIT(19))) /* bit 19 clear: UART already uses XTAL */
+		return 0;
+
+	/* Decode current d1, d2, divisor and oversampling settings */
+	d1 = (divider >> 15) & 7; /* bits 17:15 */
+	d2 = (divider >> 12) & 7; /* bits 14:12 */
+	parent_rate = plat->tbg_rate;
+	divider &= 1023; /* keep only the low 10-bit divisor field */
+	oversampling = readl(base + UART_POSSR_REG) & 63;
+	if (!oversampling)
+		oversampling = 16; /* POSSR of 0 means the default 16x scheme */
+
+	/* Recompute the divisor for XTAL (16x scheme) at the same baudrate */
+	new_oversampling = 0;
+	new_parent_rate = get_ref_clk() * 1000000; /* MHz -> Hz, presumably */
+	new_divider = DIV_ROUND_CLOSEST(new_parent_rate * divider * d1 * d2 *
+					oversampling, parent_rate * 16);
+
+	/*
+	 * UART does not work reliably when the XTAL divisor is smaller than 4.
+	 * In that case the UART parent is left unchanged. Either the user
+	 * configured unsupported settings or runs a newer kernel with patches
+	 * that allow a non-XTAL clock as the parent clock.
+	 */
+	if (new_divider < 4)
+		return 0;
+
+	/*
+	 * New divisor above 1023: retry with oversampling factor 63.
+	 * NOTE(review): only the low POSSR field is set to 63 here, while the
+	 * baudrate-setting code writes 63 to all four byte fields -- confirm.
+	 */
+	if (new_divider > 1023) {
+		new_oversampling = 63;
+		new_divider = DIV_ROUND_CLOSEST(new_parent_rate * divider * d1 *
+						d2 * oversampling,
+						parent_rate * new_oversampling);
+		if (new_divider < 4 || new_divider > 1023)
+			return 0; /* not representable: keep current clock */
+	}
+
+	while (!(readl(base + UART_STATUS_REG) & UART_STATUS_TX_EMPTY))
+		; /* poll until TX is reported empty before reprogramming */
+
+	writel(new_divider, base + UART_BAUD_REG); /* bit 19 clear: XTAL */
+	writel(new_oversampling, base + UART_POSSR_REG);
+
+	return 0;
+}
+
 static int mvebu_serial_of_to_plat(struct udevice *dev)
 {
 	struct mvebu_plat *plat = dev_get_plat(dev);
@@ -134,6 +297,8 @@
 	.of_to_plat = mvebu_serial_of_to_plat,
 	.plat_auto	= sizeof(struct mvebu_plat),
 	.probe	= mvebu_serial_probe,
+	.remove	= mvebu_serial_remove,
+	.flags	= DM_FLAG_OS_PREPARE,
 	.ops	= &mvebu_serial_ops,
 };
 
@@ -144,6 +309,7 @@
 static inline void _debug_uart_init(void)
 {
 	void __iomem *base = (void __iomem *)CONFIG_DEBUG_UART_BASE;
+	u32 baudrate, parent_rate, divider;
 
 	/* reset FIFOs */
 	writel(UART_CTRL_RXFIFO_RESET | UART_CTRL_TXFIFO_RESET,
@@ -156,7 +322,10 @@
 	 * Calculate divider
 	 * baudrate = clock / 16 / divider
 	 */
-	writel(CONFIG_UART_BASE_CLOCK / 115200 / 16, base + UART_BAUD_REG);
+	baudrate = 115200;
+	parent_rate = get_ref_clk() * 1000000;
+	divider = DIV_ROUND_CLOSEST(parent_rate, baudrate * 16);
+	writel(divider, base + UART_BAUD_REG);
 
 	/*
 	 * Set Programmable Oversampling Stack to 0,
diff --git a/include/configs/mvebu_armada-37xx.h b/include/configs/mvebu_armada-37xx.h
index 2ad4325..a2bea29 100644
--- a/include/configs/mvebu_armada-37xx.h
+++ b/include/configs/mvebu_armada-37xx.h
@@ -17,8 +17,13 @@
 
 #define CONFIG_SYS_BOOTM_LEN	SZ_64M /* Increase max gunzip size */
 
-#define CONFIG_SYS_BAUDRATE_TABLE	{ 9600, 19200, 38400, 57600, \
-					  115200, 230400, 460800, 921600 }
+#define CONFIG_SYS_BAUDRATE_TABLE	{ 300, 600, 1200, 1800, 2400, 4800, \
+					  9600, 19200, 38400, 57600, 115200, \
+					  230400, 460800, 500000, 576000, \
+					  921600, 1000000, 1152000, 1500000, \
+					  2000000, 2500000, 3000000, 3500000, \
+					  4000000, 4500000, 5000000, 5500000, \
+					  6000000 }
 
 /*
  * For booting Linux, the board info and command line data
diff --git a/include/configs/turris_mox.h b/include/configs/turris_mox.h
index 9c021a1..6712839 100644
--- a/include/configs/turris_mox.h
+++ b/include/configs/turris_mox.h
@@ -22,8 +22,13 @@
 
 /* auto boot */
 
-#define CONFIG_SYS_BAUDRATE_TABLE	{ 9600, 19200, 38400, 57600, \
-					  115200, 230400, 460800, 921600 }
+#define CONFIG_SYS_BAUDRATE_TABLE	{ 300, 600, 1200, 1800, 2400, 4800, \
+					  9600, 19200, 38400, 57600, 115200, \
+					  230400, 460800, 500000, 576000, \
+					  921600, 1000000, 1152000, 1500000, \
+					  2000000, 2500000, 3000000, 3500000, \
+					  4000000, 4500000, 5000000, 5500000, \
+					  6000000 }
 
 /*
  * For booting Linux, the board info and command line data