Align global_data to a 16-byte boundary

Some archs like to have a larger alignment for their global data. Use 16 bytes,
which suits all current archs.

Signed-off-by: Simon Glass <sjg@chromium.org>
Reviewed-by: Bin Meng <bmeng.cn@gmail.com>
diff --git a/common/board_f.c b/common/board_f.c
index 8cca4de..74f77f1 100644
--- a/common/board_f.c
+++ b/common/board_f.c
@@ -499,6 +499,7 @@
 static int reserve_global_data(void)
 {
 	gd->start_addr_sp -= sizeof(gd_t);
+	gd->start_addr_sp &= ~0xf;
 	gd->new_gd = (gd_t *)map_sysmem(gd->start_addr_sp, sizeof(gd_t));
 	debug("Reserving %zu Bytes for Global Data at: %08lx\n",
 			sizeof(gd_t), gd->start_addr_sp);
diff --git a/include/asm-generic/global_data.h b/include/asm-generic/global_data.h
index 2155265..cc369fc 100644
--- a/include/asm-generic/global_data.h
+++ b/include/asm-generic/global_data.h
@@ -99,7 +99,8 @@
 	int pcidelay_done;
 #endif
 	struct udevice *cur_serial_dev;	/* current serial device */
-	struct arch_global_data arch;	/* architecture-specific data */
+	/* arch-specific data */
+	struct arch_global_data arch __attribute__((aligned(16)));
 } gd_t;
 #endif