x86: fsp: Load GDT before calling FspInitEntry

Currently the FSP execution environment GDT is setup by U-Boot in
arch/x86/cpu/start16.S, which works pretty well. But if we try to
move the FspInitEntry call a little bit later to better fit into
U-Boot's initialization sequence, FSP will fail to bring up the AP
due to #GP fault as AP's GDT is duplicated from BSP whose GDT is
now moved into CAR, and unfortunately FSP calls AP initialization
after it disables the CAR. So basically the BSP's GDT still refers
to the one in the CAR, whose content is no longer available, so
when AP starts up and loads its segment register, it blows up.

To resolve this, we load GDT before calling into FspInitEntry.
The GDT is the same one used in arch/x86/cpu/start16.S, which is
in the ROM and exists forever.

Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
Tested-by: Andrew Bradford <andrew.bradford@kodakalaris.com>
Tested-by: Simon Glass <sjg@chromium.org>
Acked-by: Simon Glass <sjg@chromium.org>
diff --git a/arch/x86/cpu/cpu.c b/arch/x86/cpu/cpu.c
index bb4a110..b6c585a 100644
--- a/arch/x86/cpu/cpu.c
+++ b/arch/x86/cpu/cpu.c
@@ -164,6 +164,26 @@
 	load_fs(X86_GDT_ENTRY_32BIT_FS);
 }
 
+#ifdef CONFIG_HAVE_FSP
+/*
+ * Setup FSP execution environment GDT
+ *
+ * Per Intel FSP external architecture specification, before calling any FSP
+ * APIs, we need make sure the system is in flat 32-bit mode and both the code
+ * and data selectors should have full 4GB access range. Here we reuse the one
+ * we used in arch/x86/cpu/start16.S, and reload the segement registers.
+ */
+void setup_fsp_gdt(void)
+{
+	load_gdt((const u64 *)(gdt_rom + CONFIG_RESET_SEG_START), 4);
+	load_ds(X86_GDT_ENTRY_32BIT_DS);
+	load_ss(X86_GDT_ENTRY_32BIT_DS);
+	load_es(X86_GDT_ENTRY_32BIT_DS);
+	load_fs(X86_GDT_ENTRY_32BIT_DS);
+	load_gs(X86_GDT_ENTRY_32BIT_DS);
+}
+#endif
+
 int __weak x86_cleanup_before_linux(void)
 {
 #ifdef CONFIG_BOOTSTAGE_STASH
diff --git a/arch/x86/cpu/start16.S b/arch/x86/cpu/start16.S
index 826e2b4..5eb17f1 100644
--- a/arch/x86/cpu/start16.S
+++ b/arch/x86/cpu/start16.S
@@ -71,11 +71,12 @@
 	 */
 gdt_ptr:
 	.word	0x1f		/* limit (31 bytes = 4 GDT entries - 1) */
-	.long	BOOT_SEG + gdt	/* base */
+	.long	BOOT_SEG + gdt_rom	/* base */
 
 	/* Some CPUs are picky about GDT alignment... */
 	.align	16
-gdt:
+.globl gdt_rom
+gdt_rom:
 	/*
 	 * The GDT table ...
 	 *
diff --git a/arch/x86/include/asm/u-boot-x86.h b/arch/x86/include/asm/u-boot-x86.h
index d1d21ed..3c6ee29 100644
--- a/arch/x86/include/asm/u-boot-x86.h
+++ b/arch/x86/include/asm/u-boot-x86.h
@@ -8,12 +8,19 @@
 #ifndef _U_BOOT_I386_H_
 #define _U_BOOT_I386_H_	1
 
+extern char gdt_rom[];
+
 /* cpu/.../cpu.c */
 int arch_cpu_init(void);
 int x86_cpu_init_f(void);
 int cpu_init_f(void);
 void init_gd(gd_t *id, u64 *gdt_addr);
 void setup_gdt(gd_t *id, u64 *gdt_addr);
+/*
+ * Setup FSP execution environment GDT to use the one we used in
+ * arch/x86/cpu/start16.S and reload the segment registers.
+ */
+void setup_fsp_gdt(void);
 int init_cache(void);
 int cleanup_before_linux(void);
 
diff --git a/arch/x86/lib/fsp/fsp_support.c b/arch/x86/lib/fsp/fsp_support.c
index 5809235..4585166 100644
--- a/arch/x86/lib/fsp/fsp_support.c
+++ b/arch/x86/lib/fsp/fsp_support.c
@@ -173,6 +173,9 @@
 
 	post_code(POST_PRE_MRC);
 
+	/* Load GDT for FSP */
+	setup_fsp_gdt();
+
 	/*
 	 * Use ASM code to ensure the register value in EAX & ECX
 	 * will be passed into BlContinuationFunc