x86: Move loading of GDT to C code

Linux has C macros and code to load the GDT after switching to Protected
Mode. Using these greatly simplifies the assembler code.
diff --git a/arch/i386/cpu/cpu.c b/arch/i386/cpu/cpu.c
index bd6aced..ae40384 100644
--- a/arch/i386/cpu/cpu.c
+++ b/arch/i386/cpu/cpu.c
@@ -37,6 +37,61 @@
 #include <command.h>
 #include <asm/interrupt.h>
 
+/* Build the 64-bit value of a conventional segment GDT (or LDT) entry */
+/* This is a macro so it can be used in initializers */
+#define GDT_ENTRY(flags, base, limit)			\
+	((((base)  & 0xff000000ULL) << (56-24)) |	/* base 31:24 -> bits 63:56 */ \
+	 (((flags) & 0x0000f0ffULL) << 40) |		/* flags 7:0 -> 47:40, 15:12 -> 55:52 */ \
+	 (((limit) & 0x000f0000ULL) << (48-16)) |	/* limit 19:16 -> bits 51:48 */ \
+	 (((base)  & 0x00ffffffULL) << 16) |		/* base 23:0 -> bits 39:16 */ \
+	 (((limit) & 0x0000ffffULL)))			/* limit 15:0 -> bits 15:0 */
+
+/* Simple and small GDT entries for booting only */
+/* These are indices into the descriptor table, not selector values */
+#define GDT_ENTRY_32BIT_CS	2
+#define GDT_ENTRY_32BIT_DS	(GDT_ENTRY_32BIT_CS + 1)
+#define GDT_ENTRY_16BIT_CS	(GDT_ENTRY_32BIT_DS + 1)
+#define GDT_ENTRY_16BIT_DS	(GDT_ENTRY_16BIT_CS + 1)
+
+/*
+ * Set up the GDT: struct gdt_ptr is the packed limit/base operand for lgdt
+ */
+
+struct gdt_ptr {
+	u16 len;	/* table size in bytes, minus one */
+	u32 ptr;	/* address of the descriptor table */
+} __attribute__((packed));
+
+/* Load boot_gdt with lgdt and reload DS/ES/FS/GS/SS; CS is left untouched */
+static void reload_gdt(void)
+{
+	/* There are machines which are known to not boot with the GDT
+	   being 8-byte unaligned.  Intel recommends 16 byte alignment. */
+	static const u64 boot_gdt[] __attribute__((aligned(16))) = {
+		/* CS: code, read/execute, 4 GB, base 0 */
+		[GDT_ENTRY_32BIT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff),
+		/* DS: data, read/write, 4 GB, base 0 */
+		[GDT_ENTRY_32BIT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
+		/* 16-bit CS: code, read/execute, 64 kB, base 0 */
+		[GDT_ENTRY_16BIT_CS] = GDT_ENTRY(0x109b, 0, 0x0ffff),
+		/* 16-bit DS: data, read/write, 64 kB, base 0 */
+		[GDT_ENTRY_16BIT_DS] = GDT_ENTRY(0x1093, 0, 0x0ffff),
+	};
+	static struct gdt_ptr gdt;	/* limit/base operand for lgdt */
+
+	gdt.len = sizeof(boot_gdt)-1;	/* limit = offset of the last valid byte */
+	gdt.ptr = (u32)&boot_gdt;
+	asm volatile("lgdtl %0\n"
+		     "movl %1, %%ecx\n"
+		     "movl %%ecx, %%ds\n"
+		     "movl %%ecx, %%es\n"
+		     "movl %%ecx, %%fs\n"
+		     "movl %%ecx, %%gs\n"
+		     "movl %%ecx, %%ss"
+		     : : "m" (gdt), "i" (GDT_ENTRY_32BIT_DS * 8) : "ecx");
+}
+
+
 int cpu_init_f(void)
 {
 	/* initialize FPU, reset EM, set MP and NE */
@@ -51,6 +106,8 @@
 
 int cpu_init_r(void)
 {
+	reload_gdt();
+
 	/* Initialize core interrupt and exception functionality of CPU */
 	cpu_init_interrupts ();
 	return 0;
diff --git a/arch/i386/cpu/start.S b/arch/i386/cpu/start.S
index 7def8de..3cea04b 100644
--- a/arch/i386/cpu/start.S
+++ b/arch/i386/cpu/start.S
@@ -100,53 +100,6 @@
 	jmp     get_mem_size
 get_mem_size_ret:
 
-	/*
-	 * We are now in 'Flat Protected Mode' and we know how much memory
-	 * the board has. The (temporary) Global Descriptor Table is not
-	 * in a 'Safe' place (it is either in Flash which can be erased or
-	 * reprogrammed or in a fail-safe boot-strap image which could be
-	 * over-written).
-	 *
-	 * Move the final gdt to a safe place (top of RAM) and load it.
-	 * This is not a trivial excercise - the lgdt instruction does not
-	 * have a register operand (memory only) and we may well be
-	 * running from Flash, so self modifying code will not work here.
-	 * To overcome this, we copy a stub into upper memory along with
-	 * the GDT.
-	 */
-
-	/* Reduce upper memory limit by (Stub + GDT Pointer + GDT) */
-	subl	$(end_gdt_setup - start_gdt_setup), %eax
-
-	/* Copy the GDT and Stub */
-	movl	$start_gdt_setup, %esi
-	movl	%eax, %edi
-	movl	$(end_gdt_setup - start_gdt_setup), %ecx
-	shrl	$2, %ecx
-	cld
-	rep	movsl
-
-	/* write the lgdt 'parameter' */
-	subl	$(jmp_instr - start_gdt_setup - 4), %ebp
-	addl	%eax, %ebp
-	movl	$(gdt_ptr - start_gdt_setup), %ebx
-	addl	%eax, %ebx
-	movl	%ebx, (%ebp)
-
-	/* write the gdt address into the pointer */
-	movl	$(gdt_addr - start_gdt_setup), %ebp
-	addl	%eax, %ebp
-	movl	$(gdt - start_gdt_setup), %ebx
-	addl	%eax, %ebx
-	movl	%ebx, (%ebp)
-
-	/* Save the return address */
-	movl	$load_gdt_ret, %ebp
-
-	/* Load the new (safe) Global Descriptor Table */
-	jmp	*%eax
-
-load_gdt_ret:
 	/* Check we have enough memory for stack */
 	movl	$CONFIG_SYS_STACK_SIZE, %ecx
 	cmpl	%ecx, %eax
@@ -221,52 +174,3 @@
 blank_idt_ptr:
 	.word	0		/* limit */
 	.long	0		/* base */
-
-.align 4
-start_gdt_setup:
-	lgdt	gdt_ptr
-jmp_instr:
-	jmp	*%ebp
-
-.align 4
-gdt_ptr:
-	.word	0x30		/* limit (48 bytes = 6 GDT entries) */
-gdt_addr:
-	.long	gdt		/* base */
-
-	/* The GDT table ...
-	 *
-	 *	 Selector	Type
-	 *	 0x00		NULL
-	 *	 0x08		Unused
-	 *	 0x10		32bit code
-	 *	 0x18		32bit data/stack
-	 *	 0x20		16bit code
-	 *	 0x28		16bit data/stack
-	 */
-
-.align 4
-gdt:
-	.word	0, 0, 0, 0	/* NULL  */
-	.word	0, 0, 0, 0	/* unused */
-
-	.word	0xFFFF		/* 4Gb - (0x100000*0x1000 = 4Gb) */
-	.word	0		/* base address = 0 */
-	.word	0x9B00		/* code read/exec */
-	.word	0x00CF		/* granularity = 4096, 386 (+5th nibble of limit) */
-
-	.word	0xFFFF		/* 4Gb - (0x100000*0x1000 = 4Gb) */
-	.word	0x0		/* base address = 0 */
-	.word	0x9300		/* data read/write */
-	.word	0x00CF		/* granularity = 4096, 386 (+5th nibble of limit) */
-
-	.word	0xFFFF		/* 64kb */
-	.word	0		/* base address = 0 */
-	.word	0x9b00		/* data read/write */
-	.word	0x0010		/* granularity = 1  (+5th nibble of limit) */
-
-	.word	0xFFFF		/* 64kb */
-	.word	0		/* base address = 0 */
-	.word	0x9300		/* data read/write */
-	.word	0x0010		/* granularity = 1 (+5th nibble of limit) */
-end_gdt_setup: