sh: share the correct version of start.S among all cpus

It is easy to note that SH2/SH3/SH4 start.S code is practically
the same with a minor difference for SH2 where a short data header is
present. To avoid unwanted code duplication and to automatically
convert SH2 and SH3 platforms to generic board support move fixed SH4
start.S into arch/sh/lib/start.S and share it among all platforms.

Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Reviewed-by: Simon Glass <sjg@chromium.org>
diff --git a/arch/sh/lib/Makefile b/arch/sh/lib/Makefile
index 7be20b1..473cf0d 100644
--- a/arch/sh/lib/Makefile
+++ b/arch/sh/lib/Makefile
@@ -5,6 +5,8 @@
 # SPDX-License-Identifier:	GPL-2.0+
 #
 
+extra-y	+= start.o
+
 obj-y	+= board.o
 obj-$(CONFIG_CMD_BOOTM) += bootm.o
 ifeq ($(CONFIG_CPU_SH2),y)
diff --git a/arch/sh/lib/start.S b/arch/sh/lib/start.S
new file mode 100644
index 0000000..37d38d5
--- /dev/null
+++ b/arch/sh/lib/start.S
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2016 Vladimir Zapolskiy <vz@mleia.com>
+ * Copyright (C) 2007, 2010 Nobuhiro Iwamatsu <iwamatsu@nigauri.org>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+
+	.text
+	.align	2
+
+	.global	_start
+_start:
+#ifdef CONFIG_CPU_SH2
+	.long 0x00000010        /* Ppower ON reset PC*/
+	.long 0x00000000
+	.long 0x00000010        /* Manual reset PC */
+	.long 0x00000000
+#endif
+	mov.l	._lowlevel_init, r0
+100:	bsrf	r0
+	nop
+
+	bsr	1f
+	nop
+1:	sts	pr, r5
+	mov.l	._reloc_dst, r4
+	add	#(_start-1b), r5
+	mov.l	._reloc_dst_end, r6
+
+2:	mov.l	@r5+, r1
+	mov.l	r1, @r4
+	add	#4, r4
+	cmp/hs	r6, r4
+	bf	2b
+
+	mov.l	._bss_start, r4
+	mov.l	._bss_end, r5
+	mov	#0, r1
+
+3:	mov.l	r1, @r4			/* bss clear */
+	add	#4, r4
+	cmp/hs	r5, r4
+	bf	3b
+
+	mov.l	._gd_init, r13		/* global data */
+	mov.l	._stack_init, r15	/* stack */
+
+	mov.l	._sh_generic_init, r0
+	jsr	@r0
+	mov     #0, r4
+
+loop:
+	bra	loop
+
+	.align	2
+
+._lowlevel_init:	.long	(lowlevel_init - (100b + 4))
+._reloc_dst:		.long	_start
+._reloc_dst_end:	.long	reloc_dst_end
+._bss_start:		.long	bss_start
+._bss_end:		.long	bss_end
+._gd_init:		.long	(_start - GENERATED_GBL_DATA_SIZE)
+._stack_init:		.long	(_start - GENERATED_GBL_DATA_SIZE - CONFIG_SYS_MALLOC_LEN - 16)
+._sh_generic_init:	.long	board_init_f