arm: socfpga: Support FPGA program write of RBF data with size >= 4 bytes

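The existing FPGA program write always assumes the RBF data is at least
32 bytes: the 32-byte burst loop is entered without checking whether its
iteration count is zero, so writing RBF data smaller than 32 bytes to
the FPGA fails. This patch skips that loop when its count is zero, so
the FPGA program write supports RBF data with size >= 4 bytes.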

Signed-off-by: Tien Fong Chee <tien.fong.chee@intel.com>
diff --git a/drivers/fpga/socfpga.c b/drivers/fpga/socfpga.c
index 28fa16b..6e14ebd 100644
--- a/drivers/fpga/socfpga.c
+++ b/drivers/fpga/socfpga.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012 Altera Corporation <www.altera.com>
+ * Copyright (C) 2012-2017 Altera Corporation <www.altera.com>
  * All rights reserved.
  *
  * SPDX-License-Identifier:	BSD-3-Clause
@@ -55,18 +55,20 @@
 	uint32_t loops4 = DIV_ROUND_UP(rbf_size % 32, 4);
 
 	asm volatile(
+		"	cmp	%2,	#0\n"
+		"	beq	2f\n"
 		"1:	ldmia	%0!,	{r0-r7}\n"
 		"	stmia	%1!,	{r0-r7}\n"
 		"	sub	%1,	#32\n"
 		"	subs	%2,	#1\n"
 		"	bne	1b\n"
-		"	cmp	%3,	#0\n"
-		"	beq	3f\n"
-		"2:	ldr	%2,	[%0],	#4\n"
+		"2:	cmp	%3,	#0\n"
+		"	beq	4f\n"
+		"3:	ldr	%2,	[%0],	#4\n"
 		"	str	%2,	[%1]\n"
 		"	subs	%3,	#1\n"
-		"	bne	2b\n"
-		"3:	nop\n"
+		"	bne	3b\n"
+		"4:	nop\n"
 		: "+r"(src), "+r"(dst), "+r"(loops32), "+r"(loops4) :
 		: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "cc");
 }