tools: kwbimage: Enforce 128-bit boundary alignment only for Sheeva CPU

This alignment is required only for platforms based on Sheeva CPU core
which are A370 and AXP. Now when U-Boot build system correctly propagates
LOAD_ADDRESS there is no need to have enabled 128-bit boundary alignment on
platforms which do not need it. Previously it was required because load
address was implicitly rounded to 128-bit boundary and U-Boot build system
expected it and misused it. Now with explicit setting of LOAD_ADDRESS there
is no guessing for load address anymore.

Signed-off-by: Pali Rohár <pali@kernel.org>
Reviewed-by: Stefan Roese <sr@denx.de>
diff --git a/tools/kwbimage.c b/tools/kwbimage.c
index ce053a4..7c21060 100644
--- a/tools/kwbimage.c
+++ b/tools/kwbimage.c
@@ -1101,8 +1101,10 @@
 				return 0;
 			}
 			headersz = e->binary.loadaddr - base_addr;
-		} else {
+		} else if (cpu_sheeva) {
 			headersz = ALIGN(headersz, 16);
+		} else {
+			headersz = ALIGN(headersz, 4);
 		}
 
 		headersz += ALIGN(s.st_size, 4) + sizeof(uint32_t);
@@ -1158,8 +1160,8 @@
 	*cur += (binarye->binary.nargs + 1) * sizeof(uint32_t);
 
 	/*
-	 * ARM executable code inside the BIN header on some mvebu platforms
-	 * (e.g. A370, AXP) must always be aligned with the 128-bit boundary.
+	 * ARM executable code inside the BIN header on platforms with Sheeva
+	 * CPU (A370 and AXP) must always be aligned with the 128-bit boundary.
 	 * In the case when this code is not position independent (e.g. ARM
 	 * SPL), it must be placed at fixed load and execute address.
 	 * This requirement can be met by inserting dummy arguments into
@@ -1170,8 +1172,10 @@
 	offset = *cur - (uint8_t *)main_hdr;
 	if (binarye->binary.loadaddr)
 		add_args = (binarye->binary.loadaddr - base_addr - offset) / sizeof(uint32_t);
-	else
+	else if (cpu_sheeva)
 		add_args = ((16 - offset % 16) % 16) / sizeof(uint32_t);
+	else
+		add_args = 0;
 	if (add_args) {
 		*(args - 1) = cpu_to_le32(binarye->binary.nargs + add_args);
 		*cur += add_args * sizeof(uint32_t);