usb: dwc2: fix aligned buffer usage

The original aligned_buffer usage:
a) Uselessly copied data into the aligned buffer even for IN
   transactions. Fix this by making the copy conditional.
b) Always programmed the HW to transfer to/from the start of the aligned
   buffer. This worked fine for OUT transactions since the memcpy copied
   the OUT data to this location too. However, for large IN transactions,
   since the copy from the aligned buffer to the "client" buffer was
   deferred until after all chunks were transferred, it resulted in each
   chunk's transfer over-writing the data for the first transfer. Fix
   this by copying IN data as soon as it's received.

Signed-off-by: Stephen Warren <swarren@wwwdotorg.org>
diff --git a/drivers/usb/host/dwc2.c b/drivers/usb/host/dwc2.c
index 5a1c44a..05d21b7 100644
--- a/drivers/usb/host/dwc2.c
+++ b/drivers/usb/host/dwc2.c
@@ -795,7 +795,9 @@
 		       (*pid << DWC2_HCTSIZ_PID_OFFSET),
 		       &hc_regs->hctsiz);
 
-		memcpy(aligned_buffer, (char *)buffer + done, len - done);
+		if (!in)
+			memcpy(aligned_buffer, (char *)buffer + done, len);
+
 		writel((uint32_t)aligned_buffer, &hc_regs->hcdma);
 
 		/* Set host channel enable after all other setup is complete. */
@@ -810,16 +812,16 @@
 			break;
 		}
 
-		done += xfer_len;
 		if (in) {
-			done -= sub;
+			xfer_len -= sub;
+			memcpy(buffer + done, aligned_buffer, xfer_len);
 			if (sub)
 				stop_transfer = 1;
 		}
-	} while ((done < len) && !stop_transfer);
 
-	if (done && in)
-		memcpy(buffer, aligned_buffer, done);
+		done += xfer_len;
+
+	} while ((done < len) && !stop_transfer);
 
 	writel(0, &hc_regs->hcintmsk);
 	writel(0xFFFFFFFF, &hc_regs->hcint);