dhry: Correct dhrystone calculation for fast machines

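At present samus reports about 5600 DMIPS. With the default iteration count
(1 million) this is OK, but if 10 million runs are performed the intermediate
product 'iterations * 1000' overflows a 32-bit integer, giving a bogus result.
Fix it by doing the multiplication and division in 64 bits.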

Signed-off-by: Simon Glass <sjg@chromium.org>
diff --git a/lib/dhry/cmd_dhry.c b/lib/dhry/cmd_dhry.c
index 5dc191e..d7e1e6a 100644
--- a/lib/dhry/cmd_dhry.c
+++ b/lib/dhry/cmd_dhry.c
@@ -6,11 +6,13 @@
 
 #include <common.h>
 #include <command.h>
+#include <div64.h>
 #include "dhry.h"
 
 static int do_dhry(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[])
 {
-	ulong start, duration, dhry_per_sec, vax_mips;
+	ulong start, duration, vax_mips;
+	u64 dhry_per_sec;
 	int iterations = 1000000;
 
 	if (argc > 1)
@@ -19,10 +21,10 @@
 	start = get_timer(0);
 	dhry(iterations);
 	duration = get_timer(start);
-	dhry_per_sec = iterations * 1000 / duration;
+	dhry_per_sec = lldiv(iterations * 1000ULL, duration);
 	vax_mips = dhry_per_sec / 1757;
 	printf("%d iterations in %lu ms: %lu/s, %lu DMIPS\n", iterations,
-	       duration, dhry_per_sec, vax_mips);
+	       duration, (ulong)dhry_per_sec, vax_mips);
 
 	return 0;
 }