Simon Glass | 9ab6049 | 2016-03-16 07:44:34 -0600 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2010, Google Inc. |
| 3 | * |
| 4 | * Brought in from coreboot uldivmod.S |
| 5 | * |
| 6 | * SPDX-License-Identifier: GPL-2.0 |
| 7 | */ |
| 8 | |
| 9 | #include <linux/linkage.h> |
| 10 | #include <asm/assembler.h> |
| 11 | |
| 12 | /* We don't use Thumb instructions for now */ |
| 13 | #define ARM(x...) x |
| 14 | #define THUMB(x...) |
| 15 | |
| 16 | /* |
| 17 | * A, Q = r0 + (r1 << 32) |
| 18 | * B, R = r2 + (r3 << 32) |
| 19 | * A / B = Q ... R |
| 20 | */ |
| 21 | |
| 22 | A_0 .req r0 |
| 23 | A_1 .req r1 |
| 24 | B_0 .req r2 |
| 25 | B_1 .req r3 |
| 26 | C_0 .req r4 |
| 27 | C_1 .req r5 |
| 28 | D_0 .req r6 |
| 29 | D_1 .req r7 |
| 30 | |
| 31 | Q_0 .req r0 |
| 32 | Q_1 .req r1 |
| 33 | R_0 .req r2 |
| 34 | R_1 .req r3 |
| 35 | |
| 36 | THUMB( |
| 37 | TMP .req r8 |
| 38 | ) |
| 39 | |
| 40 | ENTRY(__aeabi_uldivmod) |
| 41 | stmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) lr} |
| 42 | @ Test if B == 0 |
| 43 | orrs ip, B_0, B_1 @ Z set -> B == 0 |
| 44 | beq L_div_by_0 |
| 45 | @ Test if B is power of 2: (B & (B - 1)) == 0 |
| 46 | subs C_0, B_0, #1 |
| 47 | sbc C_1, B_1, #0 |
| 48 | tst C_0, B_0 |
| 49 | tsteq B_1, C_1 |
| 50 | beq L_pow2 |
| 51 | @ Test if A_1 == B_1 == 0 |
| 52 | orrs ip, A_1, B_1 |
| 53 | beq L_div_32_32 |
| 54 | |
| 55 | L_div_64_64: |
| 56 | /* CLZ only exists in ARM architecture version 5 and above. */ |
| 57 | #ifdef HAVE_CLZ |
| 58 | mov C_0, #1 |
| 59 | mov C_1, #0 |
| 60 | @ D_0 = clz A |
| 61 | teq A_1, #0 |
| 62 | clz D_0, A_1 |
| 63 | clzeq ip, A_0 |
| 64 | addeq D_0, D_0, ip |
| 65 | @ D_1 = clz B |
| 66 | teq B_1, #0 |
| 67 | clz D_1, B_1 |
| 68 | clzeq ip, B_0 |
| 69 | addeq D_1, D_1, ip |
| 70 | @ if clz B - clz A > 0 |
| 71 | subs D_0, D_1, D_0 |
| 72 | bls L_done_shift |
| 73 | @ B <<= (clz B - clz A) |
| 74 | subs D_1, D_0, #32 |
| 75 | rsb ip, D_0, #32 |
| 76 | movmi B_1, B_1, lsl D_0 |
| 77 | ARM( orrmi B_1, B_1, B_0, lsr ip ) |
| 78 | THUMB( lsrmi TMP, B_0, ip ) |
| 79 | THUMB( orrmi B_1, B_1, TMP ) |
| 80 | movpl B_1, B_0, lsl D_1 |
| 81 | mov B_0, B_0, lsl D_0 |
| 82 | @ C = 1 << (clz B - clz A) |
| 83 | movmi C_1, C_1, lsl D_0 |
| 84 | ARM( orrmi C_1, C_1, C_0, lsr ip ) |
| 85 | THUMB( lsrmi TMP, C_0, ip ) |
| 86 | THUMB( orrmi C_1, C_1, TMP ) |
| 87 | movpl C_1, C_0, lsl D_1 |
| 88 | mov C_0, C_0, lsl D_0 |
| 89 | L_done_shift: |
| 90 | mov D_0, #0 |
| 91 | mov D_1, #0 |
| 92 | @ C: current bit; D: result |
| 93 | #else |
| 94 | @ C: current bit; D: result |
| 95 | mov C_0, #1 |
| 96 | mov C_1, #0 |
| 97 | mov D_0, #0 |
| 98 | mov D_1, #0 |
| 99 | L_lsl_4: |
| 100 | cmp B_1, #0x10000000 |
| 101 | cmpcc B_1, A_1 |
| 102 | cmpeq B_0, A_0 |
| 103 | bcs L_lsl_1 |
| 104 | @ B <<= 4 |
| 105 | mov B_1, B_1, lsl #4 |
| 106 | orr B_1, B_1, B_0, lsr #28 |
| 107 | mov B_0, B_0, lsl #4 |
| 108 | @ C <<= 4 |
| 109 | mov C_1, C_1, lsl #4 |
| 110 | orr C_1, C_1, C_0, lsr #28 |
| 111 | mov C_0, C_0, lsl #4 |
| 112 | b L_lsl_4 |
| 113 | L_lsl_1: |
| 114 | cmp B_1, #0x80000000 |
| 115 | cmpcc B_1, A_1 |
| 116 | cmpeq B_0, A_0 |
| 117 | bcs L_subtract |
| 118 | @ B <<= 1 |
| 119 | mov B_1, B_1, lsl #1 |
| 120 | orr B_1, B_1, B_0, lsr #31 |
| 121 | mov B_0, B_0, lsl #1 |
| 122 | @ C <<= 1 |
| 123 | mov C_1, C_1, lsl #1 |
| 124 | orr C_1, C_1, C_0, lsr #31 |
| 125 | mov C_0, C_0, lsl #1 |
| 126 | b L_lsl_1 |
| 127 | #endif |
| 128 | L_subtract: |
| 129 | @ if A >= B |
| 130 | cmp A_1, B_1 |
| 131 | cmpeq A_0, B_0 |
| 132 | bcc L_update |
| 133 | @ A -= B |
| 134 | subs A_0, A_0, B_0 |
| 135 | sbc A_1, A_1, B_1 |
| 136 | @ D |= C |
| 137 | orr D_0, D_0, C_0 |
| 138 | orr D_1, D_1, C_1 |
| 139 | L_update: |
| 140 | @ if A == 0: break |
| 141 | orrs ip, A_1, A_0 |
| 142 | beq L_exit |
| 143 | @ C >>= 1 |
| 144 | movs C_1, C_1, lsr #1 |
| 145 | movs C_0, C_0, rrx |
| 146 | @ if C == 0: break |
| 147 | orrs ip, C_1, C_0 |
| 148 | beq L_exit |
| 149 | @ B >>= 1 |
| 150 | movs B_1, B_1, lsr #1 |
| 151 | mov B_0, B_0, rrx |
| 152 | b L_subtract |
| 153 | L_exit: |
| 154 | @ Note: A, B & Q, R are aliases |
| 155 | mov R_0, A_0 |
| 156 | mov R_1, A_1 |
| 157 | mov Q_0, D_0 |
| 158 | mov Q_1, D_1 |
| 159 | ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} |
| 160 | |
| 161 | L_div_32_32: |
| 162 | @ Note: A_0 & r0 are aliases |
| 163 | @ Q_1 r1 |
| 164 | mov r1, B_0 |
| 165 | bl __aeabi_uidivmod |
| 166 | mov R_0, r1 |
| 167 | mov R_1, #0 |
| 168 | mov Q_1, #0 |
| 169 | ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} |
| 170 | |
| 171 | L_pow2: |
| 172 | #ifdef HAVE_CLZ |
| 173 | @ Note: A, B and Q, R are aliases |
| 174 | @ R = A & (B - 1) |
| 175 | and C_0, A_0, C_0 |
| 176 | and C_1, A_1, C_1 |
| 177 | @ Q = A >> log2(B) |
| 178 | @ Note: B must not be 0 here! |
| 179 | clz D_0, B_0 |
| 180 | add D_1, D_0, #1 |
| 181 | rsbs D_0, D_0, #31 |
| 182 | bpl L_1 |
| 183 | clz D_0, B_1 |
| 184 | rsb D_0, D_0, #31 |
| 185 | mov A_0, A_1, lsr D_0 |
| 186 | add D_0, D_0, #32 |
| 187 | L_1: |
| 188 | movpl A_0, A_0, lsr D_0 |
| 189 | ARM( orrpl A_0, A_0, A_1, lsl D_1 ) |
| 190 | THUMB( lslpl TMP, A_1, D_1 ) |
| 191 | THUMB( orrpl A_0, A_0, TMP ) |
| 192 | mov A_1, A_1, lsr D_0 |
| 193 | @ Mov back C to R |
| 194 | mov R_0, C_0 |
| 195 | mov R_1, C_1 |
| 196 | ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} |
| 197 | #else |
| 198 | @ Note: A, B and Q, R are aliases |
| 199 | @ R = A & (B - 1) |
| 200 | and C_0, A_0, C_0 |
| 201 | and C_1, A_1, C_1 |
| 202 | @ Q = A >> log2(B) |
| 203 | @ Note: B must not be 0 here! |
| 204 | @ Count the leading zeroes in B. |
| 205 | mov D_0, #0 |
| 206 | orrs B_0, B_0, B_0 |
| 207 | @ If B is greater than 1 << 31, divide A and B by 1 << 32. |
| 208 | moveq A_0, A_1 |
| 209 | moveq A_1, #0 |
| 210 | moveq B_0, B_1 |
| 211 | @ Count the remaining leading zeroes in B. |
| 212 | movs B_1, B_0, lsl #16 |
| 213 | addeq D_0, #16 |
| 214 | moveq B_0, B_0, lsr #16 |
| 215 | tst B_0, #0xff |
| 216 | addeq D_0, #8 |
| 217 | moveq B_0, B_0, lsr #8 |
| 218 | tst B_0, #0xf |
| 219 | addeq D_0, #4 |
| 220 | moveq B_0, B_0, lsr #4 |
| 221 | tst B_0, #0x3 |
| 222 | addeq D_0, #2 |
| 223 | moveq B_0, B_0, lsr #2 |
| 224 | tst B_0, #0x1 |
| 225 | addeq D_0, #1 |
| 226 | @ Shift A to the right by the appropriate amount. |
| 227 | rsb D_1, D_0, #32 |
| 228 | mov Q_0, A_0, lsr D_0 |
| 229 | orr Q_0, A_1, lsl D_1 |
| 230 | mov Q_1, A_1, lsr D_0 |
| 231 | @ Move C to R |
| 232 | mov R_0, C_0 |
| 233 | mov R_1, C_1 |
| 234 | ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} |
| 235 | #endif |
| 236 | |
| 237 | L_div_by_0: |
| 238 | bl __div0 |
| 239 | @ As wrong as it could be |
| 240 | mov Q_0, #0 |
| 241 | mov Q_1, #0 |
| 242 | mov R_0, #0 |
| 243 | mov R_1, #0 |
| 244 | ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} |
| 245 | ENDPROC(__aeabi_uldivmod) |