// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2018 Stefan Roese <sr@denx.de>
 *
 * This code is mostly based on the code extracted from this MediaTek
 * GitHub repository:
 *
 * https://github.com/MediaTek-Labs/linkit-smart-uboot.git
 *
 * I was not able to find a specific license or other developers'
 * copyrights there, so I can't add them here.
 *
 * Most functions in this file are copied from the MediaTek U-Boot
 * repository. Without any documentation, it was impossible to really
 * implement this differently. So it's mostly a cleaned-up version of
 * the original code, with only support for the MT7628 / MT7688 SoC.
 */
| 18 | |
| 19 | #include <common.h> |
Simon Glass | 1eb69ae | 2019-11-14 12:57:39 -0700 | [diff] [blame^] | 20 | #include <cpu_func.h> |
Stefan Roese | 4c835a6 | 2018-09-05 15:12:35 +0200 | [diff] [blame] | 21 | #include <linux/io.h> |
| 22 | #include <asm/cacheops.h> |
| 23 | #include <asm/io.h> |
| 24 | #include "mt76xx.h" |
| 25 | |
| 26 | #define NUM_OF_CACHELINE 128 |
| 27 | #define MIN_START 6 |
| 28 | #define MIN_FINE_START 0xf |
| 29 | #define MAX_START 7 |
| 30 | #define MAX_FINE_START 0x0 |
| 31 | |
| 32 | #define CPU_FRAC_DIV 1 |
| 33 | |
| 34 | #if defined(CONFIG_ONBOARD_DDR2_SIZE_256MBIT) |
| 35 | #define DRAM_BUTTOM 0x02000000 |
| 36 | #endif |
| 37 | #if defined(CONFIG_ONBOARD_DDR2_SIZE_512MBIT) |
| 38 | #define DRAM_BUTTOM 0x04000000 |
| 39 | #endif |
| 40 | #if defined(CONFIG_ONBOARD_DDR2_SIZE_1024MBIT) |
| 41 | #define DRAM_BUTTOM 0x08000000 |
| 42 | #endif |
| 43 | #if defined(CONFIG_ONBOARD_DDR2_SIZE_2048MBIT) |
| 44 | #define DRAM_BUTTOM 0x10000000 |
| 45 | #endif |
| 46 | |
| 47 | static inline void cal_memcpy(void *src, void *dst, u32 size) |
| 48 | { |
| 49 | u8 *psrc = (u8 *)src; |
| 50 | u8 *pdst = (u8 *)dst; |
| 51 | int i; |
| 52 | |
| 53 | for (i = 0; i < size; i++, psrc++, pdst++) |
| 54 | *pdst = *psrc; |
| 55 | } |
| 56 | |
| 57 | static inline void cal_memset(void *src, u8 pat, u32 size) |
| 58 | { |
| 59 | u8 *psrc = (u8 *)src; |
| 60 | int i; |
| 61 | |
| 62 | for (i = 0; i < size; i++, psrc++) |
| 63 | *psrc = pat; |
| 64 | } |
| 65 | |
/*
 * pref_op() - emit a single MIPS "pref" instruction
 *
 * @hint: prefetch hint; passed with an "i" constraint, so it has to be
 *        a compile-time constant
 * @addr: address to prefetch; the byte it points to is handed to the
 *        assembler as an "R" memory operand
 *
 * The instruction is wrapped in .set push / .set noreorder / .set pop so
 * that the assembler settings of the surrounding code are left untouched.
 */
#define pref_op(hint, addr)						\
	__asm__ __volatile__(						\
		".set push\n"						\
		".set noreorder\n"					\
		"pref %0, %1\n"						\
		".set pop\n"						\
		: /* no outputs */					\
		: "i" (hint), "R" (*(u8 *)(addr)))
| 74 | |
| 75 | static inline void cal_patgen(u32 start_addr, u32 size, u32 bias) |
| 76 | { |
| 77 | u32 *addr = (u32 *)start_addr; |
| 78 | int i; |
| 79 | |
| 80 | for (i = 0; i < size; i++) |
| 81 | addr[i] = start_addr + i + bias; |
| 82 | } |
| 83 | |
| 84 | static inline int test_loop(int k, int dqs, u32 test_dqs, u32 *coarse_dqs, |
| 85 | u32 offs, u32 pat, u32 val) |
| 86 | { |
| 87 | u32 nc_addr; |
| 88 | u32 *c_addr; |
| 89 | int i; |
| 90 | |
| 91 | for (nc_addr = 0xa0000000; |
| 92 | nc_addr < (0xa0000000 + DRAM_BUTTOM - NUM_OF_CACHELINE * 32); |
| 93 | nc_addr += (DRAM_BUTTOM >> 6) + offs) { |
| 94 | writel(0x00007474, (void *)MT76XX_MEMCTRL_BASE + 0x64); |
| 95 | wmb(); /* Make sure store if finished */ |
| 96 | |
| 97 | c_addr = (u32 *)(nc_addr & 0xdfffffff); |
| 98 | cal_memset(((u8 *)c_addr), 0x1F, NUM_OF_CACHELINE * 32); |
| 99 | cal_patgen(nc_addr, NUM_OF_CACHELINE * 8, pat); |
| 100 | |
| 101 | if (dqs > 0) |
| 102 | writel(0x00000074 | |
| 103 | (((k == 1) ? coarse_dqs[dqs] : test_dqs) << 12) | |
| 104 | (((k == 0) ? val : test_dqs) << 8), |
| 105 | (void *)MT76XX_MEMCTRL_BASE + 0x64); |
| 106 | else |
| 107 | writel(0x00007400 | |
| 108 | (((k == 1) ? coarse_dqs[dqs] : test_dqs) << 4) | |
| 109 | (((k == 0) ? val : test_dqs) << 0), |
| 110 | (void *)MT76XX_MEMCTRL_BASE + 0x64); |
| 111 | wmb(); /* Make sure store if finished */ |
| 112 | |
| 113 | invalidate_dcache_range((u32)c_addr, |
| 114 | (u32)c_addr + |
| 115 | NUM_OF_CACHELINE * 32); |
| 116 | wmb(); /* Make sure store if finished */ |
| 117 | |
| 118 | for (i = 0; i < NUM_OF_CACHELINE * 8; i++) { |
| 119 | if (i % 8 == 0) |
| 120 | pref_op(0, &c_addr[i]); |
| 121 | } |
| 122 | |
| 123 | for (i = 0; i < NUM_OF_CACHELINE * 8; i++) { |
| 124 | if (c_addr[i] != nc_addr + i + pat) |
| 125 | return -1; |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | return 0; |
| 130 | } |
| 131 | |
| 132 | void ddr_calibrate(void) |
| 133 | { |
| 134 | u32 min_coarse_dqs[2]; |
| 135 | u32 max_coarse_dqs[2]; |
| 136 | u32 min_fine_dqs[2]; |
| 137 | u32 max_fine_dqs[2]; |
| 138 | u32 coarse_dqs[2]; |
| 139 | u32 fine_dqs[2]; |
| 140 | int reg = 0, ddr_cfg2_reg; |
| 141 | int flag; |
| 142 | int i, k; |
| 143 | int dqs = 0; |
| 144 | u32 min_coarse_dqs_bnd, min_fine_dqs_bnd, coarse_dqs_dll, fine_dqs_dll; |
| 145 | u32 val; |
| 146 | u32 fdiv = 0, frac = 0; |
| 147 | |
| 148 | /* Setup clock to run at full speed */ |
| 149 | val = readl((void *)MT76XX_DYN_CFG0_REG); |
| 150 | fdiv = (u32)((val >> 8) & 0x0F); |
| 151 | if (CPU_FRAC_DIV < 1 || CPU_FRAC_DIV > 10) |
| 152 | frac = val & 0x0f; |
| 153 | else |
| 154 | frac = CPU_FRAC_DIV; |
| 155 | |
| 156 | while (frac < fdiv) { |
| 157 | val = readl((void *)MT76XX_DYN_CFG0_REG); |
| 158 | fdiv = (val >> 8) & 0x0f; |
| 159 | fdiv--; |
| 160 | val &= ~(0x0f << 8); |
| 161 | val |= (fdiv << 8); |
| 162 | writel(val, (void *)MT76XX_DYN_CFG0_REG); |
| 163 | udelay(500); |
| 164 | val = readl((void *)MT76XX_DYN_CFG0_REG); |
| 165 | fdiv = (val >> 8) & 0x0f; |
| 166 | } |
| 167 | |
| 168 | clrbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x10, BIT(4)); |
| 169 | ddr_cfg2_reg = readl((void *)MT76XX_MEMCTRL_BASE + 0x48); |
| 170 | clrbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x48, |
| 171 | (0x3 << 28) | (0x3 << 26)); |
| 172 | |
| 173 | min_coarse_dqs[0] = MIN_START; |
| 174 | min_coarse_dqs[1] = MIN_START; |
| 175 | min_fine_dqs[0] = MIN_FINE_START; |
| 176 | min_fine_dqs[1] = MIN_FINE_START; |
| 177 | max_coarse_dqs[0] = MAX_START; |
| 178 | max_coarse_dqs[1] = MAX_START; |
| 179 | max_fine_dqs[0] = MAX_FINE_START; |
| 180 | max_fine_dqs[1] = MAX_FINE_START; |
| 181 | dqs = 0; |
| 182 | |
| 183 | /* Add by KP, DQS MIN boundary */ |
| 184 | reg = readl((void *)MT76XX_MEMCTRL_BASE + 0x20); |
| 185 | coarse_dqs_dll = (reg & 0xf00) >> 8; |
| 186 | fine_dqs_dll = (reg & 0xf0) >> 4; |
| 187 | if (coarse_dqs_dll <= 8) |
| 188 | min_coarse_dqs_bnd = 8 - coarse_dqs_dll; |
| 189 | else |
| 190 | min_coarse_dqs_bnd = 0; |
| 191 | |
| 192 | if (fine_dqs_dll <= 8) |
| 193 | min_fine_dqs_bnd = 8 - fine_dqs_dll; |
| 194 | else |
| 195 | min_fine_dqs_bnd = 0; |
| 196 | /* DQS MIN boundary */ |
| 197 | |
| 198 | DQS_CAL: |
| 199 | |
| 200 | for (k = 0; k < 2; k++) { |
| 201 | u32 test_dqs; |
| 202 | |
| 203 | if (k == 0) |
| 204 | test_dqs = MAX_START; |
| 205 | else |
| 206 | test_dqs = MAX_FINE_START; |
| 207 | |
| 208 | do { |
| 209 | flag = test_loop(k, dqs, test_dqs, max_coarse_dqs, |
| 210 | 0x400, 0x3, 0xf); |
| 211 | if (flag == -1) |
| 212 | break; |
| 213 | |
| 214 | test_dqs++; |
| 215 | } while (test_dqs <= 0xf); |
| 216 | |
| 217 | if (k == 0) { |
| 218 | max_coarse_dqs[dqs] = test_dqs; |
| 219 | } else { |
| 220 | test_dqs--; |
| 221 | |
| 222 | if (test_dqs == MAX_FINE_START - 1) { |
| 223 | max_coarse_dqs[dqs]--; |
| 224 | max_fine_dqs[dqs] = 0xf; |
| 225 | } else { |
| 226 | max_fine_dqs[dqs] = test_dqs; |
| 227 | } |
| 228 | } |
| 229 | } |
| 230 | |
| 231 | for (k = 0; k < 2; k++) { |
| 232 | u32 test_dqs; |
| 233 | |
| 234 | if (k == 0) |
| 235 | test_dqs = MIN_START; |
| 236 | else |
| 237 | test_dqs = MIN_FINE_START; |
| 238 | |
| 239 | do { |
| 240 | flag = test_loop(k, dqs, test_dqs, min_coarse_dqs, |
| 241 | 0x480, 0x1, 0x0); |
| 242 | if (k == 0) { |
| 243 | if (flag == -1 || |
| 244 | test_dqs == min_coarse_dqs_bnd) |
| 245 | break; |
| 246 | |
| 247 | test_dqs--; |
| 248 | |
| 249 | if (test_dqs < min_coarse_dqs_bnd) |
| 250 | break; |
| 251 | } else { |
| 252 | if (flag == -1) { |
| 253 | test_dqs++; |
| 254 | break; |
| 255 | } else if (test_dqs == min_fine_dqs_bnd) { |
| 256 | break; |
| 257 | } |
| 258 | |
| 259 | test_dqs--; |
| 260 | |
| 261 | if (test_dqs < min_fine_dqs_bnd) |
| 262 | break; |
| 263 | } |
| 264 | } while (test_dqs >= 0); |
| 265 | |
| 266 | if (k == 0) { |
| 267 | min_coarse_dqs[dqs] = test_dqs; |
| 268 | } else { |
| 269 | if (test_dqs == MIN_FINE_START + 1) { |
| 270 | min_coarse_dqs[dqs]++; |
| 271 | min_fine_dqs[dqs] = 0x0; |
| 272 | } else { |
| 273 | min_fine_dqs[dqs] = test_dqs; |
| 274 | } |
| 275 | } |
| 276 | } |
| 277 | |
| 278 | if (dqs == 0) { |
| 279 | dqs = 1; |
| 280 | goto DQS_CAL; |
| 281 | } |
| 282 | |
| 283 | for (i = 0; i < 2; i++) { |
| 284 | u32 temp; |
| 285 | |
| 286 | coarse_dqs[i] = (max_coarse_dqs[i] + min_coarse_dqs[i]) >> 1; |
| 287 | temp = |
| 288 | (((max_coarse_dqs[i] + min_coarse_dqs[i]) % 2) * 4) + |
| 289 | ((max_fine_dqs[i] + min_fine_dqs[i]) >> 1); |
| 290 | if (temp >= 0x10) { |
| 291 | coarse_dqs[i]++; |
| 292 | fine_dqs[i] = (temp - 0x10) + 0x8; |
| 293 | } else { |
| 294 | fine_dqs[i] = temp; |
| 295 | } |
| 296 | } |
| 297 | reg = (coarse_dqs[1] << 12) | (fine_dqs[1] << 8) | |
| 298 | (coarse_dqs[0] << 4) | fine_dqs[0]; |
| 299 | |
| 300 | clrbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x10, BIT(4)); |
| 301 | writel(reg, (void *)MT76XX_MEMCTRL_BASE + 0x64); |
| 302 | writel(ddr_cfg2_reg, (void *)MT76XX_MEMCTRL_BASE + 0x48); |
| 303 | setbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x10, BIT(4)); |
| 304 | |
| 305 | for (i = 0; i < 2; i++) |
| 306 | debug("[%02X%02X%02X%02X]", min_coarse_dqs[i], |
| 307 | min_fine_dqs[i], max_coarse_dqs[i], max_fine_dqs[i]); |
| 308 | debug("\nDDR Calibration DQS reg = %08X\n", reg); |
| 309 | } |