blob: 65b4020ebe6152984edeb84b31c5f016dd1c908f [file] [log] [blame]
Qu Wenruo565a4142020-06-24 18:02:48 +02001// SPDX-License-Identifier: GPL-2.0+
2#include <common.h>
3#include <fs_internal.h>
Qu Wenruo4aebb992020-06-24 18:02:49 +02004#include <uuid.h>
5#include <memalign.h>
6#include "kernel-shared/btrfs_tree.h"
Qu Wenruo565a4142020-06-24 18:02:48 +02007#include "disk-io.h"
Qu Wenruo4aebb992020-06-24 18:02:49 +02008#include "ctree.h"
9#include "btrfs.h"
Qu Wenruo75b08172020-06-24 18:02:55 +020010#include "volumes.h"
11#include "extent-io.h"
Qu Wenruo565a4142020-06-24 18:02:48 +020012#include "crypto/hash.h"
13
Qu Wenruo75b08172020-06-24 18:02:55 +020014/* specified errno for check_tree_block */
15#define BTRFS_BAD_BYTENR (-1)
16#define BTRFS_BAD_FSID (-2)
17#define BTRFS_BAD_LEVEL (-3)
18#define BTRFS_BAD_NRITEMS (-4)
19
20/* Calculate max possible nritems for a leaf/node */
21static u32 max_nritems(u8 level, u32 nodesize)
22{
23
24 if (level == 0)
25 return ((nodesize - sizeof(struct btrfs_header)) /
26 sizeof(struct btrfs_item));
27 return ((nodesize - sizeof(struct btrfs_header)) /
28 sizeof(struct btrfs_key_ptr));
29}
30
31static int check_tree_block(struct btrfs_fs_info *fs_info,
32 struct extent_buffer *buf)
33{
34
35 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
36 u32 nodesize = fs_info->nodesize;
37 bool fsid_match = false;
38 int ret = BTRFS_BAD_FSID;
39
40 if (buf->start != btrfs_header_bytenr(buf))
41 return BTRFS_BAD_BYTENR;
42 if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL)
43 return BTRFS_BAD_LEVEL;
44 if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf),
45 nodesize))
46 return BTRFS_BAD_NRITEMS;
47
48 /* Only leaf can be empty */
49 if (btrfs_header_nritems(buf) == 0 &&
50 btrfs_header_level(buf) != 0)
51 return BTRFS_BAD_NRITEMS;
52
53 while (fs_devices) {
54 /*
55 * Checking the incompat flag is only valid for the current
56 * fs. For seed devices it's forbidden to have their uuid
57 * changed so reading ->fsid in this case is fine
58 */
59 if (fs_devices == fs_info->fs_devices &&
60 btrfs_fs_incompat(fs_info, METADATA_UUID))
61 fsid_match = !memcmp_extent_buffer(buf,
62 fs_devices->metadata_uuid,
63 btrfs_header_fsid(),
64 BTRFS_FSID_SIZE);
65 else
66 fsid_match = !memcmp_extent_buffer(buf,
67 fs_devices->fsid,
68 btrfs_header_fsid(),
69 BTRFS_FSID_SIZE);
70
71
72 if (fsid_match) {
73 ret = 0;
74 break;
75 }
76 fs_devices = fs_devices->seed;
77 }
78 return ret;
79}
80
81static void print_tree_block_error(struct btrfs_fs_info *fs_info,
82 struct extent_buffer *eb,
83 int err)
84{
85 char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
86 char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
87 u8 buf[BTRFS_UUID_SIZE];
88
89 if (!err)
90 return;
91
92 fprintf(stderr, "bad tree block %llu, ", eb->start);
93 switch (err) {
94 case BTRFS_BAD_FSID:
95 read_extent_buffer(eb, buf, btrfs_header_fsid(),
96 BTRFS_UUID_SIZE);
97 uuid_unparse(buf, found_uuid);
98 uuid_unparse(fs_info->fs_devices->metadata_uuid, fs_uuid);
99 fprintf(stderr, "fsid mismatch, want=%s, have=%s\n",
100 fs_uuid, found_uuid);
101 break;
102 case BTRFS_BAD_BYTENR:
103 fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n",
104 eb->start, btrfs_header_bytenr(eb));
105 break;
106 case BTRFS_BAD_LEVEL:
107 fprintf(stderr, "bad level, %u > %d\n",
108 btrfs_header_level(eb), BTRFS_MAX_LEVEL);
109 break;
110 case BTRFS_BAD_NRITEMS:
111 fprintf(stderr, "invalid nr_items: %u\n",
112 btrfs_header_nritems(eb));
113 break;
114 }
115}
116
Qu Wenruo565a4142020-06-24 18:02:48 +0200117int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len)
118{
119 memset(out, 0, BTRFS_CSUM_SIZE);
120
121 switch (csum_type) {
122 case BTRFS_CSUM_TYPE_CRC32:
123 return hash_crc32c(data, len, out);
124 case BTRFS_CSUM_TYPE_XXHASH:
125 return hash_xxhash(data, len, out);
126 case BTRFS_CSUM_TYPE_SHA256:
127 return hash_sha256(data, len, out);
128 default:
129 printf("Unknown csum type %d\n", csum_type);
130 return -EINVAL;
131 }
132}
Qu Wenruo4aebb992020-06-24 18:02:49 +0200133
134/*
135 * Check if the super is valid:
136 * - nodesize/sectorsize - minimum, maximum, alignment
137 * - tree block starts - alignment
138 * - number of devices - something sane
139 * - sys array size - maximum
140 */
141static int btrfs_check_super(struct btrfs_super_block *sb)
142{
143 u8 result[BTRFS_CSUM_SIZE];
144 u16 csum_type;
145 int csum_size;
146 u8 *metadata_uuid;
147
148 if (btrfs_super_magic(sb) != BTRFS_MAGIC)
149 return -EIO;
150
151 csum_type = btrfs_super_csum_type(sb);
152 if (csum_type >= btrfs_super_num_csums()) {
153 error("unsupported checksum algorithm %u", csum_type);
154 return -EIO;
155 }
156 csum_size = btrfs_super_csum_size(sb);
157
158 btrfs_csum_data(csum_type, (u8 *)sb + BTRFS_CSUM_SIZE,
159 result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
160
161 if (memcmp(result, sb->csum, csum_size)) {
162 error("superblock checksum mismatch");
163 return -EIO;
164 }
165 if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
166 error("tree_root level too big: %d >= %d",
167 btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
168 goto error_out;
169 }
170 if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
171 error("chunk_root level too big: %d >= %d",
172 btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
173 goto error_out;
174 }
175 if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
176 error("log_root level too big: %d >= %d",
177 btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
178 goto error_out;
179 }
180
181 if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) {
182 error("tree_root block unaligned: %llu", btrfs_super_root(sb));
183 goto error_out;
184 }
185 if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) {
186 error("chunk_root block unaligned: %llu",
187 btrfs_super_chunk_root(sb));
188 goto error_out;
189 }
190 if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) {
191 error("log_root block unaligned: %llu",
192 btrfs_super_log_root(sb));
193 goto error_out;
194 }
195 if (btrfs_super_nodesize(sb) < 4096) {
196 error("nodesize too small: %u < 4096",
197 btrfs_super_nodesize(sb));
198 goto error_out;
199 }
200 if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) {
201 error("nodesize unaligned: %u", btrfs_super_nodesize(sb));
202 goto error_out;
203 }
204 if (btrfs_super_sectorsize(sb) < 4096) {
205 error("sectorsize too small: %u < 4096",
206 btrfs_super_sectorsize(sb));
207 goto error_out;
208 }
209 if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) {
210 error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb));
211 goto error_out;
212 }
213 if (btrfs_super_total_bytes(sb) == 0) {
214 error("invalid total_bytes 0");
215 goto error_out;
216 }
217 if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
218 error("invalid bytes_used %llu", btrfs_super_bytes_used(sb));
219 goto error_out;
220 }
221 if ((btrfs_super_stripesize(sb) != 4096)
222 && (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) {
223 error("invalid stripesize %u", btrfs_super_stripesize(sb));
224 goto error_out;
225 }
226
227 if (btrfs_super_incompat_flags(sb) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID)
228 metadata_uuid = sb->metadata_uuid;
229 else
230 metadata_uuid = sb->fsid;
231
232 if (memcmp(metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
233 char fsid[BTRFS_UUID_UNPARSED_SIZE];
234 char dev_fsid[BTRFS_UUID_UNPARSED_SIZE];
235
236 uuid_unparse(sb->metadata_uuid, fsid);
237 uuid_unparse(sb->dev_item.fsid, dev_fsid);
238 error("dev_item UUID does not match fsid: %s != %s",
239 dev_fsid, fsid);
240 goto error_out;
241 }
242
243 /*
244 * Hint to catch really bogus numbers, bitflips or so
245 */
246 if (btrfs_super_num_devices(sb) > (1UL << 31)) {
247 error("suspicious number of devices: %llu",
248 btrfs_super_num_devices(sb));
249 }
250
251 if (btrfs_super_num_devices(sb) == 0) {
252 error("number of devices is 0");
253 goto error_out;
254 }
255
256 /*
257 * Obvious sys_chunk_array corruptions, it must hold at least one key
258 * and one chunk
259 */
260 if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
261 error("system chunk array too big %u > %u",
262 btrfs_super_sys_array_size(sb),
263 BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
264 goto error_out;
265 }
266 if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
267 + sizeof(struct btrfs_chunk)) {
268 error("system chunk array too small %u < %zu",
269 btrfs_super_sys_array_size(sb),
270 sizeof(struct btrfs_disk_key) +
271 sizeof(struct btrfs_chunk));
272 goto error_out;
273 }
274
275 return 0;
276
277error_out:
278 error("superblock checksum matches but it has invalid members");
279 return -EIO;
280}
281
282/*
283 * btrfs_read_dev_super - read a valid primary superblock from a block device
284 * @desc,@part: file descriptor of the device
285 * @sb: buffer where the superblock is going to be read in
286 *
287 * Unlike the btrfs-progs/kernel version, here we ony care about the first
288 * super block, thus it's much simpler.
289 */
290int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part,
291 struct btrfs_super_block *sb)
292{
293 char tmp[BTRFS_SUPER_INFO_SIZE];
294 struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp;
295 int ret;
296
297 ret = __btrfs_devread(desc, part, tmp, BTRFS_SUPER_INFO_SIZE,
298 BTRFS_SUPER_INFO_OFFSET);
299 if (ret < BTRFS_SUPER_INFO_SIZE)
300 return -EIO;
301
302 if (btrfs_super_bytenr(buf) != BTRFS_SUPER_INFO_OFFSET)
303 return -EIO;
304
305 if (btrfs_check_super(buf))
306 return -EIO;
307
308 memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE);
309 return 0;
310}
311
312int btrfs_read_superblock(void)
313{
314 ALLOC_CACHE_ALIGN_BUFFER(char, raw_sb, BTRFS_SUPER_INFO_SIZE);
315 struct btrfs_super_block *sb = (struct btrfs_super_block *) raw_sb;
316 int ret;
317
318
319 btrfs_info.sb.generation = 0;
320
321 ret = btrfs_read_dev_super(btrfs_blk_desc, btrfs_part_info, sb);
322 if (ret < 0) {
323 pr_debug("%s: No valid BTRFS superblock found!\n", __func__);
324 return ret;
325 }
326 btrfs_super_block_to_cpu(sb);
327 memcpy(&btrfs_info.sb, sb, sizeof(*sb));
328
329 if (btrfs_info.sb.num_devices != 1) {
330 printf("%s: Unsupported number of devices (%lli). This driver "
331 "only supports filesystem on one device.\n", __func__,
332 btrfs_info.sb.num_devices);
333 return -1;
334 }
335
336 pr_debug("Chosen superblock with generation = %llu\n",
337 btrfs_info.sb.generation);
338
339 return 0;
340}
Qu Wenruo75b08172020-06-24 18:02:55 +0200341
342static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
343 int verify, int silent, u16 csum_type)
344{
345 u8 result[BTRFS_CSUM_SIZE];
346 u32 len;
347
348 len = buf->len - BTRFS_CSUM_SIZE;
349 btrfs_csum_data(csum_type, (u8 *)buf->data + BTRFS_CSUM_SIZE,
350 result, len);
351
352 if (verify) {
353 if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
354 /* FIXME: format */
355 if (!silent)
356 printk("checksum verify failed on %llu found %08X wanted %08X\n",
357 (unsigned long long)buf->start,
358 result[0],
359 buf->data[0]);
360 return 1;
361 }
362 } else {
363 write_extent_buffer(buf, result, 0, csum_size);
364 }
365 return 0;
366}
367
368int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify,
369 u16 csum_type)
370{
371 return __csum_tree_block_size(buf, csum_size, verify, 0, csum_type);
372}
373
374static int csum_tree_block(struct btrfs_fs_info *fs_info,
375 struct extent_buffer *buf, int verify)
376{
377 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
378 u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
379
380 return csum_tree_block_size(buf, csum_size, verify, csum_type);
381}
382
383struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
384 u64 bytenr, u32 blocksize)
385{
386 return find_extent_buffer(&fs_info->extent_cache,
387 bytenr, blocksize);
388}
389
390struct extent_buffer* btrfs_find_create_tree_block(
391 struct btrfs_fs_info *fs_info, u64 bytenr)
392{
393 return alloc_extent_buffer(fs_info, bytenr, fs_info->nodesize);
394}
395
396static int verify_parent_transid(struct extent_io_tree *io_tree,
397 struct extent_buffer *eb, u64 parent_transid,
398 int ignore)
399{
400 int ret;
401
402 if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
403 return 0;
404
405 if (extent_buffer_uptodate(eb) &&
406 btrfs_header_generation(eb) == parent_transid) {
407 ret = 0;
408 goto out;
409 }
410 printk("parent transid verify failed on %llu wanted %llu found %llu\n",
411 (unsigned long long)eb->start,
412 (unsigned long long)parent_transid,
413 (unsigned long long)btrfs_header_generation(eb));
414 if (ignore) {
415 eb->flags |= EXTENT_BAD_TRANSID;
416 printk("Ignoring transid failure\n");
417 return 0;
418 }
419
420 ret = 1;
421out:
422 clear_extent_buffer_uptodate(eb);
423 return ret;
424
425}
426
427int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
428{
429 int ret;
430
431 ret = extent_buffer_uptodate(buf);
432 if (!ret)
433 return ret;
434
435 ret = verify_parent_transid(&buf->fs_info->extent_cache, buf,
436 parent_transid, 1);
437 return !ret;
438}
439
440int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
441{
442 return set_extent_buffer_uptodate(eb);
443}
444
445int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror)
446{
447 unsigned long offset = 0;
448 struct btrfs_multi_bio *multi = NULL;
449 struct btrfs_device *device;
450 int ret = 0;
451 u64 read_len;
452 unsigned long bytes_left = eb->len;
453
454 while (bytes_left) {
455 read_len = bytes_left;
456 device = NULL;
457
458 ret = btrfs_map_block(info, READ, eb->start + offset,
459 &read_len, &multi, mirror, NULL);
460 if (ret) {
461 printk("Couldn't map the block %Lu\n", eb->start + offset);
462 kfree(multi);
463 return -EIO;
464 }
465 device = multi->stripes[0].dev;
466
467 if (!device->desc || !device->part) {
468 kfree(multi);
469 return -EIO;
470 }
471
472 if (read_len > bytes_left)
473 read_len = bytes_left;
474
475 ret = read_extent_from_disk(device->desc, device->part,
476 multi->stripes[0].physical, eb,
477 offset, read_len);
478 kfree(multi);
479 multi = NULL;
480
481 if (ret)
482 return -EIO;
483 offset += read_len;
484 bytes_left -= read_len;
485 }
486 return 0;
487}
488
489struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
490 u64 parent_transid)
491{
492 int ret;
493 struct extent_buffer *eb;
494 u64 best_transid = 0;
495 u32 sectorsize = fs_info->sectorsize;
496 int mirror_num = 1;
497 int good_mirror = 0;
498 int candidate_mirror = 0;
499 int num_copies;
500 int ignore = 0;
501
502 /*
503 * Don't even try to create tree block for unaligned tree block
504 * bytenr.
505 * Such unaligned tree block will free overlapping extent buffer,
506 * causing use-after-free bugs for fuzzed images.
507 */
508 if (bytenr < sectorsize || !IS_ALIGNED(bytenr, sectorsize)) {
509 error("tree block bytenr %llu is not aligned to sectorsize %u",
510 bytenr, sectorsize);
511 return ERR_PTR(-EIO);
512 }
513
514 eb = btrfs_find_create_tree_block(fs_info, bytenr);
515 if (!eb)
516 return ERR_PTR(-ENOMEM);
517
518 if (btrfs_buffer_uptodate(eb, parent_transid))
519 return eb;
520
521 num_copies = btrfs_num_copies(fs_info, eb->start, eb->len);
522 while (1) {
523 ret = read_whole_eb(fs_info, eb, mirror_num);
524 if (ret == 0 && csum_tree_block(fs_info, eb, 1) == 0 &&
525 check_tree_block(fs_info, eb) == 0 &&
526 verify_parent_transid(&fs_info->extent_cache, eb,
527 parent_transid, ignore) == 0) {
528 /*
529 * check_tree_block() is less strict to allow btrfs
530 * check to get raw eb with bad key order and fix it.
531 * But we still need to try to get a good copy if
532 * possible, or bad key order can go into tools like
533 * btrfs ins dump-tree.
534 */
535 if (btrfs_header_level(eb))
536 ret = btrfs_check_node(fs_info, NULL, eb);
537 else
538 ret = btrfs_check_leaf(fs_info, NULL, eb);
539 if (!ret || candidate_mirror == mirror_num) {
540 btrfs_set_buffer_uptodate(eb);
541 return eb;
542 }
543 if (candidate_mirror <= 0)
544 candidate_mirror = mirror_num;
545 }
546 if (ignore) {
547 if (candidate_mirror > 0) {
548 mirror_num = candidate_mirror;
549 continue;
550 }
551 if (check_tree_block(fs_info, eb))
552 print_tree_block_error(fs_info, eb,
553 check_tree_block(fs_info, eb));
554 else
555 fprintf(stderr, "Csum didn't match\n");
556 ret = -EIO;
557 break;
558 }
559 if (num_copies == 1) {
560 ignore = 1;
561 continue;
562 }
563 if (btrfs_header_generation(eb) > best_transid) {
564 best_transid = btrfs_header_generation(eb);
565 good_mirror = mirror_num;
566 }
567 mirror_num++;
568 if (mirror_num > num_copies) {
569 if (candidate_mirror > 0)
570 mirror_num = candidate_mirror;
571 else
572 mirror_num = good_mirror;
573 ignore = 1;
574 continue;
575 }
576 }
577 /*
578 * We failed to read this tree block, it be should deleted right now
579 * to avoid stale cache populate the cache.
580 */
581 free_extent_buffer(eb);
582 return ERR_PTR(ret);
583}