spaceman.c
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2019 Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com>
*/
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include "apfs.h"
/**
* apfs_spaceman_read_cib_addr - Get the address of a cib from the spaceman
* @sb: superblock structure
* @index: index of the chunk-info block
*
* Returns the block number for the chunk-info block.
*
* This is not described in the official documentation; credit for figuring it
* out should go to Joachim Metz: <https://github.com/libyal/libfsapfs>.
*/
static u64 apfs_spaceman_read_cib_addr(struct super_block *sb, int index)
{
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
u32 offset;
__le64 *addr_p;
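/* The cib addresses are an array of 64-bit block numbers at sm_addr_offset */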
offset = sm->sm_addr_offset + index * sizeof(*addr_p);
addr_p = (void *)sm_raw + offset;
return le64_to_cpup(addr_p);
}
/**
* apfs_spaceman_write_cib_addr - Store the address of a cib in the spaceman
* @sb: superblock structure
* @index: index of the chunk-info block
* @addr: address of the chunk-info block
*/
static void apfs_spaceman_write_cib_addr(struct super_block *sb,
int index, u64 addr)
{
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
u32 offset;
__le64 *addr_p;
apfs_assert_in_transaction(sb, &sm_raw->sm_o);
offset = sm->sm_addr_offset + index * sizeof(*addr_p);
addr_p = (void *)sm_raw + offset;
*addr_p = cpu_to_le64(addr);
}
/**
* apfs_max_chunks_per_cib - Find the maximum chunk count for a chunk-info block
* @sb: superblock structure
*/
static inline int apfs_max_chunks_per_cib(struct super_block *sb)
{
return (sb->s_blocksize - sizeof(struct apfs_chunk_info_block)) /
sizeof(struct apfs_chunk_info);
}
/**
* apfs_read_spaceman_dev - Read a space manager device structure
* @sb: superblock structure
* @dev: on-disk device structure
*
* Initializes the in-memory spaceman fields related to the main device; fusion
* drives are not yet supported. Returns 0 on success, or a negative error code
* in case of failure.
*/
static int apfs_read_spaceman_dev(struct super_block *sb,
struct apfs_spaceman_device *dev)
{
struct apfs_spaceman *spaceman = APFS_SM(sb);
if (dev->sm_cab_count) {
apfs_err(sb, "large devices are not supported");
return -EINVAL;
}
spaceman->sm_block_count = le64_to_cpu(dev->sm_block_count);
spaceman->sm_chunk_count = le64_to_cpu(dev->sm_chunk_count);
spaceman->sm_cib_count = le32_to_cpu(dev->sm_cib_count);
spaceman->sm_free_count = le64_to_cpu(dev->sm_free_count);
spaceman->sm_addr_offset = le32_to_cpu(dev->sm_addr_offset);
/* Check that all the cib addresses fit in the spaceman object */
if ((long long)spaceman->sm_addr_offset +
(long long)spaceman->sm_cib_count * sizeof(u64) > spaceman->sm_size) {
apfs_err(sb, "too many cibs (%u)", spaceman->sm_cib_count);
return -EFSCORRUPTED;
}
return 0;
}
/**
* apfs_spaceman_get_16 - Get a 16-bit value from an offset in the spaceman
* @sb: superblock structure
* @off: offset for the value
*
* Returns a pointer to the value, or NULL if it doesn't fit.
*/
static __le16 *apfs_spaceman_get_16(struct super_block *sb, size_t off)
{
struct apfs_spaceman *spaceman = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = spaceman->sm_raw;
if (off > spaceman->sm_size)
return NULL;
if (off + sizeof(__le16) > spaceman->sm_size)
return NULL;
return (void *)sm_raw + off;
}
/**
* apfs_spaceman_get_64 - Get a 64-bit value from an offset in the spaceman
* @sb: superblock structure
* @off: offset for the value
*
* Returns a pointer to the value, or NULL if it doesn't fit.
*/
static __le64 *apfs_spaceman_get_64(struct super_block *sb, size_t off)
{
struct apfs_spaceman *spaceman = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = spaceman->sm_raw;
if (off > spaceman->sm_size)
return NULL;
if (off + sizeof(__le64) > spaceman->sm_size)
return NULL;
return (void *)sm_raw + off;
}
/**
* apfs_allocate_ip_bitmap - Allocate a free ip bitmap block
* @sb: filesystem superblock
* @offset_p: on return, the offset from sm_ip_bm_base of the allocated block
*
* Returns 0 on success or a negative error code in case of failure.
*/
static int apfs_allocate_ip_bitmap(struct super_block *sb, u16 *offset_p)
{
struct apfs_spaceman *spaceman = NULL;
struct apfs_spaceman_phys *sm_raw = NULL;
u32 free_next_offset, old_head_off;
u16 free_head, blkcnt;
__le16 *old_head_p = NULL;
spaceman = APFS_SM(sb);
sm_raw = spaceman->sm_raw;
free_next_offset = le32_to_cpu(sm_raw->sm_ip_bm_free_next_offset);
free_head = le16_to_cpu(sm_raw->sm_ip_bm_free_head);
blkcnt = (u16)le32_to_cpu(sm_raw->sm_ip_bm_block_count);
/*
* The "free_next" array is a linked list of free blocks that starts
* with the "free_head". Allocate this head then, and make the next
* block into the new head.
*/
old_head_off = free_next_offset + free_head * sizeof(*old_head_p);
old_head_p = apfs_spaceman_get_16(sb, old_head_off);
if (!old_head_p) {
apfs_err(sb, "free_next head offset out of bounds (%u)", old_head_off);
return -EFSCORRUPTED;
}
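/* Hand out the current head and make the next block in the list the new head */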
*offset_p = free_head;
free_head = le16_to_cpup(old_head_p);
sm_raw->sm_ip_bm_free_head = *old_head_p;
/* No longer free, no longer part of the linked list */
*old_head_p = cpu_to_le16(APFS_SPACEMAN_IP_BM_INDEX_INVALID);
/* Just a little sanity check because I've messed this up before */
if (free_head >= blkcnt || *offset_p >= blkcnt) {
apfs_err(sb, "free next list seems empty or corrupt");
return -EFSCORRUPTED;
}
return 0;
}
/**
* apfs_free_ip_bitmap - Free a used ip bitmap block
* @sb: filesystem superblock
* @offset: the offset from sm_ip_bm_base of the block to free
*
* Returns 0 on success or a negative error code in case of failure.
*/
static int apfs_free_ip_bitmap(struct super_block *sb, u16 offset)
{
struct apfs_spaceman *spaceman = NULL;
struct apfs_spaceman_phys *sm_raw = NULL;
u32 free_next_offset, old_tail_off;
u16 free_tail;
__le16 *old_tail_p = NULL;
spaceman = APFS_SM(sb);
sm_raw = spaceman->sm_raw;
free_next_offset = le32_to_cpu(sm_raw->sm_ip_bm_free_next_offset);
free_tail = le16_to_cpu(sm_raw->sm_ip_bm_free_tail);
/*
* The "free_next" array is a linked list of free blocks that ends
* with the "free_tail". The block getting freed will become the new
* tail of the list.
*/
old_tail_off = free_next_offset + free_tail * sizeof(*old_tail_p);
old_tail_p = apfs_spaceman_get_16(sb, old_tail_off);
if (!old_tail_p) {
apfs_err(sb, "free_next tail offset out of bounds (%u)", old_tail_off);
return -EFSCORRUPTED;
}
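/* Link the freed block after the old tail and make it the new tail of the list */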
*old_tail_p = cpu_to_le16(offset);
sm_raw->sm_ip_bm_free_tail = cpu_to_le16(offset);
free_tail = offset;
return 0;
}
/**
* apfs_reallocate_ip_bitmap - Find a new block for an ip bitmap
* @sb: filesystem superblock
* @offset_p: offset from sm_ip_bm_base of the ip bitmap block to reallocate
*
* On success returns 0 and updates @offset_p to the new offset allocated for
* the ip bitmap. Since blocks are allocated at the head of the list and freed
* at the tail, there is no risk of reuse by future reallocations within the
* same transaction (unless there is some serious corruption, of course).
*
* Returns a negative error code in case of failure.
*/
static int apfs_reallocate_ip_bitmap(struct super_block *sb, __le16 *offset_p)
{
int err;
u16 offset;
offset = le16_to_cpup(offset_p);
err = apfs_free_ip_bitmap(sb, offset);
if (err) {
apfs_err(sb, "failed to free ip bitmap %u", offset);
return err;
}
err = apfs_allocate_ip_bitmap(sb, &offset);
if (err) {
apfs_err(sb, "failed to allocate a new ip bitmap block");
return err;
}
*offset_p = cpu_to_le16(offset);
return 0;
}
/**
* apfs_write_single_ip_bitmap - Write a single ip bitmap to disk
* @sb: filesystem superblock
* @bitmap: bitmap to write
* @idx: index of the ip bitmap to write
*
* Returns 0 on success or a negative error code in case of failure.
*/
static int apfs_write_single_ip_bitmap(struct super_block *sb, char *bitmap, u32 idx)
{
struct apfs_nxsb_info *nxi = APFS_NXI(sb);
struct apfs_spaceman *spaceman = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = spaceman->sm_raw;
struct buffer_head *bh = NULL;
u64 ip_bm_base, ip_bitmap_bno;
u32 xid_off, ip_bitmap_off;
__le64 *xid_p = NULL;
__le16 *ip_bitmap_p = NULL;
int err;
ip_bm_base = le64_to_cpu(sm_raw->sm_ip_bm_base);
/* First update the xid, which is kept in a separate array */
xid_off = le32_to_cpu(sm_raw->sm_ip_bm_xid_offset) + idx * sizeof(*xid_p);
xid_p = apfs_spaceman_get_64(sb, xid_off);
if (!xid_p) {
apfs_err(sb, "xid out of bounds (%u)", xid_off);
return -EFSCORRUPTED;
}
*xid_p = cpu_to_le64(nxi->nx_xid);
/* Now find a new location for the ip bitmap (and free the old one) */
ip_bitmap_off = le32_to_cpu(sm_raw->sm_ip_bitmap_offset) + idx * sizeof(*ip_bitmap_p);
ip_bitmap_p = apfs_spaceman_get_16(sb, ip_bitmap_off);
if (!ip_bitmap_p) {
apfs_err(sb, "bmap offset out of bounds (%u)", ip_bitmap_off);
return -EFSCORRUPTED;
}
err = apfs_reallocate_ip_bitmap(sb, ip_bitmap_p);
if (err) {
apfs_err(sb, "failed to reallocate ip bitmap %u", le16_to_cpup(ip_bitmap_p));
return err;
}
/* Finally, write the dirty bitmap to the new location */
ip_bitmap_bno = ip_bm_base + le16_to_cpup(ip_bitmap_p);
bh = apfs_getblk(sb, ip_bitmap_bno);
if (!bh) {
apfs_err(sb, "failed to map block for CoW (0x%llx)", ip_bitmap_bno);
return -EIO;
}
memcpy(bh->b_data, bitmap, sb->s_blocksize);
err = apfs_transaction_join(sb, bh);
if (err)
goto fail;
bh = NULL;
spaceman->sm_ip_bmaps[idx].dirty = false;
return 0;
fail:
brelse(bh);
bh = NULL;
return err;
}
/**
* apfs_write_ip_bitmaps - Write all dirty ip bitmaps to disk
* @sb: superblock structure
*
* Returns 0 on success or a negative error code in case of failure.
*/
int apfs_write_ip_bitmaps(struct super_block *sb)
{
struct apfs_spaceman *spaceman = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = spaceman->sm_raw;
struct apfs_ip_bitmap_block_info *info = NULL;
u32 bmaps_count = spaceman->sm_ip_bmaps_count;
int err;
u32 i;
apfs_assert_in_transaction(sb, &sm_raw->sm_o);
for (i = 0; i < bmaps_count; ++i) {
info = &spaceman->sm_ip_bmaps[i];
if (!info->dirty)
continue;
err = apfs_write_single_ip_bitmap(sb, info->block, i);
if (err) {
apfs_err(sb, "failed to rotate ip bitmap %u", i);
return err;
}
}
return 0;
}
/**
* apfs_read_single_ip_bitmap - Read a single ip bitmap to memory
* @sb: filesystem superblock
* @idx: index of the ip bitmap to read
*
* Returns 0 on success or a negative error code in case of failure.
*/
static int apfs_read_single_ip_bitmap(struct super_block *sb, u32 idx)
{
struct apfs_spaceman *spaceman = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = spaceman->sm_raw;
struct buffer_head *bh = NULL;
char *bitmap = NULL;
u64 ip_bm_base, ip_bitmap_bno;
u32 ip_bitmap_off;
__le16 *ip_bitmap_p = NULL;
int err;
ip_bm_base = le64_to_cpu(sm_raw->sm_ip_bm_base);
ip_bitmap_off = le32_to_cpu(sm_raw->sm_ip_bitmap_offset) + idx * sizeof(*ip_bitmap_p);
ip_bitmap_p = apfs_spaceman_get_16(sb, ip_bitmap_off);
if (!ip_bitmap_p) {
apfs_err(sb, "bmap offset out of bounds (%u)", ip_bitmap_off);
return -EFSCORRUPTED;
}
bitmap = kmalloc(sb->s_blocksize, GFP_KERNEL);
if (!bitmap)
return -ENOMEM;
ip_bitmap_bno = ip_bm_base + le16_to_cpup(ip_bitmap_p);
bh = apfs_sb_bread(sb, ip_bitmap_bno);
if (!bh) {
apfs_err(sb, "failed to read ip bitmap (0x%llx)", ip_bitmap_bno);
err = -EIO;
goto fail;
}
memcpy(bitmap, bh->b_data, sb->s_blocksize);
brelse(bh);
bh = NULL;
spaceman->sm_ip_bmaps[idx].dirty = false;
spaceman->sm_ip_bmaps[idx].block = bitmap;
bitmap = NULL;
return 0;
fail:
kfree(bitmap);
bitmap = NULL;
return err;
}
/**
* apfs_read_ip_bitmaps - Read all the ip bitmaps to memory
* @sb: superblock structure
*
* Returns 0 on success or a negative error code in case of failure.
*/
static int apfs_read_ip_bitmaps(struct super_block *sb)
{
struct apfs_spaceman *spaceman = APFS_SM(sb);
u32 bmaps_count = spaceman->sm_ip_bmaps_count;
int err;
u32 i;
for (i = 0; i < bmaps_count; ++i) {
err = apfs_read_single_ip_bitmap(sb, i);
if (err) {
apfs_err(sb, "failed to read ip bitmap %u", i);
return err;
}
}
return 0;
}
/*
* Free queue record data
*/
struct apfs_fq_rec {
u64 xid;
u64 bno;
u64 len;
};
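/*
 * Each free queue b-tree record maps a (xid, paddr) key to an extent length;
 * ghost records carry no value and stand for a single block.
 */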
/**
* apfs_fq_rec_from_query - Read the free queue record found by a query
* @query: the query that found the record
* @fqrec: on return, the free queue record
*
* Reads the free queue record into @fqrec and performs some basic sanity
* checks as a protection against crafted filesystems. Returns 0 on success
* or -EFSCORRUPTED otherwise.
*/
static int apfs_fq_rec_from_query(struct apfs_query *query, struct apfs_fq_rec *fqrec)
{
char *raw = query->node->object.data;
struct apfs_spaceman_free_queue_key *key;
if (query->key_len != sizeof(*key)) {
apfs_err(query->node->object.sb, "bad key length (%d)", query->key_len);
return -EFSCORRUPTED;
}
key = (struct apfs_spaceman_free_queue_key *)(raw + query->key_off);
fqrec->xid = le64_to_cpu(key->sfqk_xid);
fqrec->bno = le64_to_cpu(key->sfqk_paddr);
if (query->len == 0) {
fqrec->len = 1; /* Ghost record */
return 0;
} else if (query->len == sizeof(__le64)) {
fqrec->len = le64_to_cpup((__le64 *)(raw + query->off));
return 0;
}
apfs_err(query->node->object.sb, "bad value length (%d)", query->len);
return -EFSCORRUPTED;
}
/**
* apfs_block_in_ip - Does this block belong to the internal pool?
* @sm: in-memory spaceman structure
* @bno: block number to check
*/
static inline bool apfs_block_in_ip(struct apfs_spaceman *sm, u64 bno)
{
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
u64 start = le64_to_cpu(sm_raw->sm_ip_base);
u64 end = start + le64_to_cpu(sm_raw->sm_ip_block_count);
return bno >= start && bno < end;
}
/**
* apfs_ip_mark_free - Mark a block in the internal pool as free
* @sb: superblock structure
* @bno: block number (must belong to the ip)
*/
static int apfs_ip_mark_free(struct super_block *sb, u64 bno)
{
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
struct apfs_ip_bitmap_block_info *info = NULL;
bno -= le64_to_cpu(sm_raw->sm_ip_base);
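/* Each ip bitmap block covers one block's worth of bits of the pool */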
info = &sm->sm_ip_bmaps[bno >> sm->sm_ip_bmaps_shift];
__clear_bit_le(bno & sm->sm_ip_bmaps_mask, info->block);
info->dirty = true;
return 0;
}
/*
* apfs_main_free - Mark a regular block as free
*/
static int apfs_main_free(struct super_block *sb, u64 bno);
/**
* apfs_flush_fq_rec - Delete a single fq record and mark its blocks as free
* @root: free queue root node
* @xid: transaction to target
* @len: on return, the number of freed blocks
*
* Returns 0 on success, or a negative error code in case of failure. -ENODATA
* in particular means that there are no matching records left.
*/
static int apfs_flush_fq_rec(struct apfs_node *root, u64 xid, u64 *len)
{
struct super_block *sb = root->object.sb;
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_query *query = NULL;
struct apfs_fq_rec fqrec = {0};
u64 bno;
int err;
query = apfs_alloc_query(root, NULL /* parent */);
if (!query)
return -ENOMEM;
apfs_init_free_queue_key(xid, 0 /* paddr */, &query->key);
query->flags |= APFS_QUERY_FREE_QUEUE | APFS_QUERY_ANY_NUMBER | APFS_QUERY_EXACT;
err = apfs_btree_query(sb, &query);
if (err) {
if (err != -ENODATA)
apfs_err(sb, "query failed for xid 0x%llx, paddr 0x%llx", xid, 0ULL);
goto fail;
}
err = apfs_fq_rec_from_query(query, &fqrec);
if (err) {
apfs_err(sb, "bad free queue rec for xid 0x%llx", xid);
goto fail;
}
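/* Free every block in the record's range, in the ip or in the main device */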
for (bno = fqrec.bno; bno < fqrec.bno + fqrec.len; ++bno) {
if (apfs_block_in_ip(sm, bno))
err = apfs_ip_mark_free(sb, bno);
else
err = apfs_main_free(sb, bno);
if (err) {
apfs_err(sb, "freeing block 0x%llx failed (%d)", (unsigned long long)bno, err);
goto fail;
}
}
err = apfs_btree_remove(query);
if (err) {
apfs_err(sb, "removal failed for xid 0x%llx", xid);
goto fail;
}
*len = fqrec.len;
fail:
apfs_free_query(query);
return err;
}
/**
* apfs_free_queue_oldest_xid - Find the oldest xid among the free queue records
* @root: free queue root node
*/
static u64 apfs_free_queue_oldest_xid(struct apfs_node *root)
{
struct apfs_spaceman_free_queue_key *key;
char *raw = root->object.data;
int len, off;
if (root->records == 0)
return 0;
len = apfs_node_locate_key(root, 0, &off);
if (len != sizeof(*key)) {
/* TODO: abort transaction */
apfs_err(root->object.sb, "bad key length (%d)", len);
return 0;
}
key = (struct apfs_spaceman_free_queue_key *)(raw + off);
return le64_to_cpu(key->sfqk_xid);
}
/**
* apfs_flush_free_queue - Free blocks queued by old transactions
* @sb: superblock structure
* @qid: free queue to flush
* @force: flush as much as possible
*
* Returns 0 on success or a negative error code in case of failure.
*/
static int apfs_flush_free_queue(struct super_block *sb, unsigned int qid, bool force)
{
struct apfs_nxsb_info *nxi = APFS_NXI(sb);
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
struct apfs_spaceman_free_queue *fq = &sm_raw->sm_fq[qid];
struct apfs_node *fq_root;
u64 oldest = le64_to_cpu(fq->sfq_oldest_xid);
int err = 0;
fq_root = apfs_read_node(sb, le64_to_cpu(fq->sfq_tree_oid),
APFS_OBJ_EPHEMERAL, true /* write */);
if (IS_ERR(fq_root)) {
apfs_err(sb, "failed to read fq root 0x%llx", le64_to_cpu(fq->sfq_tree_oid));
return PTR_ERR(fq_root);
}
while (oldest) {
u64 sfq_count;
/*
* Try to preserve one transaction here. I don't really know
* what free queues are for so this is probably silly.
*/
if (force) {
if (oldest == nxi->nx_xid)
break;
} else {
if (oldest + 1 >= nxi->nx_xid)
break;
}
while (true) {
u64 count = 0;
/* Probably not very efficient... */
err = apfs_flush_fq_rec(fq_root, oldest, &count);
if (err == -ENODATA) {
err = 0;
break;
} else if (err) {
apfs_err(sb, "failed to flush fq");
goto fail;
} else {
le64_add_cpu(&fq->sfq_count, -count);
}
}
oldest = apfs_free_queue_oldest_xid(fq_root);
fq->sfq_oldest_xid = cpu_to_le64(oldest);
if (force)
continue;
/*
* Flushing a single transaction may not be enough to avoid
* running out of space in the ip, but it's probably best not
* to flush all the old transactions at once either. We use a
* harsher version of the apfs_transaction_need_commit() check,
* to make sure we won't be forced to commit again right away.
*/
sfq_count = le64_to_cpu(fq->sfq_count);
if (qid == APFS_SFQ_IP && sfq_count * 6 <= le64_to_cpu(sm_raw->sm_ip_block_count))
break;
if (qid == APFS_SFQ_MAIN && sfq_count <= APFS_TRANS_MAIN_QUEUE_MAX - 200)
break;
}
fail:
apfs_node_free(fq_root);
return err;
}
/**
* apfs_allocate_spaceman - Allocate an in-memory spaceman struct, if needed
* @sb: superblock structure
* @raw: on-disk spaceman struct
* @size: size of the on-disk spaceman
*
* Returns the spaceman and sets it in the superblock info. Also performs all
* initializations for the internal pool, including reading all the ip bitmaps.
* This is a bit out of place here, but it's convenient because it has to
* happen only once.
*
* On failure, returns an error pointer.
*/
static struct apfs_spaceman *apfs_allocate_spaceman(struct super_block *sb, struct apfs_spaceman_phys *raw, u32 size)
{
struct apfs_nxsb_info *nxi = APFS_NXI(sb);
struct apfs_spaceman *spaceman = NULL;
int blk_bitcnt = sb->s_blocksize * 8;
size_t sm_size;
u32 bmap_cnt;
int err;
if (nxi->nx_spaceman)
return nxi->nx_spaceman;
/* We don't expect filesystems this big, it would be like 260 TiB */
bmap_cnt = le32_to_cpu(raw->sm_ip_bm_size_in_blocks);
if (bmap_cnt > 200) {
apfs_err(sb, "too many ip bitmap blocks (%u)", bmap_cnt);
return ERR_PTR(-EFSCORRUPTED);
}
sm_size = sizeof(*spaceman) + bmap_cnt * sizeof(spaceman->sm_ip_bmaps[0]);
spaceman = nxi->nx_spaceman = kzalloc(sm_size, GFP_KERNEL);
if (!spaceman)
return ERR_PTR(-ENOMEM);
spaceman->sm_nxi = nxi;
/*
* These two fields must be set before reading the ip bitmaps, since
* that stuff involves several variable-length arrays inside the
* spaceman object itself.
*/
spaceman->sm_raw = raw;
spaceman->sm_size = size;
spaceman->sm_ip_bmaps_count = bmap_cnt;
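/* Each ip bitmap block tracks blk_bitcnt blocks, so mask/shift locate a bit */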
spaceman->sm_ip_bmaps_mask = blk_bitcnt - 1;
spaceman->sm_ip_bmaps_shift = order_base_2(blk_bitcnt);
/* This must happen only once, so it's easier to just leave it here */
err = apfs_read_ip_bitmaps(sb);
if (err) {
apfs_err(sb, "failed to read the ip bitmaps");
kfree(spaceman);
nxi->nx_spaceman = spaceman = NULL;
return ERR_PTR(err);
}
return nxi->nx_spaceman;
}
/**
* apfs_read_spaceman - Find and read the space manager
* @sb: superblock structure
*
* Reads the space manager structure from disk and initializes its in-memory
* counterpart; returns 0 on success, or a negative error code in case of
* failure.
*/
int apfs_read_spaceman(struct super_block *sb)
{
struct apfs_nxsb_info *nxi = APFS_NXI(sb);
struct apfs_nx_superblock *raw_sb = nxi->nx_raw;
struct apfs_spaceman *spaceman = NULL;
struct apfs_ephemeral_object_info *sm_eph_info = NULL;
struct apfs_spaceman_phys *sm_raw;
u32 sm_flags;
u64 oid = le64_to_cpu(raw_sb->nx_spaceman_oid);
int err;
if (sb->s_flags & SB_RDONLY) /* The space manager won't be needed */
return 0;
sm_eph_info = apfs_ephemeral_object_lookup(sb, oid);
if (IS_ERR(sm_eph_info)) {
apfs_err(sb, "no spaceman object for oid 0x%llx", oid);
return PTR_ERR(sm_eph_info);
}
sm_raw = (struct apfs_spaceman_phys *)sm_eph_info->object;
sm_raw->sm_o.o_xid = cpu_to_le64(nxi->nx_xid);
spaceman = apfs_allocate_spaceman(sb, sm_raw, sm_eph_info->size);
if (IS_ERR(spaceman)) {
apfs_err(sb, "failed to allocate spaceman");
/* Don't go through the fail label: spaceman is an error pointer here */
return PTR_ERR(spaceman);
}
spaceman->sm_free_cache_base = spaceman->sm_free_cache_blkcnt = 0;
sm_flags = le32_to_cpu(sm_raw->sm_flags);
/* Undocumented feature, but it's too common to refuse to mount */
if (sm_flags & APFS_SM_FLAG_VERSIONED)
pr_warn_once("APFS: space manager is versioned\n");
/* Only read the main device; fusion drives are not yet supported */
err = apfs_read_spaceman_dev(sb, &sm_raw->sm_dev[APFS_SD_MAIN]);
if (err) {
apfs_err(sb, "failed to read main device");
goto fail;
}
spaceman->sm_blocks_per_chunk =
le32_to_cpu(sm_raw->sm_blocks_per_chunk);
spaceman->sm_chunks_per_cib = le32_to_cpu(sm_raw->sm_chunks_per_cib);
if (spaceman->sm_chunks_per_cib > apfs_max_chunks_per_cib(sb)) {
apfs_err(sb, "too many chunks per cib (%u)", spaceman->sm_chunks_per_cib);
err = -EFSCORRUPTED;
goto fail;
}
err = apfs_flush_free_queue(sb, APFS_SFQ_IP, false /* force */);
if (err) {
apfs_err(sb, "failed to flush ip fq");
goto fail;
}
err = apfs_flush_free_queue(sb, APFS_SFQ_MAIN, false /* force */);
if (err) {
apfs_err(sb, "failed to flush main fq");
goto fail;
}
return 0;
fail:
spaceman->sm_raw = NULL;
return err;
}
/**
* apfs_write_spaceman - Write the in-memory spaceman fields to the disk buffer
* @sm: in-memory spaceman structure
*
* Copies the updated in-memory fields of the space manager into the on-disk
* structure; the buffer is not dirtied.
*/
static void apfs_write_spaceman(struct apfs_spaceman *sm)
{
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
struct apfs_spaceman_device *dev_raw = &sm_raw->sm_dev[APFS_SD_MAIN];
struct apfs_nxsb_info *nxi;
nxi = sm->sm_nxi;
ASSERT(le64_to_cpu(sm_raw->sm_o.o_xid) == nxi->nx_xid);
dev_raw->sm_free_count = cpu_to_le64(sm->sm_free_count);
}
/**
* apfs_ip_find_free - Find a free block inside the internal pool
* @sb: superblock structure
*
* Returns the block number for a free block, or 0 in case of corruption.
*/
static u64 apfs_ip_find_free(struct super_block *sb)
{
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
int blk_bitcnt = sb->s_blocksize * 8;
u64 full_bitcnt = le64_to_cpu(sm_raw->sm_ip_block_count);
u32 i;
for (i = 0; i < sm->sm_ip_bmaps_count; ++i) {
char *bitmap = sm->sm_ip_bmaps[i].block;
u64 off_in_bmap_blk, off_in_ip;
off_in_bmap_blk = find_next_zero_bit_le(bitmap, blk_bitcnt, 0 /* offset */);
if (off_in_bmap_blk >= blk_bitcnt) /* No space in this chunk */
continue;
/* We found something, confirm that it's not outside the ip */
off_in_ip = (i << sm->sm_ip_bmaps_shift) + off_in_bmap_blk;
if (off_in_ip >= full_bitcnt)
break;
return le64_to_cpu(sm_raw->sm_ip_base) + off_in_ip;
}
apfs_err(sb, "internal pool seems full");
return 0;
}
/**
* apfs_chunk_find_free - Find a free block inside a chunk
* @sb: superblock structure
* @bitmap: allocation bitmap for the chunk, which should have free blocks
* @addr: number of the first block in the chunk
*
* Returns the block number for a free block, or 0 in case of corruption.
*/
static u64 apfs_chunk_find_free(struct super_block *sb, char *bitmap, u64 addr)
{
int bitcount = sb->s_blocksize * 8;
u64 bno;
bno = find_next_zero_bit_le(bitmap, bitcount, 0 /* offset */);
if (bno >= bitcount)
return 0;
return addr + bno;
}
/**
* apfs_ip_mark_used - Mark a block in the internal pool as used
* @sb: superblock structure
* @bno: block number (must belong to the ip)
*/
static void apfs_ip_mark_used(struct super_block *sb, u64 bno)
{
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
struct apfs_ip_bitmap_block_info *info = NULL;
bno -= le64_to_cpu(sm_raw->sm_ip_base);
info = &sm->sm_ip_bmaps[bno >> sm->sm_ip_bmaps_shift];
__set_bit_le(bno & sm->sm_ip_bmaps_mask, info->block);
info->dirty = true;
}
/**
* apfs_chunk_mark_used - Mark a block inside a chunk as used
* @sb: superblock structure
* @bitmap: allocation bitmap for the chunk
* @bno: block number (must belong to the chunk)
*/
static inline void apfs_chunk_mark_used(struct super_block *sb, char *bitmap,
u64 bno)
{
int bitcount = sb->s_blocksize * 8;
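/* The low bits of bno give its position inside the chunk's bitmap */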
__set_bit_le(bno & (bitcount - 1), bitmap);
}
/**
* apfs_chunk_mark_free - Mark a block inside a chunk as free
* @sb: superblock structure
* @bitmap: allocation bitmap for the chunk
* @bno: block number (must belong to the chunk)
*/
static inline int apfs_chunk_mark_free(struct super_block *sb, char *bitmap,
u64 bno)
{
int bitcount = sb->s_blocksize * 8;
return __test_and_clear_bit_le(bno & (bitcount - 1), bitmap);
}
/**
* apfs_free_queue_try_insert - Try to add a block range to its free queue
* @sb: superblock structure
* @bno: first block number to free
* @count: number of consecutive blocks to free
*
* Same as apfs_free_queue_insert_nocache(), except that this one can also fail
* with -EAGAIN if there is no room for the new record, so that the caller can
* flush the queue and retry.
*/
static int apfs_free_queue_try_insert(struct super_block *sb, u64 bno, u64 count)
{
struct apfs_nxsb_info *nxi = APFS_NXI(sb);
struct apfs_spaceman *sm = APFS_SM(sb);
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
struct apfs_spaceman_free_queue *fq;
struct apfs_node *fq_root = NULL;
struct apfs_btree_info *fq_info = NULL;
struct apfs_query *query = NULL;
struct apfs_spaceman_free_queue_key raw_key;
bool ghost = count == 1;
int needed_room;
__le64 raw_val;
u64 node_count;
u16 node_limit;
int err;
if (apfs_block_in_ip(sm, bno))
fq = &sm_raw->sm_fq[APFS_SFQ_IP];
else
fq = &sm_raw->sm_fq[APFS_SFQ_MAIN];
fq_root = apfs_read_node(sb, le64_to_cpu(fq->sfq_tree_oid),
APFS_OBJ_EPHEMERAL, true /* write */);
if (IS_ERR(fq_root)) {
apfs_err(sb, "failed to read fq root 0x%llx", le64_to_cpu(fq->sfq_tree_oid));
return PTR_ERR(fq_root);
}
query = apfs_alloc_query(fq_root, NULL /* parent */);
if (!query) {
err = -ENOMEM;
goto fail;
}
apfs_init_free_queue_key(nxi->nx_xid, bno, &query->key);
query->flags |= APFS_QUERY_FREE_QUEUE;
err = apfs_btree_query(sb, &query);
if (err && err != -ENODATA) {
apfs_err(sb, "query failed for xid 0x%llx, paddr 0x%llx", nxi->nx_xid, bno);
goto fail;
}
fq_info = (void *)fq_root->object.data + sb->s_blocksize - sizeof(*fq_info);
node_count = le64_to_cpu(fq_info->bt_node_count);
node_limit = le16_to_cpu(fq->sfq_tree_node_limit);
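/* If the tree can't grow any more nodes, the new record must fit in the current leaf */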
if (node_count == node_limit) {
needed_room = sizeof(raw_key) + (ghost ? 0 : sizeof(raw_val));
if (!apfs_node_has_room(query->node, needed_room, false /* replace */)) {
err = -EAGAIN;
goto fail;
}
}
raw_key.sfqk_xid = cpu_to_le64(nxi->nx_xid);
raw_key.sfqk_paddr = cpu_to_le64(bno);
if (ghost) {
/* A lack of value (ghost record) means single-block extent */
err = apfs_btree_insert(query, &raw_key, sizeof(raw_key), NULL /* val */, 0 /* val_len */);
} else {
raw_val = cpu_to_le64(count);
err = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
}
if (err) {
apfs_err(sb, "insertion failed for xid 0x%llx, paddr 0x%llx", nxi->nx_xid, bno);
goto fail;
}
if (!fq->sfq_oldest_xid)