Skip to content

Commit

Permalink
make it thread-safe
Browse files Browse the repository at this point in the history
This patch modifies this fork of lwext4 to make is safe to
interact with by multiple threads running in OSv.

The key assumption is, that OSv VFS layer provides necessary
locking around all interactions with lwext4 to guard ext filesystem
metadata (i-node table, directory entries, etc) modifications confined
to specific vnode.

Beyond that, we add necessary locking around 3 key common data
structures:
- i-node bitmaps in ext4_ialloc.c
- data block bitmaps in ext4_balloc.c
- metadata blockcache in ext4_bcache.c and related files

More specifically following functions are protected with
inode_alloc_lock()/unlock() to make sure no two files/directories
get assigned same inode number:
- ext4_ialloc_alloc_inode()
- ext4_ialloc_free_inode()

Next, following functions are protected with block_alloc_lock()/unlock()
to make sure no two files/directories use same data block:
- ext4_balloc_alloc_block()
- ext4_balloc_free_block()
- ext4_balloc_free_blocks()

Finally, these functions in ext4_bcache.c and related source files
are protected with bcache_lock()/unlock() to make sure the global
metadata block cache access is synchronized:
- ext4_bcache_invalidate_lba() in __ext4_balloc_free_block() and
  __ext4_balloc_free_blocks()
- ext4_bcache_find_get(), ext4_block_flush_buf() and ext4_bcache_free()
  in ext4_block_flush_lba()
- ext4_block_get_noread(), ext4_bcache_test_flag() ext4_bcache_free() in
  ext4_block_get()
- ext4_bcache_free() in ext4_block_set()
- ext4_block_get_noread() in ext4_trans_block_get_noread()

Ref gkostka#83
Ref cloudius-systems/osv#1179

Signed-off-by: Waldemar Kozaczuk <jwkozaczuk@gmail.com>
  • Loading branch information
wkozaczuk committed Jul 9, 2024
1 parent c6fed65 commit 44c3329
Show file tree
Hide file tree
Showing 8 changed files with 134 additions and 39 deletions.
46 changes: 20 additions & 26 deletions include/ext4_bcache.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,26 @@ struct ext4_block {

struct ext4_bcache;

/**@brief buffer state bits
*
* - BC_UPTODATE: Buffer contains valid data.
* - BC_DIRTY: Buffer is dirty.
* - BC_FLUSH: Buffer will be immediately flushed,
* when no one references it.
* - BC_TMP: Buffer will be dropped once its refctr
* reaches zero.
*/
enum bcache_state_bits {
BC_UPTODATE, // Mostly set in ext4_block_get() after reading from disk
BC_DIRTY, // Most importantly set in ext4_bcache_set_dirty()
BC_FLUSH, // Set in journal code
BC_TMP // Set in journal code
};

/**@brief Single block descriptor*/
struct ext4_buf {
/**@brief Flags*/
int flags;
char flags[BC_TMP + 1];

/**@brief Logical block address*/
uint64_t lba;
Expand Down Expand Up @@ -148,30 +164,14 @@ struct ext4_bcache {
SLIST_HEAD(ext4_buf_dirty, ext4_buf) dirty_list;
};

/**@brief buffer state bits
*
* - BC♡UPTODATE: Buffer contains valid data.
* - BC_DIRTY: Buffer is dirty.
* - BC_FLUSH: Buffer will be immediately flushed,
* when no one references it.
* - BC_TMP: Buffer will be dropped once its refctr
* reaches zero.
*/
enum bcache_state_bits {
BC_UPTODATE,
BC_DIRTY,
BC_FLUSH,
BC_TMP
};

#define ext4_bcache_set_flag(buf, b) \
(buf)->flags |= 1 << (b)
(buf)->flags[b] = 1

#define ext4_bcache_clear_flag(buf, b) \
(buf)->flags &= ~(1 << (b))
(buf)->flags[b] = 0

#define ext4_bcache_test_flag(buf, b) \
(((buf)->flags & (1 << (b))) >> (b))
(((buf)->flags[b] == 1 ))

static inline void ext4_bcache_set_dirty(struct ext4_buf *buf) {
ext4_bcache_set_flag(buf, BC_UPTODATE);
Expand Down Expand Up @@ -239,12 +239,6 @@ struct ext4_buf *ext4_buf_lowest_lru(struct ext4_bcache *bc);
* @param buf buffer*/
void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf);

/**@brief Invalidate a buffer.
* @param bc block cache descriptor
* @param buf buffer*/
void ext4_bcache_invalidate_buf(struct ext4_bcache *bc,
struct ext4_buf *buf);

/**@brief Invalidate a range of buffers.
* @param bc block cache descriptor
* @param from starting lba
Expand Down
9 changes: 9 additions & 0 deletions include/ext4_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ struct ext4_fs {
struct jbd_fs *jbd_fs;
struct jbd_journal *jbd_journal;
struct jbd_trans *curr_trans;

void (*inode_alloc_lock)();
void (*inode_alloc_unlock)();

void (*block_alloc_lock)();
void (*block_alloc_unlock)();

void (*bcache_lock)();
void (*bcache_unlock)();
};

struct ext4_block_group_ref {
Expand Down
40 changes: 37 additions & 3 deletions src/ext4_balloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ ext4_balloc_verify_bitmap_csum(struct ext4_sblock *sb,
#define ext4_balloc_verify_bitmap_csum(...) true
#endif

int ext4_balloc_free_block(struct ext4_inode_ref *inode_ref, ext4_fsblk_t baddr)
static int
__ext4_balloc_free_block(struct ext4_inode_ref *inode_ref, ext4_fsblk_t baddr)
{
struct ext4_fs *fs = inode_ref->fs;
struct ext4_sblock *sb = &fs->sb;
Expand Down Expand Up @@ -223,14 +224,25 @@ int ext4_balloc_free_block(struct ext4_inode_ref *inode_ref, ext4_fsblk_t baddr)
ext4_fs_put_block_group_ref(&bg_ref);
return rc;
}
fs->bcache_lock();
ext4_bcache_invalidate_lba(fs->bdev->bc, baddr, 1);
fs->bcache_unlock();
/* Release block group reference */
rc = ext4_fs_put_block_group_ref(&bg_ref);

return rc;
}

int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref,
int ext4_balloc_free_block(struct ext4_inode_ref *inode_ref, ext4_fsblk_t baddr)
{
inode_ref->fs->block_alloc_lock();
int rc = __ext4_balloc_free_block(inode_ref, baddr);
inode_ref->fs->block_alloc_unlock();
return rc;
}

static int
__ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref,
ext4_fsblk_t first, uint32_t count)
{
int rc = EOK;
Expand Down Expand Up @@ -342,14 +354,26 @@ int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref,

}

fs->bcache_lock();
ext4_bcache_invalidate_lba(fs->bdev->bc, start_block, blk_cnt);
fs->bcache_unlock();
/*All blocks should be released*/
ext4_assert(count == 0);

return rc;
}

int ext4_balloc_alloc_block(struct ext4_inode_ref *inode_ref,
int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref,
ext4_fsblk_t first, uint32_t count)
{
inode_ref->fs->block_alloc_lock();
int rc = __ext4_balloc_free_blocks(inode_ref, first, count);
inode_ref->fs->block_alloc_unlock();
return rc;
}

static int
__ext4_balloc_alloc_block(struct ext4_inode_ref *inode_ref,
ext4_fsblk_t goal,
ext4_fsblk_t *fblock)
{
Expand Down Expand Up @@ -582,6 +606,16 @@ int ext4_balloc_alloc_block(struct ext4_inode_ref *inode_ref,
return r;
}

int ext4_balloc_alloc_block(struct ext4_inode_ref *inode_ref,
ext4_fsblk_t goal,
ext4_fsblk_t *fblock)
{
inode_ref->fs->block_alloc_lock();
int rc = __ext4_balloc_alloc_block(inode_ref, goal, fblock);
inode_ref->fs->block_alloc_unlock();
return rc;
}

int ext4_balloc_try_alloc_block(struct ext4_inode_ref *inode_ref,
ext4_fsblk_t baddr, bool *free)
{
Expand Down
25 changes: 25 additions & 0 deletions src/ext4_bcache.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,19 @@ ext4_buf_lookup(struct ext4_bcache *bc, uint64_t lba)
return RB_FIND(ext4_buf_lba, &bc->lba_root, &tmp);
}

// No need for locking
// Called by ext4_block_cache_shake() ONLY
// PROTECTED in ext4_block_get_noread() because the only
// caller - ext4_block_cache_shake() - is protected
struct ext4_buf *ext4_buf_lowest_lru(struct ext4_bcache *bc)
{
return RB_MIN(ext4_buf_lru, &bc->lru_root);
}

// No need for locking
// Called by ext4_block_cache_shake() and functions in this file
// PROTECTED in ext4_block_get_noread() because the
// caller - ext4_block_cache_shake() - is protected
void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf)
{
/* Warn on dropping any referenced buffers.*/
Expand All @@ -178,6 +186,10 @@ void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf)
bc->ref_blocks--;
}

/**@brief Invalidate a buffer.
* @param bc block cache descriptor
* @param buf buffer*/
static
void ext4_bcache_invalidate_buf(struct ext4_bcache *bc,
struct ext4_buf *buf)
{
Expand All @@ -191,6 +203,9 @@ void ext4_bcache_invalidate_buf(struct ext4_bcache *bc,
ext4_bcache_clear_dirty(buf);
}

// Called by ext4_balloc_free_block/s()
// Protect by LOCK
// PROTECTED in the callers
void ext4_bcache_invalidate_lba(struct ext4_bcache *bc,
uint64_t from,
uint32_t cnt)
Expand All @@ -205,6 +220,10 @@ void ext4_bcache_invalidate_lba(struct ext4_bcache *bc,
}
}

// Called by:
// - ext4_bcache_alloc() - No need to lock (see below)
// - ext4_block_flush_lba() - probably need to lock there - PROTECTED
// - 2 methods in ext4_journal() not used right now - ignore
struct ext4_buf *
ext4_bcache_find_get(struct ext4_bcache *bc, struct ext4_block *b,
uint64_t lba)
Expand All @@ -231,6 +250,10 @@ ext4_bcache_find_get(struct ext4_bcache *bc, struct ext4_block *b,
return buf;
}

// Called by ext4_block_get_noread() ONLY which also calls ext4_block_cache_shake()
// which calls numbers of function here. So no need for locking,
// and maybe we should lock in ext4_block_get_noread()?
// PROTECTED in ext4_block_get_noread()
int ext4_bcache_alloc(struct ext4_bcache *bc, struct ext4_block *b,
bool *is_new)
{
Expand Down Expand Up @@ -267,6 +290,8 @@ int ext4_bcache_alloc(struct ext4_bcache *bc, struct ext4_block *b,
return EOK;
}

// Called by 3 functions - ext4_block_flush_lba(), ext4_block_get() and ext4_block_set()
// Protected there
int ext4_bcache_free(struct ext4_bcache *bc, struct ext4_block *b)
{
struct ext4_buf *buf = b->buf;
Expand Down
16 changes: 14 additions & 2 deletions src/ext4_blockdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -170,16 +170,19 @@ int ext4_block_flush_buf(struct ext4_blockdev *bdev, struct ext4_buf *buf)
return EOK;
}

//Used indirectly by journal-ing code
int ext4_block_flush_lba(struct ext4_blockdev *bdev, uint64_t lba)
{
int r = EOK;
struct ext4_buf *buf;
struct ext4_block b;
bdev->fs->bcache_lock();
buf = ext4_bcache_find_get(bdev->bc, &b, lba);
if (buf) {
r = ext4_block_flush_buf(bdev, buf);
ext4_bcache_free(bdev->bc, &b);
}
bdev->fs->bcache_unlock();
return r;
}

Expand Down Expand Up @@ -244,26 +247,32 @@ int ext4_block_get_noread(struct ext4_blockdev *bdev, struct ext4_block *b,
int ext4_block_get(struct ext4_blockdev *bdev, struct ext4_block *b,
uint64_t lba)
{
bdev->fs->bcache_lock();
int r = ext4_block_get_noread(bdev, b, lba);
if (r != EOK)
if (r != EOK) {
bdev->fs->bcache_unlock();
return r;
}

if (ext4_bcache_test_flag(b->buf, BC_UPTODATE)) {
/* Data in the cache is up-to-date.
* Reading from physical device is not required */
bdev->fs->bcache_unlock();
return EOK;
}

r = ext4_blocks_get_direct(bdev, b->data, lba, 1);
if (r != EOK) {
ext4_bcache_free(bdev->bc, b);
bdev->fs->bcache_unlock();
b->lb_id = 0;
return r;
}

/* Mark buffer up-to-date, since
* fresh data is read from physical device just now. */
ext4_bcache_set_flag(b->buf, BC_UPTODATE);
bdev->fs->bcache_unlock();
return EOK;
}

Expand All @@ -275,7 +284,10 @@ int ext4_block_set(struct ext4_blockdev *bdev, struct ext4_block *b)
if (!bdev->bdif->ph_refctr)
return EIO;

return ext4_bcache_free(bdev->bc, b);
bdev->fs->bcache_lock();
int rc = ext4_bcache_free(bdev->bc, b);
bdev->fs->bcache_unlock();
return rc;
}

int ext4_blocks_get_direct(struct ext4_blockdev *bdev, void *buf, uint64_t lba,
Expand Down
9 changes: 6 additions & 3 deletions src/ext4_fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1657,8 +1657,10 @@ int ext4_fs_append_inode_dblk(struct ext4_inode_ref *inode_ref,
if (rc != EOK)
return rc;

*fblock = current_fsblk;
ext4_assert(*fblock);
if (fblock) {
*fblock = current_fsblk;
ext4_assert(*fblock);
}

ext4_inode_set_size(inode_ref->inode, inode_size + block_size);
inode_ref->dirty = true;
Expand Down Expand Up @@ -1702,7 +1704,8 @@ int ext4_fs_append_inode_dblk(struct ext4_inode_ref *inode_ref,
ext4_inode_set_size(inode_ref->inode, inode_size + block_size);
inode_ref->dirty = true;

*fblock = phys_block;
if (fblock)
*fblock = phys_block;
*iblock = new_block_idx;

return EOK;
Expand Down
23 changes: 21 additions & 2 deletions src/ext4_ialloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ static uint32_t ext4_ialloc_bitmap_csum(struct ext4_sblock *sb, void *bitmap)
#define ext4_ialloc_bitmap_csum(...) 0
#endif

//TODO: Check this one when called by ext4_fs_get_block_group_ref()
void ext4_ialloc_set_bitmap_csum(struct ext4_sblock *sb, struct ext4_bgroup *bg,
void *bitmap __unused)
{
Expand Down Expand Up @@ -155,7 +156,8 @@ ext4_ialloc_verify_bitmap_csum(struct ext4_sblock *sb, struct ext4_bgroup *bg,
#define ext4_ialloc_verify_bitmap_csum(...) true
#endif

int ext4_ialloc_free_inode(struct ext4_fs *fs, uint32_t index, bool is_dir)
static int
__ext4_ialloc_free_inode(struct ext4_fs *fs, uint32_t index, bool is_dir)
{
struct ext4_sblock *sb = &fs->sb;

Expand Down Expand Up @@ -226,7 +228,16 @@ int ext4_ialloc_free_inode(struct ext4_fs *fs, uint32_t index, bool is_dir)
return EOK;
}

int ext4_ialloc_alloc_inode(struct ext4_fs *fs, uint32_t *idx, bool is_dir)
int ext4_ialloc_free_inode(struct ext4_fs *fs, uint32_t index, bool is_dir)
{
fs->inode_alloc_lock();
int rc = __ext4_ialloc_free_inode(fs, index, is_dir);
fs->inode_alloc_unlock();
return rc;
}

static int
__ext4_ialloc_alloc_inode(struct ext4_fs *fs, uint32_t *idx, bool is_dir)
{
struct ext4_sblock *sb = &fs->sb;

Expand Down Expand Up @@ -365,6 +376,14 @@ int ext4_ialloc_alloc_inode(struct ext4_fs *fs, uint32_t *idx, bool is_dir)
return ENOSPC;
}

int ext4_ialloc_alloc_inode(struct ext4_fs *fs, uint32_t *idx, bool is_dir)
{
fs->inode_alloc_lock();
int rc = __ext4_ialloc_alloc_inode(fs, idx, is_dir);
fs->inode_alloc_unlock();
return rc;
}

/**
* @}
*/
5 changes: 2 additions & 3 deletions src/ext4_trans.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,9 @@ int ext4_trans_block_get_noread(struct ext4_blockdev *bdev,
struct ext4_block *b,
uint64_t lba)
{
bdev->fs->bcache_lock();
int r = ext4_block_get_noread(bdev, b, lba);
if (r != EOK)
return r;

bdev->fs->bcache_unlock();
return r;
}

Expand Down

0 comments on commit 44c3329

Please sign in to comment.