2021SC@SDUSC
ext2_try_to_allocate的作用是尝试在给定的范围内分配块,参数group为给定的分配块组,grp_goal是给定组内的目标块,count是分配的块的目标数量。首先设置块的分配范围start到end,然后如果我们有一个窗口,我们在预留窗口内进行分配重设start和end的值,并且判断grp_goal是否在分配范围中,不在则grp_goal设为-1。然后判断grp_goal是否小于0,是则调用find_next_usable_block在分配范围中找到一个可用的块,找不到跳转到fail_access返回-1,接着当预留窗口不存在时,开始循环找到最小的grp_goal,条件是grp_goal大于start且位图上的第grp_goal-1位值为0。最后开始分配不超过count数量的块并修改位图。count变为实际分配的块数,返回grp_goal - num即最后的块号减去分配数是开始的块号。
static int
ext2_try_to_allocate(struct super_block *sb, int group,
struct buffer_head *bitmap_bh, ext2_grpblk_t grp_goal,
unsigned long *count,
struct ext2_reserve_window *my_rsv)
{
ext2_fsblk_t group_first_block = ext2_group_first_block_no(sb, group);
ext2_fsblk_t group_last_block = ext2_group_last_block_no(sb, group);
ext2_grpblk_t start, end;
unsigned long num = 0;
start = 0;
end = group_last_block - group_first_block + 1;
if (my_rsv) {
if (my_rsv->_rsv_start >= group_first_block)
start = my_rsv->_rsv_start - group_first_block;
if (my_rsv->_rsv_end < group_last_block)
end = my_rsv->_rsv_end - group_first_block + 1;
if (grp_goal < start || grp_goal >= end)
grp_goal = -1;
}
BUG_ON(start > EXT2_BLOCKS_PER_GROUP(sb));
if (grp_goal < 0) {
grp_goal = find_next_usable_block(start, bitmap_bh, end);
if (grp_goal < 0)
goto fail_access;
if (!my_rsv) {
int i;
for (i = 0; i < 7 && grp_goal > start &&
!ext2_test_bit(grp_goal - 1,
bitmap_bh->b_data);
i++, grp_goal--)
;
}
}
for (; num < *count && grp_goal < end; grp_goal++) {
if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group),
grp_goal, bitmap_bh->b_data)) {
if (num == 0)
continue;
break;
}
num++;
}
if (num == 0)
goto fail_access;
*count = num;
return grp_goal - num;
fail_access:
return -1;
}
find_next_reservable_window作用是在给定的范围内找到一个预留窗口,有head和start_block来帮助搜索预留空间。列表从head开始,然后转移移到start_block所在的位置,在寻找可保留空间时从那里开始,size的作用是使得预留窗口的开始按字节对齐。在while循环中,遍历rb树找寻预留窗口,如果cur越界那么直接跳出循环,当!next即到达树的末尾,并且在树的最后一项之后有空的保留空间,将其添加到列表的末尾。循环结束后,检查磁盘位图,如果prev != my_rsv即窗口大小不等,并且没有空闲块,那么rsv_window_remove将删除树种的my_rsv节点,如果有空闲块就调整my_rsv窗口大小后添加节点。
static int find_next_reservable_window(
struct ext2_reserve_window_node *search_head,
struct ext2_reserve_window_node *my_rsv,
struct super_block * sb,
ext2_fsblk_t start_block,
ext2_fsblk_t last_block)
{
struct rb_node *next;
struct ext2_reserve_window_node *rsv, *prev;
ext2_fsblk_t cur;
int size = my_rsv->rsv_goal_size;
cur = start_block;
rsv = search_head;
if (!rsv)
return -1;
while (1) {
if (cur <= rsv->rsv_end)
cur = rsv->rsv_end + 1;
if (cur > last_block)
return -1;
prev = rsv;
next = rb_next(&rsv->rsv_node);
rsv = rb_entry(next,struct ext2_reserve_window_node,rsv_node);
if (!next)
break;
if (cur + size <= rsv->rsv_start) {
break;
}
}
if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
rsv_window_remove(sb, my_rsv);
my_rsv->rsv_start = cur;
my_rsv->rsv_end = cur + size - 1;
my_rsv->rsv_alloc_hit = 0;
if (prev != my_rsv)
ext2_rsv_window_add(sb, my_rsv);
return 0;
}
alloc_new_reservation是分配新的预留窗口的函数。要创建一个新的预约,我们搜索文件系统预留列表的一部分。如果没有目标,我们尝试在分配目标附近或组的开始处分配一个新的预留窗口。我们首先在目标后找到一个预留的空间,然后从那里,我们检查位图后的第一个空闲块。如果在组结束之前没有空闲块,则整个组都满了,则失败。否则,检查空闲块是否在预期的预留空间内,如果是,则成功。如果第一个空闲块在预留空间之外,那么从第一个空闲块开始,我们搜索下一个可用空间,然后继续。如果my_rsv是跨组边界的,如果目标是在my_rsv内,当我们刚刚从窗口的第一部分分配失败时,还有另一部分属于下一组。在这种情况下,试图在这个组中分配一个新窗口将失败。或许我们应该保留预订窗口,将预订窗口的起始区块移到下一组的第一个区块。然后判断如果之前的分配命中率大于1/2,那么下次我们将把预留窗口的大小增加一倍,否则我们保持相同大小的窗口。接下来将search_head移到目标方块附近的窗口,开始retry,目的是为了确保预留窗口内有一个空闲位,我们需要在找到一个预留窗口后检查位图,因为find_next_reservable_window只是在给定的范围内找到一个预留窗口。所以bitmap_search_next_usable_block从我们刚找到的预留空间的起始块开始搜索块位图上的第一个空闲位,如果first_free_block < 0,就是没有找到,返回失败。然后检查第一个空闲块是否在我们刚刚预留的空闲空间内,是的话代表分配成功,否则将search_head移到我们刚找到的预留空间重新开始retry。
static int alloc_new_reservation(struct ext2_reserve_window_node *my_rsv,
ext2_grpblk_t grp_goal, struct super_block *sb,
unsigned int group, struct buffer_head *bitmap_bh)
{
struct ext2_reserve_window_node *search_head;
ext2_fsblk_t group_first_block, group_end_block, start_block;
ext2_grpblk_t first_free_block;
struct rb_root *fs_rsv_root = &EXT2_SB(sb)->s_rsv_window_root;
unsigned long size;
int ret;
spinlock_t *rsv_lock = &EXT2_SB(sb)->s_rsv_window_lock;
group_first_block = ext2_group_first_block_no(sb, group);
group_end_block = ext2_group_last_block_no(sb, group);
if (grp_goal < 0)
start_block = group_first_block;
else
start_block = grp_goal + group_first_block;
size = my_rsv->rsv_goal_size;
if (!rsv_is_empty(&my_rsv->rsv_window)) {
if ((my_rsv->rsv_start <= group_end_block) &&
(my_rsv->rsv_end > group_end_block) &&
(start_block >= my_rsv->rsv_start))
return -1;
if ((my_rsv->rsv_alloc_hit >
(my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
size = size * 2;
if (size > EXT2_MAX_RESERVE_BLOCKS)
size = EXT2_MAX_RESERVE_BLOCKS;
my_rsv->rsv_goal_size= size;
}
}
spin_lock(rsv_lock);
search_head = search_reserve_window(fs_rsv_root, start_block);
retry:
ret = find_next_reservable_window(search_head, my_rsv, sb,
start_block, group_end_block);
if (ret == -1) {
if (!rsv_is_empty(&my_rsv->rsv_window))
rsv_window_remove(sb, my_rsv);
spin_unlock(rsv_lock);
return -1;
}
spin_unlock(rsv_lock);
first_free_block = bitmap_search_next_usable_block(
my_rsv->rsv_start - group_first_block,
bitmap_bh, group_end_block - group_first_block + 1);
if (first_free_block < 0) {
spin_lock(rsv_lock);
if (!rsv_is_empty(&my_rsv->rsv_window))
rsv_window_remove(sb, my_rsv);
spin_unlock(rsv_lock);
return -1;
}
start_block = first_free_block + group_first_block;
if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
return 0;
search_head = my_rsv;
spin_lock(rsv_lock);
goto retry;
}
try_to_extend_reservation尝试将预留窗口扩展到足够大的空闲块数量,如果下一个预留窗口的起始块号比my_rsv的结束块号至少多一个size,那么my_rsv扩大一个size,否则扩展到next_rsv->rsv_start前一位。
static void try_to_extend_reservation(struct ext2_reserve_window_node *my_rsv,
struct super_block *sb, int size)
{
struct ext2_reserve_window_node *next_rsv;
struct rb_node *next;
spinlock_t *rsv_lock = &EXT2_SB(sb)->s_rsv_window_lock;
if (!spin_trylock(rsv_lock))
return;
next = rb_next(&my_rsv->rsv_node);
if (!next)
my_rsv->rsv_end += size;
else {
next_rsv = rb_entry(next, struct ext2_reserve_window_node, rsv_node);
if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
my_rsv->rsv_end += size;
else
my_rsv->rsv_end = next_rsv->rsv_start - 1;
}
spin_unlock(rsv_lock);
}
ext2_try_to_allocate_with_rsv ()是用来分配一个新块和它的预留窗口的主要函数。首先查看如果文件系统没有保留而被挂载,调用ext2_try_to_allocate不分配预留,而是直接分配块。grp_goal是块组的相对块号,first block是块组中的第一个块号。每次需要一个新的块分配时,我们将从inode的预留窗口分配一个新块。如果它没有预留窗口,或是最后一次尝试从现有的预留分配块失败,或是已设定好目标块和窗口,我们需要分配一个新的预留窗口。如果不需要分配窗口,且grp_goal >= 0,找到目标块在块组中的位置,判断是否需要扩展窗口大小。最后判断窗口是否跨组,然后跳转到目标位置开始分配。
static ext2_grpblk_t
ext2_try_to_allocate_with_rsv(struct super_block *sb, unsigned int group,
struct buffer_head *bitmap_bh, ext2_grpblk_t grp_goal,
struct ext2_reserve_window_node * my_rsv,
unsigned long *count)
{
ext2_fsblk_t group_first_block, group_last_block;
ext2_grpblk_t ret = 0;
unsigned long num = *count;
if (my_rsv == NULL) {
return ext2_try_to_allocate(sb, group, bitmap_bh,
grp_goal, count, NULL);
}
group_first_block = ext2_group_first_block_no(sb, group);
group_last_block = ext2_group_last_block_no(sb, group);
while (1) {
if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
!goal_in_my_reservation(&my_rsv->rsv_window,
grp_goal, group, sb)) {
if (my_rsv->rsv_goal_size < *count)
my_rsv->rsv_goal_size = *count;
ret = alloc_new_reservation(my_rsv, grp_goal, sb,
group, bitmap_bh);
if (ret < 0)
break;
if (!goal_in_my_reservation(&my_rsv->rsv_window,
grp_goal, group, sb))
grp_goal = -1;
} else if (grp_goal >= 0) {
int curr = my_rsv->rsv_end -
(grp_goal + group_first_block) + 1;
if (curr < *count)
try_to_extend_reservation(my_rsv, sb,
*count - curr);
}
if ((my_rsv->rsv_start > group_last_block) ||
(my_rsv->rsv_end < group_first_block)) {
rsv_window_dump(&EXT2_SB(sb)->s_rsv_window_root, 1);
BUG();
}
ret = ext2_try_to_allocate(sb, group, bitmap_bh, grp_goal,
&num, &my_rsv->rsv_window);
if (ret >= 0) {
my_rsv->rsv_alloc_hit += num;
*count = num;
break;
}
num = *count;
}
return ret;
}
ext2_has_free_blocks检查文件系统是否有待分配的空闲块。percpu_counter_read_positive函数是为了防止多处理器并发导致的读取失败,从ext2_sb_info->s_freeblocks_counter得到的是当前ext2的空闲的数据块数目,root_blocks是从ext2_super_block得到保留的数据块数目。
static int ext2_has_free_blocks(struct ext2_sb_info *sbi)
{
ext2_fsblk_t free_blocks, root_blocks;
free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
!uid_eq(sbi->s_resuid, current_fsuid()) &&
(gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) ||
!in_group_p (sbi->s_resgid))) {
return 0;
}
return 1;
}
ext2_data_block_valid判断如果传入的块区域是无效的,或某些部分与文件系统的元数据块重叠,则返回0,否则返回1.
int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk,
unsigned int count)
{
if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
(start_blk + count - 1 < start_blk) ||
(start_blk + count - 1 >= le32_to_cpu(sbi->s_es->s_blocks_count)))
return 0;
if ((start_blk <= sbi->s_sb_block) &&
(start_blk + count - 1 >= sbi->s_sb_block))
return 0;
return 1;
}
ext2_new_blocks使用目标块来辅助分配。如果目标是空闲的,或者在目标的32个块中有一个空闲块,就会分配该块。否则,向前查找空闲块;在每个块组中,搜索首先在块位图中查找整个空闲字节,如果失败,然后查找任何空闲位。首先检查块的分配配额,然后若文件系统挂载是有预留则从预留窗口分配一个块,然后检查是否有空闲块,若无跳转到out。接下来,无论目标块是否空闲都尝试分配,然后进入retry_alloc。先获取块组描述符和空闲块数量,如果没有足够的空闲块来做一个新的预留,关闭这个分配的预留。接下来当空闲块数量大于0时,我们重试分配,因为bitmap_bh是一个非空指针,我们必须在调用read_block_bitmap()之前释放它,分配成功则跳转到allocated。否则开始搜索其他的块组, 如果没有空闲块或空闲块的数量小于预留窗口大小的一半,则跳过该块组,可以避免加载位图。接下来如果my_ rsv不为空,要清除它,原因是我们可能会得到一个虚假的早期ENOSPC错误,文件系统是“满的”预留,但可能确实有磁盘上可用的空闲块。在这种情况下,我们只是做块分配没有预留。然后看allocated部分,运行到此,说明终于分配到块了获得分配到的块在文件系统范围内的块号,然后检验ret_block是不是合法,接下来开始块赋值,bitmap_bh这个数据块标记为脏,SB_SYNCHRONOUS标志代表着一有修改立即写入磁盘。最后,如果实际分配数量小于目标量,修改配额,并标记inode为脏。
ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal,
unsigned long *count, int *errp)
{
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *gdp_bh;
int group_no;
int goal_group;
ext2_grpblk_t grp_target_blk;
ext2_grpblk_t grp_alloc_blk;
ext2_fsblk_t ret_block;
int bgi;
int performed_allocation = 0;
ext2_grpblk_t free_blocks;
struct super_block *sb;
struct ext2_group_desc *gdp;
struct ext2_super_block *es;
struct ext2_sb_info *sbi;
struct ext2_reserve_window_node *my_rsv = NULL;
struct ext2_block_alloc_info *block_i;
unsigned short windowsz = 0;
unsigned long ngroups;
unsigned long num = *count;
int ret;
*errp = -ENOSPC;
sb = inode->i_sb;
ret = dquot_alloc_block(inode, num);
if (ret) {
*errp = ret;
return 0;
}
sbi = EXT2_SB(sb);
es = EXT2_SB(sb)->s_es;
ext2_debug("goal=%lu.n", goal);
block_i = EXT2_I(inode)->i_block_alloc_info;
if (block_i) {
windowsz = block_i->rsv_window_node.rsv_goal_size;
if (windowsz > 0)
my_rsv = &block_i->rsv_window_node;
}
if (!ext2_has_free_blocks(sbi)) {
*errp = -ENOSPC;
goto out;
}
if (goal < le32_to_cpu(es->s_first_data_block) ||
goal >= le32_to_cpu(es->s_blocks_count))
goal = le32_to_cpu(es->s_first_data_block);
group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
EXT2_BLOCKS_PER_GROUP(sb);
goal_group = group_no;
retry_alloc:
gdp = ext2_get_group_desc(sb, group_no, &gdp_bh);
if (!gdp)
goto io_error;
free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
if (my_rsv && (free_blocks < windowsz)
&& (free_blocks > 0)
&& (rsv_is_empty(&my_rsv->rsv_window)))
my_rsv = NULL;
if (free_blocks > 0) {
grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) %
EXT2_BLOCKS_PER_GROUP(sb));
brelse(bitmap_bh);
bitmap_bh = read_block_bitmap(sb, group_no);
if (!bitmap_bh)
goto io_error;
grp_alloc_blk = ext2_try_to_allocate_with_rsv(sb, group_no,
bitmap_bh, grp_target_blk,
my_rsv, &num);
if (grp_alloc_blk >= 0)
goto allocated;
}
ngroups = EXT2_SB(sb)->s_groups_count;
smp_rmb();
for (bgi = 0; bgi < ngroups; bgi++) {
group_no++;
if (group_no >= ngroups)
group_no = 0;
gdp = ext2_get_group_desc(sb, group_no, &gdp_bh);
if (!gdp)
goto io_error;
free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
if (!free_blocks)
continue;
if (my_rsv && (free_blocks <= (windowsz/2)))
continue;
brelse(bitmap_bh);
bitmap_bh = read_block_bitmap(sb, group_no);
if (!bitmap_bh)
goto io_error;
grp_alloc_blk = ext2_try_to_allocate_with_rsv(sb, group_no,
bitmap_bh, -1, my_rsv, &num);
if (grp_alloc_blk >= 0)
goto allocated;
}
if (my_rsv) {
my_rsv = NULL;
windowsz = 0;
group_no = goal_group;
goto retry_alloc;
}
*errp = -ENOSPC;
goto out;
allocated:
ext2_debug("using block group %d(%d)n",
group_no, gdp->bg_free_blocks_count);
ret_block = grp_alloc_blk + ext2_group_first_block_no(sb, group_no);
if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) ||
in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) ||
in_range(ret_block, le32_to_cpu(gdp->bg_inode_table),
EXT2_SB(sb)->s_itb_per_group) ||
in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table),
EXT2_SB(sb)->s_itb_per_group)) {
ext2_error(sb, "ext2_new_blocks",
"Allocating block in system zone - "
"blocks from "E2FSBLK", length %lu",
ret_block, num);
num = *count;
goto retry_alloc;
}
performed_allocation = 1;
if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) {
ext2_error(sb, "ext2_new_blocks",
"block("E2FSBLK") >= blocks count(%d) - "
"block_group = %d, es == %p ", ret_block,
le32_to_cpu(es->s_blocks_count), group_no, es);
goto out;
}
group_adjust_blocks(sb, group_no, gdp, gdp_bh, -num);
percpu_counter_sub(&sbi->s_freeblocks_counter, num);
mark_buffer_dirty(bitmap_bh);
if (sb->s_flags & SB_SYNCHRONOUS)
sync_dirty_buffer(bitmap_bh);
*errp = 0;
brelse(bitmap_bh);
if (num < *count) {
dquot_free_block_nodirty(inode, *count-num);
mark_inode_dirty(inode);
*count = num;
}
return ret_block;
io_error:
*errp = -EIO;
out:
if (!performed_allocation) {
dquot_free_block_nodirty(inode, *count);
mark_inode_dirty(inode);
}
brelse(bitmap_bh);
return 0;
}
ext2_count_free按字节来计算空闲块的数目。ext2_count_free_blocks就是空闲块的数目,如果没有定义EXT2FS_DEBUG 就遍历每一个组描述符,直接对每个组的空闲块数相加,否则获得统计的组的空闲块数目后,还要获得数据块的位图然后使用 ext2_count_free真正意义上遍历位图获得空闲的位数。
#ifdef EXT2FS_DEBUG
unsigned long ext2_count_free(struct buffer_head *map, unsigned int numchars)
{
return numchars * BITS_PER_BYTE - memweight(map->b_data, numchars);
}
#endif
unsigned long ext2_count_free_blocks (struct super_block * sb)
{
struct ext2_group_desc * desc;
unsigned long desc_count = 0;
int i;
#ifdef EXT2FS_DEBUG
unsigned long bitmap_count, x;
struct ext2_super_block *es;
es = EXT2_SB(sb)->s_es;
desc_count = 0;
bitmap_count = 0;
desc = NULL;
for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
struct buffer_head *bitmap_bh;
desc = ext2_get_group_desc (sb, i, NULL);
if (!desc)
continue;
desc_count += le16_to_cpu(desc->bg_free_blocks_count);
bitmap_bh = read_block_bitmap(sb, i);
if (!bitmap_bh)
continue;
x = ext2_count_free(bitmap_bh, sb->s_blocksize);
printk ("group %d: stored = %d, counted = %lun",
i, le16_to_cpu(desc->bg_free_blocks_count), x);
bitmap_count += x;
brelse(bitmap_bh);
}
printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lun",
(long)le32_to_cpu(es->s_free_blocks_count),
desc_count, bitmap_count);
return bitmap_count;
#else
for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
desc = ext2_get_group_desc (sb, i, NULL);
if (!desc)
continue;
desc_count += le16_to_cpu(desc->bg_free_blocks_count);
}
return desc_count;
#endif
}
test_root检验a是不是b的幂。ext2_group_sparse用来查找组号是3或者5或者7的幂的组号,而在ext2文件系统里边357的幂的组号会放超级块。
static inline int test_root(int a, int b)
{
int num = b;
while (a > num)
num *= b;
return num == a;
}
static int ext2_group_sparse(int group)
{
if (group <= 1)
return 1;
return (test_root(group, 3) || test_root(group, 5) ||
test_root(group, 7));
}
ext2_bg_has_super将返回组中超级块(主或备份)使用的块数量。目前这将只有0或1。
int ext2_bg_has_super(struct super_block *sb, int group)
{
if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
!ext2_group_sparse(group))
return 0;
return 1;
}
ext2_bg_num_gdb返回组描述符表(主表或备份表)在该组中使用的块数。
unsigned long ext2_bg_num_gdb(struct super_block *sb, int group)
{
return ext2_bg_has_super(sb, group) ? EXT2_SB(sb)->s_gdb_count : 0;
}


