While droping extent map structures from the extent cache that cover our
target range, we would remove each extent map structure from the red black
tree and then add either 1 or 2 new extent map structures if the former
extent map covered sections outside our target range.
This change simply attempts to replace the existing extent map structure
with a new one that covers the subsection we're not interested in, instead
of doing a red black remove operation followed by an insertion operation.
The number of elements in an inode's extent map tree can get very high for large
files under random writes. For example, while running the following test:
sysbench --test=fileio --file-num=1 --file-total-size=10G \
--file-test-mode=rndrw --num-threads=32 --file-block-size=32768 \
--max-requests=500000 --file-rw-ratio=2 [prepare|run]
I captured the following histogram capturing the number of extent_map items
in the red black tree while that test was running:
Count: 122462
Range: 1.000 - 172231.000; Mean: 96415.831; Median: 101855.000; Stddev: 49700.981
Percentiles: 90th: 160120.000; 95th: 166335.000; 99th: 171070.000
1.000 - 5.231: 452 |
5.231 - 187.392: 87 |
187.392 - 585.911: 206 |
585.911 - 1827.438: 623 |
1827.438 - 5695.245: 1962 #
5695.245 - 17744.861: 6204 ####
17744.861 - 55283.764: 21115 ############
55283.764 - 172231.000: 91813 #####################################################
Benchmark:
sysbench --test=fileio --file-num=1 --file-total-size=10G --file-test-mode=rndwr \
--num-threads=64 --file-block-size=32768 --max-requests=0 --max-time=60 \
--file-io-mode=sync --file-fsync-freq=0 [prepare|run]
Before this change: 122.1Mb/sec
After this change: 125.07Mb/sec
(averages of 5 test runs)
Test machine: quad core intel i5-3570K, 32Gb of ram, SSD
Signed-off-by: Filipe David Borba Manana <[email protected]>
Signed-off-by: Josef Bacik <[email protected]>
try_merge_map(tree, em);
}
+static inline void setup_extent_mapping(struct extent_map_tree *tree,
+ struct extent_map *em,
+ int modified)
+{
+ atomic_inc(&em->refs);
+ em->mod_start = em->start;
+ em->mod_len = em->len;
+
+ if (modified)
+ list_move(&em->list, &tree->modified_extents);
+ else
+ try_merge_map(tree, em);
+}
+
/**
* add_extent_mapping - add new extent map to the extent tree
* @tree: tree to insert new map in
if (ret)
goto out;
- atomic_inc(&em->refs);
-
- em->mod_start = em->start;
- em->mod_len = em->len;
-
- if (modified)
- list_move(&em->list, &tree->modified_extents);
- else
- try_merge_map(tree, em);
+ setup_extent_mapping(tree, em, modified);
out:
return ret;
}
RB_CLEAR_NODE(&em->rb_node);
return ret;
}
+
+void replace_extent_mapping(struct extent_map_tree *tree,
+ struct extent_map *cur,
+ struct extent_map *new,
+ int modified)
+{
+ WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
+ ASSERT(extent_map_in_tree(cur));
+ if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
+ list_del_init(&cur->list);
+ rb_replace_node(&cur->rb_node, &new->rb_node, &tree->map);
+ RB_CLEAR_NODE(&cur->rb_node);
+
+ setup_extent_mapping(tree, new, modified);
+}
int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em, int modified);
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
+void replace_extent_mapping(struct extent_map_tree *tree,
+ struct extent_map *cur,
+ struct extent_map *new,
+ int modified);
struct extent_map *alloc_extent_map(void);
void free_extent_map(struct extent_map *em);
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
clear_bit(EXTENT_FLAG_LOGGING, &flags);
modified = !list_empty(&em->list);
- remove_extent_mapping(em_tree, em);
if (no_splits)
goto next;
split->bdev = em->bdev;
split->flags = flags;
split->compress_type = em->compress_type;
- ret = add_extent_mapping(em_tree, split, modified);
- BUG_ON(ret); /* Logic error */
+ replace_extent_mapping(em_tree, em, split, modified);
free_extent_map(split);
split = split2;
split2 = NULL;
split->orig_block_len = 0;
}
- ret = add_extent_mapping(em_tree, split, modified);
- BUG_ON(ret); /* Logic error */
+ if (extent_map_in_tree(em)) {
+ replace_extent_mapping(em_tree, em, split,
+ modified);
+ } else {
+ ret = add_extent_mapping(em_tree, split,
+ modified);
+ ASSERT(ret == 0); /* Logic error */
+ }
free_extent_map(split);
split = NULL;
}
next:
+ if (extent_map_in_tree(em))
+ remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
/* once for us */