Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F81971266
D25736.id74712.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
17 KB
Referenced Files
None
Subscribers
None
D25736.id74712.diff
View Options
Index: sys/kern/subr_blist.c
===================================================================
--- sys/kern/subr_blist.c
+++ sys/kern/subr_blist.c
@@ -36,15 +36,14 @@
*
* A radix tree controls access to pieces of the bitmap, and includes
* auxiliary information at each interior node about the availabilty of
- * contiguous free blocks in the subtree rooted at that node. Two radix
- * constants are involved: one for the size of the bitmaps contained in the
- * leaf nodes (BLIST_BMAP_RADIX), and one for the number of descendents of
- * each of the meta (interior) nodes (BLIST_META_RADIX). Each subtree is
- * associated with a range of blocks. The root of any subtree stores a
- * hint field that defines an upper bound on the size of the largest
- * allocation that can begin in the associated block range. A hint is an
- * upper bound on a potential allocation, but not necessarily a tight upper
- * bound.
+ * contiguous free blocks in the subtree rooted at that node. A radix
+ * constant defines the size of the bitmaps contained in a leaf node
+ * and the number of descendents of each of the meta (interior) nodes.
+ * Each subtree is associated with a range of blocks. The root of any
+ * subtree stores a hint field that defines an upper bound on the size
+ * of the largest allocation that can begin in the associated block
+ * range. A hint is an upper bound on a potential allocation, but not
+ * necessarily a tight upper bound.
*
* The bitmap field in each node directs the search for available blocks.
* For a leaf node, a bit is set if the corresponding block is free. For a
@@ -64,13 +63,13 @@
*
* LAYOUT: The radix tree is laid out recursively using a linear array.
* Each meta node is immediately followed (laid out sequentially in
- * memory) by BLIST_META_RADIX lower level nodes. This is a recursive
+ * memory) by BLIST_RADIX lower-level nodes. This is a recursive
* structure but one that can be easily scanned through a very simple
* 'skip' calculation. The memory allocation is only large enough to
* cover the number of blocks requested at creation time. Nodes that
* represent blocks beyond that limit, nodes that would never be read
* or written, are not allocated, so that the last of the
- * BLIST_META_RADIX lower level nodes of a some nodes may not be
+ * BLIST_RADIX lower-level nodes of a some nodes may not be
* allocated.
*
* NOTE: the allocator cannot currently allocate more than
@@ -152,24 +151,19 @@
static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
#endif
-_Static_assert(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0,
- "radix divisibility error");
-#define BLIST_BMAP_MASK (BLIST_BMAP_RADIX - 1)
-#define BLIST_META_MASK (BLIST_META_RADIX - 1)
+#define BLIST_MASK (BLIST_RADIX - 1)
/*
* For a subtree that can represent the state of up to 'radix' blocks, the
- * number of leaf nodes of the subtree is L=radix/BLIST_BMAP_RADIX. If 'm'
- * is short for BLIST_META_RADIX, then for a tree of height h with L=m**h
+ * number of leaf nodes of the subtree is L=radix/BLIST_RADIX. If 'm'
+ * is short for BLIST_RADIX, then for a tree of height h with L=m**h
* leaf nodes, the total number of tree nodes is 1 + m + m**2 + ... + m**h,
* or, equivalently, (m**(h+1)-1)/(m-1). This quantity is called 'skip'
* in the 'meta' functions that process subtrees. Since integer division
* discards remainders, we can express this computation as
* skip = (m * m**h) / (m - 1)
- * skip = (m * (radix / BLIST_BMAP_RADIX)) / (m - 1)
- * and since m divides BLIST_BMAP_RADIX, we can simplify further to
- * skip = (radix / (BLIST_BMAP_RADIX / m)) / (m - 1)
- * skip = radix / ((BLIST_BMAP_RADIX / m) * (m - 1))
+ * skip = (m * (radix / m)) / (m - 1)
+ * skip = radix / (m - 1)
* so that simple integer division by a constant can safely be used for the
* calculation.
*/
@@ -177,8 +171,7 @@
radix_to_skip(daddr_t radix)
{
- return (radix /
- ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK));
+ return (radix / BLIST_MASK);
}
/*
@@ -189,7 +182,7 @@
{
return (((u_daddr_t)-1 << n) &
- ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - (n + count))));
+ ((u_daddr_t)-1 >> (BLIST_RADIX - (n + count))));
}
/*
@@ -201,7 +194,7 @@
int hi, lo, mid;
lo = 0;
- hi = BLIST_BMAP_RADIX;
+ hi = BLIST_RADIX;
while (lo + 1 < hi) {
mid = (lo + hi) >> 1;
if (mask & bitrange(0, mid))
@@ -238,7 +231,7 @@
* flags - malloc flags
*
* The smallest blist consists of a single leaf node capable of
- * managing BLIST_BMAP_RADIX blocks.
+ * managing BLIST_RADIX blocks.
*/
blist_t
blist_create(daddr_t blocks, int flags)
@@ -252,11 +245,9 @@
* Calculate the radix and node count used for scanning.
*/
nodes = 1;
- radix = BLIST_BMAP_RADIX;
- while (radix <= blocks) {
- nodes += 1 + (blocks - 1) / radix;
- radix *= BLIST_META_RADIX;
- }
+ radix = 1;
+ for (radix = 1; radix <= blocks / BLIST_RADIX; radix *= BLIST_RADIX)
+ nodes += 1 + (blocks - 1) / radix / BLIST_RADIX;
bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags |
M_ZERO);
@@ -549,19 +540,12 @@
init_gap_stats(stats);
nodes = 0;
- i = bl->bl_radix;
- while (i < bl->bl_radix + bl->bl_blocks) {
+ radix = bl->bl_radix;
+ for (i = 0; i < bl->bl_blocks; ) {
/*
- * Find max size subtree starting at i.
- */
- radix = BLIST_BMAP_RADIX;
- while (((i / radix) & BLIST_META_MASK) == 0)
- radix *= BLIST_META_RADIX;
-
- /*
* Check for skippable subtrees starting at i.
*/
- while (radix > BLIST_BMAP_RADIX) {
+ while (radix != 1) {
if (bl->bl_root[nodes].bm_bitmap == 0) {
if (gap_stats_counting(stats))
update_gap_stats(stats, i);
@@ -572,9 +556,9 @@
* Skip subtree root.
*/
nodes++;
- radix /= BLIST_META_RADIX;
+ radix /= BLIST_RADIX;
}
- if (radix == BLIST_BMAP_RADIX) {
+ if (radix == 1) {
/*
* Scan leaf.
*/
@@ -588,8 +572,15 @@
diff ^= bitrange(digit, 1);
}
}
- nodes += radix_to_skip(radix);
- i += radix;
+ nodes += radix_to_skip(radix * BLIST_RADIX);
+ i += radix * BLIST_RADIX;
+
+ /*
+ * Find max size subtree starting at i.
+ */
+ radix = 1;
+ while (((i / BLIST_RADIX / radix) & BLIST_MASK) == 0)
+ radix *= BLIST_RADIX;
}
update_gap_stats(stats, i);
dump_gap_stats(stats, s);
@@ -623,13 +614,13 @@
daddr_t blk;
int avail, digit;
- start += BLIST_BMAP_RADIX;
- for (blk = start; blk - start < maxcount; blk += BLIST_BMAP_RADIX) {
+ start += BLIST_RADIX;
+ for (blk = start; blk - start < maxcount; blk += BLIST_RADIX) {
/* Skip meta-nodes, as long as they promise more free blocks. */
- radix = BLIST_BMAP_RADIX;
+ radix = BLIST_RADIX;
while (((++scan)->bm_bitmap & 1) == 1 &&
- ((blk / radix) & BLIST_META_MASK) == 0)
- radix *= BLIST_META_RADIX;
+ ((blk / radix) & BLIST_MASK) == 0)
+ radix *= BLIST_RADIX;
if (~scan->bm_bitmap != 0) {
/*
* Either there is no next leaf with any free blocks,
@@ -647,39 +638,38 @@
return (avail);
}
maxcount = imin(avail, maxcount);
- if (maxcount % BLIST_BMAP_RADIX == 0) {
+ if (maxcount % BLIST_RADIX == 0) {
/*
* There was no next leaf. Back scan up to
* last leaf.
*/
- --scan;
- while (radix != BLIST_BMAP_RADIX) {
- radix /= BLIST_META_RADIX;
+ do {
+ radix /= BLIST_RADIX;
--scan;
- }
- blk -= BLIST_BMAP_RADIX;
+ } while (radix != 1);
+ blk -= BLIST_RADIX;
}
}
}
/*
* 'scan' is the last leaf that provides blocks. Clear from 1 to
- * BLIST_BMAP_RADIX bits to represent the allocation of those last
+ * BLIST_RADIX bits to represent the allocation of those last
* blocks.
*/
- if (maxcount % BLIST_BMAP_RADIX != 0)
- scan->bm_bitmap &= ~bitrange(0, maxcount % BLIST_BMAP_RADIX);
+ if (maxcount % BLIST_RADIX != 0)
+ scan->bm_bitmap &= ~bitrange(0, maxcount % BLIST_RADIX);
else
scan->bm_bitmap = 0;
for (;;) {
/* Back up over meta-nodes, clearing bits if necessary. */
- blk -= BLIST_BMAP_RADIX;
- radix = BLIST_BMAP_RADIX;
- while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0) {
+ blk -= BLIST_RADIX;
+ radix = BLIST_RADIX;
+ while ((digit = ((blk / radix) & BLIST_MASK)) == 0) {
if ((scan--)->bm_bitmap == 0)
scan->bm_bitmap ^= 1;
- radix *= BLIST_META_RADIX;
+ radix *= BLIST_RADIX;
}
if ((scan--)->bm_bitmap == 0)
scan[-digit * radix_to_skip(radix)].bm_bitmap ^=
@@ -734,17 +724,17 @@
}
/* Discard any candidates that appear before blk. */
- if ((blk & BLIST_BMAP_MASK) != 0) {
- if ((~mask & bitrange(0, blk & BLIST_BMAP_MASK)) != 0) {
+ if ((blk & BLIST_MASK) != 0) {
+ if ((~mask & bitrange(0, blk & BLIST_MASK)) != 0) {
/* Grow bighint in case all discarded bits are set. */
- bighint += blk & BLIST_BMAP_MASK;
- mask |= bitrange(0, blk & BLIST_BMAP_MASK);
+ bighint += blk & BLIST_MASK;
+ mask |= bitrange(0, blk & BLIST_MASK);
if (~mask == 0) {
scan->bm_bighint = bighint;
return (SWAPBLK_NONE);
}
}
- blk -= blk & BLIST_BMAP_MASK;
+ blk -= blk & BLIST_MASK;
}
/*
@@ -763,17 +753,17 @@
hi = lo + maxcount;
*count = hi - lo;
mask = ~bitrange(lo, *count);
- } else if (maxcount <= BLIST_BMAP_RADIX - lo) {
+ } else if (maxcount <= BLIST_RADIX - lo) {
/* All the blocks we can use are available here. */
hi = lo + maxcount;
*count = maxcount;
mask = ~bitrange(lo, *count);
- if (hi == BLIST_BMAP_RADIX)
+ if (hi == BLIST_RADIX)
scan->bm_bighint = bighint;
} else {
/* Check next leaf for some of the blocks we want or need. */
- count1 = *count - (BLIST_BMAP_RADIX - lo);
- maxcount -= BLIST_BMAP_RADIX - lo;
+ count1 = *count - (BLIST_RADIX - lo);
+ maxcount -= BLIST_RADIX - lo;
hi = blst_next_leaf_alloc(scan, blk, count1, maxcount);
if (hi < count1)
/*
@@ -785,7 +775,7 @@
* this leaf.
*/
return (SWAPBLK_NONE);
- *count = BLIST_BMAP_RADIX - lo + hi;
+ *count = BLIST_RADIX - lo + hi;
scan->bm_bighint = bighint;
}
@@ -811,16 +801,15 @@
bool scan_from_start;
int digit;
- if (radix == BLIST_BMAP_RADIX)
+ if (radix == 1)
return (blst_leaf_alloc(scan, cursor, count, maxcount));
- blk = cursor & -radix;
+ blk = cursor & -(radix * BLIST_RADIX);
scan_from_start = (cursor == blk);
- radix /= BLIST_META_RADIX;
skip = radix_to_skip(radix);
mask = scan->bm_bitmap;
/* Discard any candidates that appear before cursor. */
- digit = (cursor / radix) & BLIST_META_MASK;
+ digit = (cursor / radix) & BLIST_MASK;
mask &= (u_daddr_t)-1 << digit;
if (mask == 0)
return (SWAPBLK_NONE);
@@ -846,7 +835,7 @@
* The allocation might fit beginning in the i'th subtree.
*/
r = blst_meta_alloc(&scan[i], cursor + digit * radix,
- count, maxcount, radix);
+ count, maxcount, radix / BLIST_RADIX);
if (r != SWAPBLK_NONE) {
if (scan[i].bm_bitmap == 0)
scan->bm_bitmap ^= bitrange(digit, 1);
@@ -860,7 +849,7 @@
* We couldn't allocate count in this subtree. If the whole tree was
* scanned, and the last tree node is allocated, update bighint.
*/
- if (scan_from_start && !(digit == BLIST_META_RADIX - 1 &&
+ if (scan_from_start && !(digit == BLIST_RADIX - 1 &&
scan[i].bm_bighint == BLIST_MAX_ALLOC))
scan->bm_bighint = *count - 1;
@@ -882,7 +871,7 @@
* \_________/\__/
* count n
*/
- mask = bitrange(blk & BLIST_BMAP_MASK, count);
+ mask = bitrange(blk & BLIST_MASK, count);
KASSERT((scan->bm_bitmap & mask) == 0,
("freeing free block: %jx, size %d, mask %jx",
(uintmax_t)blk, count, (uintmax_t)scan->bm_bitmap & mask));
@@ -913,20 +902,26 @@
*/
scan->bm_bighint = BLIST_MAX_ALLOC;
- if (radix == BLIST_BMAP_RADIX)
+ if (radix == 1)
return (blst_leaf_free(scan, freeBlk, count));
- endBlk = ummin(freeBlk + count, (freeBlk + radix) & -radix);
- radix /= BLIST_META_RADIX;
+ endBlk = freeBlk + count;
+ blk = (freeBlk + radix * BLIST_RADIX) & -(radix * BLIST_RADIX);
+ /*
+ * blk is first block past the end of the range of this meta node,
+ * or 0 in case of overflow.
+ */
+ if (blk != 0)
+ endBlk = ummin(endBlk, blk);
skip = radix_to_skip(radix);
blk = freeBlk & -radix;
- digit = (blk / radix) & BLIST_META_MASK;
- endDigit = 1 + (((endBlk - 1) / radix) & BLIST_META_MASK);
+ digit = (blk / radix) & BLIST_MASK;
+ endDigit = 1 + (((endBlk - 1) / radix) & BLIST_MASK);
scan->bm_bitmap |= bitrange(digit, endDigit - digit);
for (i = 1 + digit * skip; blk < endBlk; i += skip) {
blk += radix;
count = ummin(blk, endBlk) - freeBlk;
- blst_meta_free(&scan[i], freeBlk, count, radix);
+ blst_meta_free(&scan[i], freeBlk, count, radix / BLIST_RADIX);
freeBlk = blk;
}
}
@@ -947,7 +942,7 @@
* Leaf node
*/
- if (radix == BLIST_BMAP_RADIX) {
+ if (radix == 1) {
u_daddr_t v = scan->bm_bitmap;
if (v == (u_daddr_t)-1) {
@@ -975,14 +970,14 @@
}
endBlk = blk + count;
- radix /= BLIST_META_RADIX;
skip = radix_to_skip(radix);
for (i = 1; blk < endBlk; i += skip) {
blk += radix;
count = radix;
if (blk >= endBlk)
count -= blk - endBlk;
- blst_copy(&scan[i], blk - radix, radix, dest, count);
+ blst_copy(&scan[i], blk - radix,
+ radix / BLIST_RADIX, dest, count);
}
}
@@ -999,7 +994,7 @@
daddr_t nblks;
u_daddr_t mask;
- mask = bitrange(blk & BLIST_BMAP_MASK, count);
+ mask = bitrange(blk & BLIST_MASK, count);
/* Count the number of blocks that we are allocating. */
nblks = bitcount64(scan->bm_bitmap & mask);
@@ -1022,20 +1017,27 @@
daddr_t blk, endBlk, i, nblks, skip;
int digit;
- if (radix == BLIST_BMAP_RADIX)
+ if (radix == 1)
return (blst_leaf_fill(scan, allocBlk, count));
- endBlk = ummin(allocBlk + count, (allocBlk + radix) & -radix);
- radix /= BLIST_META_RADIX;
+ endBlk = allocBlk + count;
+ blk = (allocBlk + radix * BLIST_RADIX) & -(radix * BLIST_RADIX);
+ /*
+ * blk is first block past the end of the range of this meta node,
+ * or 0 in case of overflow.
+ */
+ if (blk != 0)
+ endBlk = ummin(endBlk, blk);
skip = radix_to_skip(radix);
blk = allocBlk & -radix;
nblks = 0;
while (blk < endBlk) {
- digit = (blk / radix) & BLIST_META_MASK;
+ digit = (blk / radix) & BLIST_MASK;
i = 1 + digit * skip;
blk += radix;
count = ummin(blk, endBlk) - allocBlk;
- nblks += blst_meta_fill(&scan[i], allocBlk, count, radix);
+ nblks += blst_meta_fill(&scan[i], allocBlk, count,
+ radix / BLIST_RADIX);
if (scan[i].bm_bitmap == 0)
scan->bm_bitmap &= ~((u_daddr_t)1 << digit);
allocBlk = blk;
@@ -1052,12 +1054,12 @@
u_daddr_t mask;
int digit;
- if (radix == BLIST_BMAP_RADIX) {
+ if (radix == 1) {
printf(
"%*.*s(%08llx,%lld): bitmap %0*llx big=%lld\n",
tab, tab, "",
- (long long)blk, (long long)radix,
- 1 + (BLIST_BMAP_RADIX - 1) / 4,
+ (long long)blk, (long long)BLIST_RADIX,
+ (int)(1 + (BLIST_RADIX - 1) / 4),
(long long)scan->bm_bitmap,
(long long)scan->bm_bighint
);
@@ -1067,14 +1069,13 @@
printf(
"%*.*s(%08llx): subtree (%lld/%lld) bitmap %0*llx big=%lld {\n",
tab, tab, "",
- (long long)blk, (long long)radix,
- (long long)radix,
- 1 + (BLIST_META_RADIX - 1) / 4,
+ (long long)blk, (long long)radix * BLIST_RADIX,
+ (long long)radix * BLIST_RADIX,
+ (int)(1 + (BLIST_RADIX - 1) / 4),
(long long)scan->bm_bitmap,
(long long)scan->bm_bighint
);
- radix /= BLIST_META_RADIX;
skip = radix_to_skip(radix);
tab += 4;
@@ -1083,7 +1084,7 @@
do {
digit = bitpos(mask);
blst_radix_print(&scan[1 + digit * skip], blk + digit * radix,
- radix, tab);
+ radix / BLIST_RADIX, tab);
} while ((mask ^= bitrange(digit, 1)) != 0);
tab -= 4;
@@ -1100,7 +1101,7 @@
int
main(int ac, char **av)
{
- int size = BLIST_META_RADIX * BLIST_BMAP_RADIX;
+ daddr_t size = BLIST_RADIX * BLIST_RADIX;
int i;
blist_t bl;
struct sbuf *s;
@@ -1108,7 +1109,7 @@
for (i = 1; i < ac; ++i) {
const char *ptr = av[i];
if (*ptr != '-') {
- size = strtol(ptr, NULL, 0);
+ size = strtoll(ptr, NULL, 0);
continue;
}
ptr += 2;
@@ -1116,6 +1117,10 @@
exit(1);
}
bl = blist_create(size, M_WAITOK);
+ if (bl == NULL) {
+ fprintf(stderr, "blist_create failed\n");
+ exit(1);
+ }
blist_free(bl, 0, size);
for (;;) {
@@ -1124,7 +1129,7 @@
int count = 0, maxcount = 0;
printf("%lld/%lld/%lld> ", (long long)blist_avail(bl),
- (long long)size, (long long)bl->bl_radix);
+ (long long)size, (long long)bl->bl_radix * BLIST_RADIX);
fflush(stdout);
if (fgets(buf, sizeof(buf), stdin) == NULL)
break;
Index: sys/sys/blist.h
===================================================================
--- sys/sys/blist.h
+++ sys/sys/blist.h
@@ -85,10 +85,9 @@
blmeta_t bl_root[1]; /* root of radix tree */
} *blist_t;
-#define BLIST_BMAP_RADIX (sizeof(u_daddr_t)*8)
-#define BLIST_META_RADIX BLIST_BMAP_RADIX
+#define BLIST_RADIX (sizeof(u_daddr_t)*8)
-#define BLIST_MAX_ALLOC BLIST_BMAP_RADIX
+#define BLIST_MAX_ALLOC BLIST_RADIX
struct sbuf;
Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -2328,7 +2328,6 @@
{
struct swdevt *sp, *tsp;
daddr_t dvbase;
- u_long mblocks;
/*
* nblks is in DEV_BSIZE'd chunks, convert to PAGE_SIZE'd chunks.
@@ -2339,19 +2338,8 @@
nblks &= ~(ctodb(1) - 1);
nblks = dbtoc(nblks);
- /*
- * If we go beyond this, we get overflows in the radix
- * tree bitmap code.
- */
- mblocks = 0x40000000 / BLIST_META_RADIX;
- if (nblks > mblocks) {
- printf(
- "WARNING: reducing swap size to maximum of %luMB per unit\n",
- mblocks / 1024 / 1024 * PAGE_SIZE);
- nblks = mblocks;
- }
-
sp = malloc(sizeof *sp, M_VMPGDATA, M_WAITOK | M_ZERO);
+ sp->sw_blist = blist_create(nblks, M_WAITOK);
sp->sw_vp = vp;
sp->sw_id = id;
sp->sw_dev = dev;
@@ -2361,7 +2349,6 @@
sp->sw_close = close;
sp->sw_flags = flags;
- sp->sw_blist = blist_create(nblks, M_WAITOK);
/*
* Do not free the first blocks in order to avoid overwriting
* any bsd label at the front of the partition
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Dec 16, 12:56 AM (41 m, 32 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
9092155
Default Alt Text
D25736.id74712.diff (17 KB)
Attached To
Mode
D25736: Avoid overflow in blist_create, remove swap pager checks before blist_create
Attached
Detach File
Event Timeline
Log In to Comment