diff --git a/share/man/man4/witness.4 b/share/man/man4/witness.4 --- a/share/man/man4/witness.4 +++ b/share/man/man4/witness.4 @@ -21,7 +21,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd March 5, 2025 +.Dd January 19, 2026 .Dt WITNESS 4 .Os .Sh NAME @@ -139,6 +139,27 @@ This sysctl can be set via .Xr loader 8 . .Pp +The sysctl +.Va debug.witness.badstacks +displays a listing of detected lock order violations cached in the +.Nm +module's current view of the lock hierarchy. +(This means that it may not display information for locks which have been +destroyed.) +It displays a similar level of detail to the messages produced by the run-time +checks. +.Pp +The sysctl +.Va debug.witness.badstacks_verbose +displays a similar listing to the sysctl +.Va debug.witness.badstacks . +However, it tries to show all observed lock chains that contribute to the +lock order. +It uses '**' to mark lock orders which were attempted but not added to the +hierarchy because they violated the hierarchy the +.Nm +code had previously observed. +.Pp The .Nm code also provides three extra diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -443,6 +443,14 @@ sysctl_debug_witness_badstacks, "A", "Show bad witness stacks"); +/* + * Call this to print out the witness faulty stacks with verbose detail. + */ +SYSCTL_PROC(_debug_witness, OID_AUTO, badstacks_verbose, + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 1, + sysctl_debug_witness_badstacks, "A", + "Show bad witness stacks with verbose detail"); + static struct mtx w_mtx; /* w_list */ @@ -2644,17 +2652,248 @@ } #endif +#define NUM_VERBOSE_STACKS 256 +#define MAX_LOCKCHAIN_RECURSION 32 + +struct verbose_tracker { + struct witness t_w1, t_w2; + struct stack t_stack; + struct sbuf *sb; + int generation; + int pairs[2 * NUM_VERBOSE_STACKS]; + int pair_count; + int recursion_list[MAX_LOCKCHAIN_RECURSION]; + int recursion_count; +}; + +static struct verbose_tracker * +get_verbose_tracker(struct sbuf *sb) +{ + struct verbose_tracker *rv; + + rv = malloc(sizeof(struct verbose_tracker), M_TEMP, M_WAITOK | M_ZERO); + rv->sb = sb; + return (rv); +} + +static void +verbose_tracker_checks(struct verbose_tracker *t) +{ + + KASSERT(t->recursion_count == 0, + ("verbose_tracker recursion count is %d (expected 0)", + t->recursion_count)); +} + +static void +reset_verbose_tracker(struct verbose_tracker *t, int generation) +{ + + if (t == NULL) + return; + verbose_tracker_checks(t); + t->pair_count = 0; + t->generation = generation; +} + +static bool +has_verbose_lockpair(const struct verbose_tracker *t, int from, int to) +{ + int i; + + /* Look for value. */ + for (i = 0; i < (2 * t->pair_count); i += 2) + if (t->pairs[i] == from && t->pairs[i + 1] == to) + return (true); + return (false); +} + +static void +add_verbose_lockpair(struct verbose_tracker *t, int from, int to) +{ + + /* Check for duplicates. */ + if (has_verbose_lockpair(t, from, to)) + return; + + /* Add a new value. */ + if (t->pair_count < NUM_VERBOSE_STACKS) { + t->pairs[t->pair_count * 2] = from; + t->pairs[(t->pair_count * 2) + 1] = to; + t->pair_count++; + } +} + static void -sbuf_print_witness_badstacks(struct sbuf *sb, size_t *oldidx) +free_verbose_tracker(struct verbose_tracker *t) +{ + + if (t == NULL) + return; + verbose_tracker_checks(t); + free(t, M_TEMP); +} + +static bool +sbuf_print_verbose_witness_chains(struct verbose_tracker *t, int from, int to) +{ + struct witness *w1, *w2; + int found, i; + + found = 0; + + mtx_lock_spin(&w_mtx); + if (t->generation != w_generation) { + mtx_unlock_spin(&w_mtx); + + /* + * The graph has changed. Break the recursion loop. + * The calling function should figure out what happened and + * restart. + */ + return (false); + } + + /* + * Check for a direct dependence. If so, print that here. + * However, we keep scanning just in case there are other + * locking paths between these two locks. + */ + w1 = &w_data[from]; + w2 = &w_data[to]; + if (isitmychild(w1, w2)) { + t->t_w1 = *w1; + t->t_w2 = *w2; + mtx_unlock_spin(&w_mtx); + + sbuf_printf(t->sb, "\"%s\" -> \"%s\"", + t->t_w1.w_name, t->t_w2.w_name); + + /* Add the lockchain which got us here. */ + KASSERT(t->recursion_count >= 0 && + t->recursion_count <= MAX_LOCKCHAIN_RECURSION, + ("Invalid t->recursion_count: %d", t->recursion_count)); + for (i = t->recursion_count - 1; i >= 0; i--) { + mtx_lock_spin(&w_mtx); + if (t->generation != w_generation) { + mtx_unlock_spin(&w_mtx); + /* The graph has changed. */ + return (false); + } + /* + * Make a local copy, drop the lock, and add the lock + * to the sbuf. + */ + t->t_w1 = w_data[t->recursion_list[i]]; + mtx_unlock_spin(&w_mtx); + sbuf_printf(t->sb, " -> \"%s\"", t->t_w1.w_name); + } + + sbuf_putc(t->sb, '\n'); + add_verbose_lockpair(t, from, to); + found++; + + mtx_lock_spin(&w_mtx); + if (t->generation != w_generation) { + mtx_unlock_spin(&w_mtx); + return (false); + } + } + + /* + * Ensure we aren't recursing too many times. We do this check + * after looking for direct dependencies so we don't fail to + * catch at least those at the limits of our recursion. + */ + if (t->recursion_count >= MAX_LOCKCHAIN_RECURSION) { + mtx_unlock_spin(&w_mtx); + return (found > 0); + } + + /* + * Record our 'to' lock on the recursion list. We will use this + * to build successful lock chains later. + * From this point on, we need to make sure we decrement the + * recursion count when returning. + */ + t->recursion_list[t->recursion_count] = to; + t->recursion_count++; + + /* Walk all parents of 'to' to see if any have a path to 'from'. */ + for (i = 1; i < w_max_used_index; i++) { + if (i == to || i == from) + continue; + if (isitmychild(&w_data[i], &w_data[to])) { + /* Drop the lock and recurse. */ + mtx_unlock_spin(&w_mtx); + if (sbuf_print_verbose_witness_chains(t, from, i)) { + add_verbose_lockpair(t, i, to); + found++; + } + mtx_lock_spin(&w_mtx); + if (t->generation != w_generation) + break; + } + } + mtx_unlock_spin(&w_mtx); + t->recursion_count--; + return (found > 0); +} + +static void +sbuf_print_verbose_witness_stacks(struct verbose_tracker *t) +{ + struct witness_lock_order_data *data; + int i; + + for (i = 0; i < (2 * t->pair_count); i += 2) { + mtx_lock_spin(&w_mtx); + if (t->generation != w_generation) { + /* + * The graph has changed. Return to the calling + * function so it can restart. + */ + mtx_unlock_spin(&w_mtx); + break; + } + + /* + * Make a local copy of the data we need so we can drop + * the lock. + */ + t->t_w1 = w_data[t->pairs[i]]; + t->t_w2 = w_data[t->pairs[i + 1]]; + data = witness_lock_order_get(&t->t_w1, &t->t_w2); + if (data != NULL) + stack_copy(&data->wlod_stack, &t->t_stack); + mtx_unlock_spin(&w_mtx); + + sbuf_printf(t->sb, + "Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n", + t->t_w1.w_name, t->t_w1.w_class->lc_name, + t->t_w2.w_name, t->t_w2.w_class->lc_name); + if (data != NULL) + stack_sbuf_print(t->sb, &t->t_stack); + else + sbuf_printf(t->sb, + "(No stack trace--hardcoded relationship?)\n"); + sbuf_putc(t->sb, '\n'); + } +} + +static void +sbuf_print_witness_badstacks(struct sbuf *sb, size_t *oldidx, bool verbose) { struct witness_lock_order_data *data1, *data2, *tmp_data1, *tmp_data2; struct witness *tmp_w1, *tmp_w2, *w1, *w2; + struct verbose_tracker *t; int generation, i, j; tmp_data1 = NULL; tmp_data2 = NULL; tmp_w1 = NULL; tmp_w2 = NULL; + t = NULL; /* Allocate and init temporary storage space. */ tmp_w1 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO); @@ -2665,11 +2904,14 @@ M_WAITOK | M_ZERO); stack_zero(&tmp_data1->wlod_stack); stack_zero(&tmp_data2->wlod_stack); + if (verbose) + t = get_verbose_tracker(sb); restart: mtx_lock_spin(&w_mtx); generation = w_generation; mtx_unlock_spin(&w_mtx); + reset_verbose_tracker(t, generation); sbuf_printf(sb, "Number of known direct relationships is %d\n", w_lohash.wloh_count); for (i = 1; i < w_max_used_index; i++) { @@ -2738,7 +2980,23 @@ "\nLock order reversal between \"%s\"(%s) and \"%s\"(%s)!\n", tmp_w1->w_name, tmp_w1->w_class->lc_name, tmp_w2->w_name, tmp_w2->w_class->lc_name); - if (data1) { + if (verbose) { + sbuf_printf(sb, + "All lock orders from \"%s\"(%s) -> \"%s\"(%s):\n", + tmp_w1->w_name, tmp_w1->w_class->lc_name, + tmp_w2->w_name, tmp_w2->w_class->lc_name); + sbuf_print_verbose_witness_chains(t, i, j); + if (data1 && !has_verbose_lockpair(t, i, j)) { + sbuf_printf(t->sb, + "** \"%s\" -> \"%s\"\n", + tmp_w1->w_name, tmp_w2->w_name); + add_verbose_lockpair(t, i, j); + } + sbuf_putc(sb, '\n'); + sbuf_print_verbose_witness_stacks(t); + sbuf_putc(sb, '\n'); + reset_verbose_tracker(t, generation); + } else if (data1) { sbuf_printf(sb, "Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n", tmp_w1->w_name, tmp_w1->w_class->lc_name, @@ -2746,7 +3004,24 @@ stack_sbuf_print(sb, &tmp_data1->wlod_stack); sbuf_putc(sb, '\n'); } - if (data2 && data2 != data1) { + if (verbose) { + sbuf_printf(sb, + "All lock orders from \"%s\"(%s) -> \"%s\"(%s):\n", + tmp_w2->w_name, tmp_w2->w_class->lc_name, + tmp_w1->w_name, tmp_w1->w_class->lc_name); + sbuf_print_verbose_witness_chains(t, j, i); + if (data2 && data2 != data1 && + !has_verbose_lockpair(t, j, i)) { + sbuf_printf(t->sb, + "** \"%s\" -> \"%s\"\n", + tmp_w2->w_name, tmp_w1->w_name); + add_verbose_lockpair(t, j, i); + } + sbuf_putc(sb, '\n'); + sbuf_print_verbose_witness_stacks(t); + sbuf_putc(sb, '\n'); + reset_verbose_tracker(t, generation); + } else if (data2 && data2 != data1) { sbuf_printf(sb, "Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n", tmp_w2->w_name, tmp_w2->w_class->lc_name, @@ -2775,6 +3050,7 @@ free(tmp_data2, M_TEMP); free(tmp_w1, M_TEMP); free(tmp_w2, M_TEMP); + free_verbose_tracker(t); } static int @@ -2796,7 +3072,7 @@ if (sb == NULL) return (ENOMEM); - sbuf_print_witness_badstacks(sb, &req->oldidx); + sbuf_print_witness_badstacks(sb, &req->oldidx, arg2); sbuf_finish(sb); error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); @@ -2820,7 +3096,7 @@ sbuf_new(&sb, buffer, sizeof(buffer), SBUF_FIXEDLEN); sbuf_set_drain(&sb, sbuf_db_printf_drain, NULL); - sbuf_print_witness_badstacks(&sb, &dummy); + sbuf_print_witness_badstacks(&sb, &dummy, false); sbuf_finish(&sb); } #endif