diff --git a/sys/netpfil/ipfw/ip_fw_table.h b/sys/netpfil/ipfw/ip_fw_table.h --- a/sys/netpfil/ipfw/ip_fw_table.h +++ b/sys/netpfil/ipfw/ip_fw_table.h @@ -32,7 +32,39 @@ */ #ifdef _KERNEL -struct table_algo; +/* + * Table has the following `type` concepts: + * + * `no.type` represents lookup key type (addr, ifp, uid, etc..) + * vmask represents bitmask of table values which are present at the moment. + * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old + * single-value-for-all approach. + */ +struct table_config { + struct named_object no; + uint8_t tflags; /* type flags */ + uint8_t locked; /* 1 if locked from changes */ + uint8_t linked; /* 1 if already linked */ + uint8_t ochanged; /* used by set swapping */ + uint8_t vshared; /* 1 if using shared value array */ + uint8_t spare[3]; + uint32_t count; /* Number of records */ + uint32_t limit; /* Max number of records */ + uint32_t vmask; /* bitmask with supported values */ + uint32_t ocount; /* used by set swapping */ + uint64_t gencnt; /* generation count */ + char tablename[64]; /* table name */ + struct table_algo *ta; /* Callbacks for given algo */ + void *astate; /* algorithm state */ + struct table_info { + table_lookup_t *lookup;/* Lookup function */ + void *state; /* Lookup radix/other structure */ + void *xstate;/* eXtended state */ + u_long data; /* Hints for given func */ + } ti_copy; /* data to put to table_info */ + struct namedobj_instance *vi; +}; + struct tables_config { struct namedobj_instance *namehash; struct namedobj_instance *valhash; @@ -40,18 +72,9 @@ uint32_t algo_count; struct table_algo *algo[256]; struct table_algo *def_algo[IPFW_TABLE_MAXTYPE + 1]; - TAILQ_HEAD(op_state_l,op_state) state_list; }; #define CHAIN_TO_TCFG(chain) ((struct tables_config *)(chain)->tblcfg) -struct table_info { - table_lookup_t *lookup; /* Lookup function */ - void *state; /* Lookup radix/other structure */ - void *xstate; /* eXtended state */ - u_long data; /* Hints for given func */ -}; - -struct table_value; struct tentry_info { void *paddr; struct table_value *pvalue; @@ -159,18 +182,16 @@ /* ipfw_table_value.c functions */ struct table_config; -struct tableop_state; void ipfw_table_value_init(struct ip_fw_chain *ch, int first); void ipfw_table_value_destroy(struct ip_fw_chain *ch, int last); -int ipfw_link_table_values(struct ip_fw_chain *ch, struct tableop_state *ts, - uint8_t flags); +int ipfw_link_table_values(struct ip_fw_chain *ch, struct table_config *tc, + struct tentry_info *tei, uint32_t count, uint8_t flags); void ipfw_garbage_table_values(struct ip_fw_chain *ch, struct table_config *tc, struct tentry_info *tei, uint32_t count, int rollback); void ipfw_import_table_value_v1(ipfw_table_value *iv); void ipfw_export_table_value_v1(struct table_value *v, ipfw_table_value *iv); void ipfw_unref_table_values(struct ip_fw_chain *ch, struct table_config *tc, struct table_algo *ta, void *astate, struct table_info *ti); -void rollback_table_values(struct tableop_state *ts); int ipfw_rewrite_table_uidx(struct ip_fw_chain *chain, struct rule_check_info *ci); @@ -189,32 +210,5 @@ int ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint32_t kidx, ta_foreach_f f, void *arg); -/* internal functions */ -void tc_ref(struct table_config *tc); -void tc_unref(struct table_config *tc); - -struct op_state; -typedef void (op_rollback_f)(void *object, struct op_state *state); -struct op_state { - TAILQ_ENTRY(op_state) next; /* chain link */ - op_rollback_f *func; -}; - -struct tableop_state { - struct op_state opstate; - struct ip_fw_chain *ch; - struct table_config *tc; - struct table_algo *ta; - struct tentry_info *tei; - uint32_t count; - uint32_t vmask; - int vshared; - int modified; -}; - -void add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts); -void del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts); -void rollback_toperation_state(struct ip_fw_chain *ch, void *object); - #endif /* _KERNEL */ #endif /* _IPFW2_TABLE_H */ diff --git a/sys/netpfil/ipfw/ip_fw_table.c b/sys/netpfil/ipfw/ip_fw_table.c --- a/sys/netpfil/ipfw/ip_fw_table.c +++ b/sys/netpfil/ipfw/ip_fw_table.c @@ -61,34 +61,6 @@ #include #include - /* - * Table has the following `type` concepts: - * - * `no.type` represents lookup key type (addr, ifp, uid, etc..) - * vmask represents bitmask of table values which are present at the moment. - * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old - * single-value-for-all approach. - */ -struct table_config { - struct named_object no; - uint8_t tflags; /* type flags */ - uint8_t locked; /* 1 if locked from changes */ - uint8_t linked; /* 1 if already linked */ - uint8_t ochanged; /* used by set swapping */ - uint8_t vshared; /* 1 if using shared value array */ - uint8_t spare[3]; - uint32_t count; /* Number of records */ - uint32_t limit; /* Max number of records */ - uint32_t vmask; /* bitmask with supported values */ - uint32_t ocount; /* used by set swapping */ - uint64_t gencnt; /* generation count */ - char tablename[64]; /* table name */ - struct table_algo *ta; /* Callbacks for given algo */ - void *astate; /* algorithm state */ - struct table_info ti_copy; /* data to put to table_info */ - struct namedobj_instance *vi; -}; - static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti, struct table_config **tc); static struct table_config *find_table(struct namedobj_instance *ni, @@ -115,8 +87,8 @@ struct tid_info *b); static int check_table_name(const char *name); -static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts, - struct table_config *tc, struct table_info *ti, uint32_t count); +static int check_table_space(struct ip_fw_chain *ch, struct table_config *tc, + struct table_info *ti, uint32_t count); static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti); static struct table_algo *find_table_algo(struct tables_config *tableconf, @@ -130,49 +102,6 @@ #define TA_BUF_SZ 128 /* On-stack buffer for add/delete state */ -void -rollback_toperation_state(struct ip_fw_chain *ch, void *object) -{ - struct tables_config *tcfg; - struct op_state *os; - - tcfg = CHAIN_TO_TCFG(ch); - TAILQ_FOREACH(os, &tcfg->state_list, next) - os->func(object, os); -} - -void -add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts) -{ - struct tables_config *tcfg; - - tcfg = CHAIN_TO_TCFG(ch); - TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next); -} - -void -del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts) -{ - struct tables_config *tcfg; - - tcfg = CHAIN_TO_TCFG(ch); - TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next); -} - -void -tc_ref(struct table_config *tc) -{ - - tc->no.refcnt++; -} - -void -tc_unref(struct table_config *tc) -{ - - tc->no.refcnt--; -} - static struct table_value * get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx) { @@ -473,57 +402,9 @@ free(ta_buf_m, M_TEMP); } -static void -rollback_add_entry(void *object, struct op_state *_state) -{ - struct ip_fw_chain *ch __diagused; - struct tableop_state *ts; - - ts = (struct tableop_state *)_state; - - if (ts->tc != object && ts->ch != object) - return; - - ch = ts->ch; - - IPFW_UH_WLOCK_ASSERT(ch); - - /* Call specifid unlockers */ - rollback_table_values(ts); - - /* Indicate we've called */ - ts->modified = 1; -} - /* * Adds/updates one or more entries in table @ti. * - * Function may drop/reacquire UH wlock multiple times due to - * items alloc, algorithm callbacks (check_space), value linkage - * (new values, value storage realloc), etc.. - * Other processes like other adds (which may involve storage resize), - * table swaps (which changes table data and may change algo type), - * table modify (which may change value mask) may be executed - * simultaneously so we need to deal with it. - * - * The following approach was implemented: - * we have per-chain linked list, protected with UH lock. - * add_table_entry prepares special on-stack structure wthich is passed - * to its descendants. Users add this structure to this list before unlock. - * After performing needed operations and acquiring UH lock back, each user - * checks if structure has changed. If true, it rolls local state back and - * returns without error to the caller. - * add_table_entry() on its own checks if structure has changed and restarts - * its operation from the beginning (goto restart). - * - * Functions which are modifying fields of interest (currently - * resize_shared_value_storage() and swap_tables() ) - * traverses given list while holding UH lock immediately before - * performing their operations calling function provided be list entry - * ( currently rollback_add_entry ) which performs rollback for all necessary - * state and sets appropriate values in structure indicating rollback - * has happened. - * * Algo interaction: * Function references @ti first to ensure table won't * disappear or change its type. @@ -542,86 +423,47 @@ struct table_config *tc; struct table_algo *ta; struct tentry_info *ptei; - struct tableop_state ts; char ta_buf[TA_BUF_SZ]; caddr_t ta_buf_m, v; uint32_t kidx, num, numadd; - int error, first_error, i, rollback; + int error, first_error, i, rollback = 0; - memset(&ts, 0, sizeof(ts)); - ta = NULL; IPFW_UH_WLOCK(ch); /* * Find and reference existing table. */ -restart: - if (ts.modified != 0) { - flush_batch_buffer(ch, ta, tei, count, rollback, - ta_buf_m, ta_buf); - memset(&ts, 0, sizeof(ts)); - ta = NULL; - } - error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc); if (error != 0) { IPFW_UH_WUNLOCK(ch); return (error); } + /* Drop reference we've used in first search */ + tc->no.refcnt--; ta = tc->ta; - /* Fill in tablestate */ - ts.ch = ch; - ts.opstate.func = rollback_add_entry; - ts.tc = tc; - ts.vshared = tc->vshared; - ts.vmask = tc->vmask; - ts.ta = ta; - ts.tei = tei; - ts.count = count; - rollback = 0; - add_toperation_state(ch, &ts); - /* Allocate memory and prepare record(s) */ /* Pass stack buffer by default */ ta_buf_m = ta_buf; error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m); - - del_toperation_state(ch, &ts); - /* Drop reference we've used in first search */ - tc->no.refcnt--; - - /* Check prepare_batch_buffer() error */ if (error != 0) goto cleanup; - /* - * Check if table swap has happened. - * (so table algo might be changed). - * Restart operation to achieve consistent behavior. - */ - if (ts.modified != 0) - goto restart; - /* * Link all values values to shared/per-table value array. */ - error = ipfw_link_table_values(ch, &ts, flags); + error = ipfw_link_table_values(ch, tc, tei, count, flags); if (error != 0) goto cleanup; - if (ts.modified != 0) - goto restart; /* * Ensure we are able to add all entries without additional * memory allocations. */ kidx = tc->no.kidx; - error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count); + error = check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), count); if (error != 0) goto cleanup; - if (ts.modified != 0) - goto restart; /* We've got valid table in @tc. Let's try to add data */ kidx = tc->no.kidx; @@ -681,7 +523,7 @@ /* Permit post-add algorithm grow/rehash. */ if (numadd != 0) - check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0); + check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), 0); /* Return first error to user, if any */ error = first_error; @@ -767,7 +609,7 @@ if (numdel != 0) { /* Run post-del hook to permit shrinking */ - check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0); + check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), 0); } IPFW_UH_WUNLOCK(ch); @@ -796,8 +638,8 @@ * Returns 0 on success. */ static int -check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts, - struct table_config *tc, struct table_info *ti, uint32_t count) +check_table_space(struct ip_fw_chain *ch, struct table_config *tc, + struct table_info *ti, uint32_t count) { struct table_algo *ta; uint64_t pflags; @@ -826,29 +668,11 @@ break; } - /* We have to shrink/grow table */ - if (ts != NULL) - add_toperation_state(ch, ts); - memset(&ta_buf, 0, sizeof(ta_buf)); error = ta->prepare_mod(ta_buf, &pflags); - - if (ts != NULL) - del_toperation_state(ch, ts); - if (error != 0) break; - if (ts != NULL && ts->modified != 0) { - /* - * Swap operation has happened - * so we're currently operating on other - * table data. Stop doing this. - */ - ta->flush_mod(ta_buf); - break; - } - /* Check if we still need to alter table */ ti = KIDX_TO_TI(ch, tc->no.kidx); if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) { @@ -1097,20 +921,6 @@ return (error); } -static void -restart_flush(void *object, struct op_state *_state) -{ - struct tableop_state *ts; - - ts = (struct tableop_state *)_state; - - if (ts->tc != object) - return; - - /* Indicate we've called */ - ts->modified = 1; -} - /* * Flushes given table. * @@ -1129,8 +939,7 @@ struct table_info ti_old, ti_new, *tablestate; void *astate_old, *astate_new; char algostate[64], *pstate; - struct tableop_state ts; - int error, need_gc; + int error; uint32_t kidx; uint8_t tflags; @@ -1144,15 +953,8 @@ IPFW_UH_WUNLOCK(ch); return (ESRCH); } - need_gc = 0; astate_new = NULL; memset(&ti_new, 0, sizeof(ti_new)); -restart: - /* Set up swap handler */ - memset(&ts, 0, sizeof(ts)); - ts.opstate.func = restart_flush; - ts.tc = tc; - ta = tc->ta; /* Do not flush readonly tables */ if ((ta->flags & TA_FLAG_READONLY) != 0) { @@ -1167,16 +969,6 @@ } else pstate = NULL; tflags = tc->tflags; - tc->no.refcnt++; - add_toperation_state(ch, &ts); - - /* - * Stage 1.5: if this is not the first attempt, destroy previous state - */ - if (need_gc != 0) { - ta->destroy(astate_new, &ti_new); - need_gc = 0; - } /* * Stage 2: allocate new table instance using same algo. @@ -1188,26 +980,11 @@ * Stage 3: swap old state pointers with newly-allocated ones. * Decrease refcount. */ - tc->no.refcnt--; - del_toperation_state(ch, &ts); - if (error != 0) { IPFW_UH_WUNLOCK(ch); return (error); } - /* - * Restart operation if table swap has happened: - * even if algo may be the same, algo init parameters - * may change. Restart operation instead of doing - * complex checks. - */ - if (ts.modified != 0) { - /* Delay destroying data since we're holding UH lock */ - need_gc = 1; - goto restart; - } - ni = CHAIN_TO_NI(ch); kidx = tc->no.kidx; tablestate = (struct table_info *)ch->tablestate; @@ -1347,10 +1124,6 @@ return (EACCES); } - /* Notify we're going to swap */ - rollback_toperation_state(ch, tc_a); - rollback_toperation_state(ch, tc_b); - /* Everything is fine, prepare to swap */ tablestate = (struct table_info *)ch->tablestate; ti = tablestate[tc_a->no.kidx]; @@ -1541,7 +1314,7 @@ if (tc == NULL) return (ESRCH); - tc_ref(tc); + tc->no.refcnt++; *kidx = tc->no.kidx; return (0); diff --git a/sys/netpfil/ipfw/ip_fw_table_value.c b/sys/netpfil/ipfw/ip_fw_table_value.c --- a/sys/netpfil/ipfw/ip_fw_table_value.c +++ b/sys/netpfil/ipfw/ip_fw_table_value.c @@ -125,13 +125,13 @@ } static void -get_value_ptrs(struct ip_fw_chain *ch, struct table_config *tc, int vshared, +get_value_ptrs(struct ip_fw_chain *ch, struct table_config *tc, struct table_value **ptv, struct namedobj_instance **pvi) { struct table_value *pval; struct namedobj_instance *vi; - if (vshared != 0) { + if (tc->vshared != 0) { pval = (struct table_value *)ch->valuestate; vi = CHAIN_TO_VI(ch); } else { @@ -212,7 +212,6 @@ /* Update pointers and notify everyone we're changing @ch */ pval = (struct table_value *)ch->valuestate; - rollback_toperation_state(ch, ch); /* Good. Let's merge */ memcpy(valuestate, pval, sizeof(struct table_value) * tcfg->val_size); @@ -317,48 +316,13 @@ ta->foreach(astate, ti, unref_table_value_cb, &fa); } -/* - * Table operation state handler. - * Called when we are going to change something in @tc which - * may lead to inconsistencies in on-going table data addition. - * - * Here we rollback all already committed state (table values, currently) - * and set "modified" field to non-zero value to indicate - * that we need to restart original operation. - */ -void -rollback_table_values(struct tableop_state *ts) -{ - struct ip_fw_chain *ch; - struct table_value *pval; - struct tentry_info *ptei; - struct namedobj_instance *vi; - int i; - - ch = ts->ch; - - IPFW_UH_WLOCK_ASSERT(ch); - - /* Get current table value pointer */ - get_value_ptrs(ch, ts->tc, ts->vshared, &pval, &vi); - - for (i = 0; i < ts->count; i++) { - ptei = &ts->tei[i]; - - if (ptei->value == 0) - continue; - - unref_table_value(vi, pval, ptei->value); - } -} - /* * Allocate new value index in either shared or per-table array. * * Returns 0 on success. */ static int -alloc_table_vidx(struct ip_fw_chain *ch, struct tableop_state *ts, +alloc_table_vidx(struct ip_fw_chain *ch, struct table_config *tc, struct namedobj_instance *vi, uint32_t *pvidx, uint8_t flags) { int error, vlimit; @@ -366,17 +330,11 @@ IPFW_UH_WLOCK_ASSERT(ch); - error = ipfw_objhash_alloc_idx(vi, &vidx); - if (error != 0) { - /* - * We need to resize array. - */ - ts->opstate.func(ts->tc, &ts->opstate); - error = resize_shared_value_storage(ch); - return (error); /* ts->modified should be set, we will restart */ - } + if ((error = ipfw_objhash_alloc_idx(vi, &vidx)) != 0 && + (error = resize_shared_value_storage(ch)) != 0) + return (error); - vlimit = ts->ta->vlimit; + vlimit = tc->ta->vlimit; if (vlimit != 0 && vidx >= vlimit && !(flags & IPFW_CTF_ATOMIC)) { /* * Algorithm is not able to store given index. @@ -384,7 +342,7 @@ * per-table value array or return error * if we're already using it. */ - if (ts->vshared != 0) { + if (tc->vshared != 0) { /* shared -> per-table */ return (ENOSPC); /* TODO: proper error */ } @@ -429,9 +387,8 @@ /* * Get current table value pointers. - * XXX: Properly read vshared */ - get_value_ptrs(ch, tc, 1, &pval, &vi); + get_value_ptrs(ch, tc, &pval, &vi); for (i = 0; i < count; i++) { ptei = &tei[i]; @@ -462,14 +419,13 @@ * Success: return 0. */ int -ipfw_link_table_values(struct ip_fw_chain *ch, struct tableop_state *ts, - uint8_t flags) +ipfw_link_table_values(struct ip_fw_chain *ch, struct table_config *tc, + struct tentry_info *tei, uint32_t count, uint8_t flags) { int error, i, found; struct namedobj_instance *vi; - struct table_config *tc; - struct tentry_info *tei, *ptei; - uint32_t count, vidx, vlimit; + struct tentry_info *ptei; + uint32_t vidx, vlimit; struct table_val_link *ptv; struct table_value tval, *pval; @@ -478,19 +434,16 @@ * save their indices. */ IPFW_UH_WLOCK_ASSERT(ch); - get_value_ptrs(ch, ts->tc, ts->vshared, &pval, &vi); + get_value_ptrs(ch, tc, &pval, &vi); error = 0; found = 0; - vlimit = ts->ta->vlimit; + vlimit = tc->ta->vlimit; vidx = 0; - tc = ts->tc; - tei = ts->tei; - count = ts->count; for (i = 0; i < count; i++) { ptei = &tei[i]; ptei->value = 0; /* Ensure value is always 0 in the beginning */ - mask_table_value(ptei->pvalue, &tval, ts->vmask); + mask_table_value(ptei->pvalue, &tval, tc->vmask); ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0, (char *)&tval); if (ptv == NULL) @@ -505,19 +458,11 @@ found++; } - if (ts->count == found) { - /* We've found all values , no need ts create new ones */ + if (count == found) { + /* We've found all values, no need to create new ones. */ return (0); } - /* - * we have added some state here, let's attach operation - * state ts the list ts be able ts rollback if necessary. - */ - add_toperation_state(ch, ts); - /* Ensure table won't disappear */ - tc_ref(tc); - /* * Stage 2: allocate objects for non-existing values. */ @@ -535,17 +480,6 @@ * Stage 3: allocate index numbers for new values * and link them to index. */ - tc_unref(tc); - del_toperation_state(ch, ts); - if (ts->modified != 0) { - /* - * In general, we should free all state/indexes here - * and return. However, we keep allocated state instead - * to ensure we achieve some progress on each restart. - */ - return (0); - } - KASSERT(pval == ch->valuestate, ("resize_storage() notify failure")); /* Let's try to link values */ @@ -553,7 +487,7 @@ ptei = &tei[i]; /* Check if record has appeared */ - mask_table_value(ptei->pvalue, &tval, ts->vmask); + mask_table_value(ptei->pvalue, &tval, tc->vmask); ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0, (char *)&tval); if (ptv != NULL) { @@ -562,14 +496,8 @@ continue; } - error = alloc_table_vidx(ch, ts, vi, &vidx, flags); - if (error != 0) { - ts->opstate.func(ts->tc, &ts->opstate); + if ((error = alloc_table_vidx(ch, tc, vi, &vidx, flags)) != 0) return (error); - } - /* value storage resize has happened, return */ - if (ts->modified != 0) - return (0); /* Finally, we have allocated valid index, let's add entry */ ptei->value = vidx;